% Journal article (Morphology, 2023): information-theoretical analysis of the regularity gradient in verb inflection.
@ARTICLE{MARZI_2023_ARTICLE_MP_485504,
  AUTHOR = {Marzi, C. and Pirrelli, V.},
  TITLE = {A discriminative information-theoretical analysis of the regularity gradient in inflectional morphology},
  YEAR = {2023},
  ABSTRACT = {Over the last decades, several independent lines of research in morphology have questioned the hypothesis of a direct correspondence between sublexical units and their mental correlates. Word and paradigm models of morphology shifted the fundamental part-whole relation in an inflection system onto the relation between individual inflected word forms and inflectional paradigms. In turn, the use of artificial neural networks of densely interconnected parallel processing nodes for morphology learning marked a radical departure from a morpheme-based view of the mental lexicon. Lately, in computational models of Discriminative Learning, a network architecture has been combined with an uncertainty reducing mechanism that dispenses with the need for a one-to-one association between formal contrasts and meanings, leading to the dissolution of a discrete notion of the morpheme. The paper capitalises on these converging lines of development to offer a unifying information-theoretical, simulation-based analysis of the costs incurred in processing (ir)regularly inflected forms belonging to the verb systems of English, German, French, Spanish and Italian. Using Temporal Self-Organising Maps as a computational model of lexical storage and access, we show that a discriminative, recurrent neural network, based on Rescorla-Wagner's equations, can replicate speakers' exquisite sensitivity to widespread effects of word frequency, paradigm entropy and morphological (ir)regularity in lexical processing. The evidence suggests an explanatory hypothesis linking Word and paradigm morphology with principles of information theory and human perception of morphological structure. According to this hypothesis, the ways more or less regularly inflected words are structured in the mental lexicon are more related to a reduction in processing uncertainty and maximisation of predictive efficiency than to economy of storage.},
  KEYWORDS = {Morphological inflection, Morphological regularity, Prediction-driven processing, Discriminative learning, Lexical self-organisation, Gradient structure, Information theory, Non-linear modelling},
  PAGES = {1--51},
  URL = {https://doi.org/10.1007/s11525-023-09415-6},
  DOI = {10.1007/s11525-023-09415-6},
  PUBLISHER = {Springer (Heidelberg, Paesi Bassi)},
  ISSN = {1871-5621},
  JOURNAL = {Morphology (Dordrecht)},
}

% Editorial record whose URL points to a whole journal issue (Morphology, volume 33, issue 4).
% NOTE(review): EDITORIAL is not a standard BibTeX entry type; standard styles will warn and fall back to misc.
@EDITORIAL{MARZI_2023_EDITORIAL_MP_490518,
  AUTHOR = {Marzi, C. and Pirrelli, V.},
  TITLE = {Integrative Views on Representations and Processes in Morphology},
  YEAR = {2023},
  ABSTRACT = {One of the most enduring conceptualisations of the language architecture rests on a modular subdivision of work between lexical representations of stored items on the one hand, and dynamic processes, modelled as procedural rules working on such items, on the other hand. In morphology, network-based approaches have suggested an alternative "integrative" view of word representations and processes, where lexical representations consist of partially overlapping activation patterns spreading over several processing units. From this integrative perspective, the resulting network is both a lexicon and a word processor. We argue that the network-based view provides a stimulating research framework for several complementary levels of language inquiry (including theoretical, computational and neuro-psychological approaches) to be fruitfully integrated into a novel, comprehensive understanding of morphology. 
We discuss some implications of this view and delineate prospects of progress in this area.},
  KEYWORDS = {morphology, mental lexicon, Connectionism, Network science, Discriminative Learning},
  PAGES = {397--556},
  URL = {https://link.springer.com/journal/11525/volumes-and-issues/33-4},
  VOLUME = {33},
  NUMBER = {4},
  DOI = {10.1007/s11525-023-09416-5},
  PUBLISHER = {Springer (Dordrecht, NLD)},
}

% Introductory journal article (Morphology, volume 33, pages 397--408) opening the issue recorded just above.
% NOTE(review): the DOI below is also carried by the issue-level record above -- confirm which record owns it.
@ARTICLE{MARZI_2023_EDITORIAL_MP_492243,
  AUTHOR = {Marzi, C. and Pirrelli, V.},
  TITLE = {Integrative views of representations and processes in morphology: an introduction},
  YEAR = {2023},
  KEYWORDS = {Morphology, Mental Lexicon, Connectionism, Network science, Discriminative learning},
  PAGES = {397--408},
  URL = {https://link.springer.com/article/10.1007/s11525-023-09416-5},
  VOLUME = {33},
  NUMBER = {4},
  DOI = {10.1007/s11525-023-09416-5},
  PUBLISHER = {Springer (Heidelberg, Paesi Bassi)},
  ISSN = {1871-5621},
  JOURNAL = {Morphology (Dordrecht)},
}

% Journal article (Brain sciences, 2022): finger-tracking reading study comparing ASD and typically developing children.
@ARTICLE{MARZI_2022_ARTICLE_MNMMP_471441,
  AUTHOR = {Marzi, C. and Narzisi, A. and Milone, A. and Masi, G. and Pirrelli, V.},
  TITLE = {Reading behaviors through patterns of finger-tracking in {Italian} children with autism spectrum disorder},
  YEAR = {2022},
  ABSTRACT = {The paper proposes an ecological and portable protocol for the large-scale collection of reading data in high-functioning autism spectrum disorder (ASD) children based on recording the finger movements of a subject reading a text displayed on a tablet touchscreen. By capitalizing on recent evidence that movements of a finger that points to a scene or text during visual exploration or reading may approximate eye fixations, we focus on recognition of written content and function words, pace of reading, and accuracy in reading comprehension. 
The analysis showed significant differences between typically developing and ASD children, with the latter group exhibiting greater variation in levels of reading ability, slower developmental pace in reading speed, less accurate comprehension, greater dependency on word length and word frequency, less significant prediction-based processing, as well as a monotonous, steady reading pace with reduced attention to weak punctuation. Finger-tracking patterns provides evidence that ASD readers may fail to integrate single word processing into major syntactic structures and lends support to the hypothesis of an impaired use of contextual information to predict upcoming stimuli, suggesting that difficulties in perception may arise as difficulties in prediction.},
  KEYWORDS = {reading, autism, finger-tracking, developing readers, prediction-driven processing},
  PAGES = {1--17},
  URL = {https://www.mdpi.com/2076-3425/12/10/1316},
  VOLUME = {12},
  NUMBER = {10},
  DOI = {10.3390/brainsci12101316},
  PUBLISHER = {Molecular Diversity Preservation International (Basel)},
  ISSN = {2076-3425},
  JOURNAL = {Brain sciences},
}

% Book chapter (2022): finger-tracking and eye-tracking evidence on adults' silent and oral reading.
@INCOLLECTION{CREPALDI_2022_INCOLLECTION_CFMNPT_471258,
  AUTHOR = {Crepaldi, D. and Ferro, M. and Marzi, C. and Nadalini, A. and Pirrelli, V. and Taxitari, L.},
  TITLE = {Finger movements and eye movements during adults' silent and oral reading},
  YEAR = {2022},
  ABSTRACT = {Using a common tablet and a web application, we can record the finger movements of a reader that is concurrently reading and finger-pointing a text displayed on the tablet touchscreen. In a preliminary analysis of "finger-tracking" data of early-graders we showed that finger movements can replicate established reading effects observed in more controlled settings. 
Here, we analyse and discuss reading evidence collected by (i) tracking the finger movements of adults reading a short essay displayed on a tablet touchscreen, and (ii) tracking the eye movements of adults reading a comparable text displayed on the screen of a computer. Texts in the two conditions were controlled for linguistic complexity and page layout. In addition, we tested adults' comprehension in both silent and oral reading, by asking them multiple-choice questions after reading each text. We show and discuss the reading evidence that the two (optical and tactile) protocols provide, and to what extent they show comparable effects. We conclude with some remarks on the importance of ecology and portability of protocols for large-scale collection of naturalistic reading data.},
  KEYWORDS = {Reading, finger-tracking, digital technology},
  PAGES = {443--471},
  URL = {https://link.springer.com/book/9783030998905},
  VOLUME = {23},
  PUBLISHER = {Springer (Dordrecht, NLD)},
  ISBN = {978-3-030-99890-5},
  BOOKTITLE = {Developing Language and Literacy: Studies in Honor of Dorit Diskin Ravid},
  EDITOR = {Levie, R. and Bar On, A. and Ashkenazi, O. and Dattner, E. and Brandes, G.},
}

% Encyclopedia chapter (Oxford Encyclopedia of Romance Linguistics, 2022) on psycholinguistic research on inflection.
@INCOLLECTION{MARZI_2022_INCOLLECTION_MP_464598,
  AUTHOR = {Marzi, C. and Pirrelli, V.},
  TITLE = {Psycholinguistic Research on Inflectional Morphology in the {Romance} Languages},
  YEAR = {2022},
  ABSTRACT = {Over the past decades, psycholinguistic aspects of word processing have made a considerable impact on views of language theory and language architecture. In the quest for the principles governing the ways human speakers perceive, store, access, and produce words, inflection issues have provided a challenging realm of scientific inquiry, and a battlefield for radically opposing views. 
It is somewhat ironic that some of the most influential cognitive models of inflection have long been based on evidence from an inflectionally impoverished language like English, where the notions of inflectional regularity, (de)composability, predictability, phonological complexity, and default productivity appear to be mutually implied. An analysis of more "complex" inflection systems such as those of Romance languages shows that this mutual implication is not a universal property of inflection, but a contingency of poorly contrastive, nearly isolating inflection systems. Far from presenting minor faults in a solid, theoretical edifice, Romance evidence appears to call into question the subdivision of labor between rules and exceptions, the on-line processing vs. long-term memory dichotomy, and the distinction between morphological processes and lexical representations. A dynamic, learning-based view of inflection is more compatible with this data, whereby morphological structure is an emergent property of the ways inflected forms are processed and stored, grounded in universal principles of lexical self-organization and their neuro-functional correlates.},
  KEYWORDS = {Romance language morphology, paradigms, inflectional classes, lexical self-organisation, frequency effects, priming, discriminative learning, lexical blocking, long-term and short-term memory},
  PAGES = {1--44},
  URL = {https://oxfordre.com/linguistics/view/10.1093/acrefore/9780199384655.001.0001/acrefore-9780199384655-e-709},
  DOI = {10.1093/acrefore/9780199384655.013.709},
  PUBLISHER = {Oxford University Press (Oxford, GBR)},
  ISBN = {9780199384655},
  BOOKTITLE = {Oxford Encyclopedia of Romance Linguistics},
  EDITOR = {Loporcaro, M.},
}

% Conference abstract (INSAR 2022, Austin, Texas): finger-tracking patterns in early readers with ASD.
@INPROCEEDINGS{MARZI_2022_INPROCEEDINGS_MNFMMVPTP_471602,
  AUTHOR = {Marzi, C. and Narzisi, A. and Ferro, M. and Masi, G. and Milone, A. and Viglione, V. and Pelagatti, S. and Tomassini, I. 
and Pirrelli, V.},
  TITLE = {Patterns of finger-tracking in {Italian} early readers with Autism Spectrum Disorder},
  YEAR = {2022},
  ABSTRACT = {Background: Of late, the synergistic interaction of eye and hand movements in the exploration of a visual scene displayed on a computer touchscreen was shown to provide a congruent signature of the "attention maps" of subjects with autism spectrum disorders (ASD). A familiar context where this visual and tactile interaction is exploited is when children use the finger of their dominant hand to point the letters of written words as they are reading, particularly at early stages of their literacy development. In the present work, a dedicated app running on a common tablet is used to capture and analyse the finger-tracking behaviour of children with ASD while they are reading few episodes of a connected text on the tablet touchscreen. The reader's voice is also recorded through the tablet built-in microphone. The sliding movements of the finger across the tablet touchscreen are discretized into a series of densely distributed "touch events", which are then mapped onto the text lines in much the same way eye fixations are projected onto a sequence of words using an eye-tracker. Reading texts are linguistically annotated, to control for levels of reading difficulty, and finger-tracking times are associated with linguistic glosses. Objectives: Investigate patterns of finger-tracking as a potential non biological marker for identification of children with ASD. Methods: A preliminary analysis is offered of evidence of the finger-tracking behaviour of 20 Italian children with high functioning ASD, aged 7-11 years, while they are engaged in reading. A grade-matched control group of children with typical development was included. Patterns of finger-tracking are assessed in connection with three complementary aspects of reading behaviour: (1) word recognition, (2) pace of reading of multi-word intonation units, and (3) text comprehension, controlled by asking children a few multiple-choice questions on text content after each reading session. Results: Considerable variation in levels of reading ability was observed in the ASD sample, with a few children showing clear evidence of impaired reading comprehension. However, fluent readers with ASD exhibit the same correlation between accurate decoding (assessed by measuring per-word reading speed) and high levels of reading comprehension found in controls. Likewise, decoding rates were found to significantly increase with increasing grade levels, following the typical developmental pattern observed in controls. On a less local level of linguistic analysis, the reading pace of ASD readers fails to be modulated according to major syntactic structures, punctuation marks and direct speech turns, an effect concomitant with a flat prosodic intonation of oral reading. Conclusions: Preliminary findings confirm the heterogeneous nature of reading skills in children with ASD, showing that the use of a tablet screen as a tactile interface for visual perception analysis can offer a robust experimental protocol for large-scale, multimodal collection of naturalistic data for extensive assessment of readers with ASD.},
  KEYWORDS = {reading, autism, finger-tracking, developing readers, prediction-driven processing},
  PAGES = {192},
  URL = {https://cdn.ymaws.com/www.autism-insar.org/resource/resmgr/files/insar_2022/2022_Abstract_Book.pdf},
  VOLUME = {2022},
  CONFERENCE_NAME = {INSAR},
  CONFERENCE_PLACE = {Austin, Texas},
  CONFERENCE_DATE = {11-14/05/2022},
  BOOKTITLE = {2022 annual meeting abstract book},
}

% Conference abstract (20th International Morphology Meeting, Budapest, 2022) on the regular-irregular gradient.
@INPROCEEDINGS{MARZI_2022_INPROCEEDINGS_MP_471259,
  AUTHOR = {Marzi, C. 
and Pirrelli, V.},
  TITLE = {An information-theoretic analysis of the inflectional regular-irregular gradient for optimal processing units},
  YEAR = {2022},
  ABSTRACT = {Prediction-driven word processing defines the human ability to anticipate upcoming input words in recognition. From this perspective, input word forms need to be processed as quickly and efficiently as possible. Under the reasonable assumption that spoken words are memorized and processed as word trees (e.g. Marslen-Wilson's "cohorts"), the larger the size of the cohort of an input word at a certain point in time (and the later its uniqueness point), the harder and slower to process the word is. Regularly and irregularly inflected verb forms have different stem family sizes and different uniqueness points. Using a Recurrent Neural Network (RNN) as a computational model of the human lexical processor, we explore here how their distributional and structural properties may affect (optimal) processing strategies.},
  KEYWORDS = {Morphological inflection, prediction-driven processing, discriminability, non-linearity, learnability},
  PAGES = {50--51},
  URL = {http://www.nytud.hu/imm20/abstracts/main.pdf},
  BOOKTITLE = {20th International Morphology Meeting},
  CONFERENCE_NAME = {20th International Morphology Meeting-(Dedicated to the memory of Ferenc Kiefer)},
  CONFERENCE_PLACE = {Budapest},
  CONFERENCE_DATE = {01-04/09/2022},
}

% Conference paper (IEEE CIST'20): mobile technology and the ReadLet platform for reading assessment.
@INPROCEEDINGS{TAXITARI_2021_INPROCEEDINGS_TCFMNP_441870,
  AUTHOR = {Taxitari, L. and Cappa, C. and Ferro, M. and Marzi, C. and Nadalini, A. and Pirrelli, V.},
  TITLE = {Using mobile technology for reading assessment},
  YEAR = {2021},
  ABSTRACT = {The enormous potential of Information and Communication Technologies (ICT) for addressing critical educational issues is generally acknowledged, but its use in the assessment of the complex skills of reading and understanding a text has been very limited to date. 
The paper contrasts traditional reading assessment protocols with ReadLet, an ICT platform with a tablet front-end, designed to support online monitoring of silent and oral reading abilities in early graders. ReadLet makes use of cloud computing and mobile technology for large-scale data collection and allows the time alignment of the child's reading behaviour with texts tagged using Natural Language Processing (NLP) tools. Initial findings replicate established benchmarks from the psycholinguistic literature on reading in both typically and atypically developing children, making the application a new ground-breaking approach in the evaluation of reading skills. Index Terms--reading assessment, reading research, mobile technology, NLP, cloud computing, special education needs.},
  KEYWORDS = {reading assessment, reading research, mobile technology, NLP, cloud computing, special education needs},
  PAGES = {1--6},
  URL = {http://www.ieee.ma/cist20/component/content/?id=26\&Itemid=185},
  ISBN = {9781728166469},
  BOOKTITLE = {6th IEEE Congress on Information Science \& Technology (IEEE CIST'20)},
  CONFERENCE_NAME = {6th IEEE Congress on Information Science \& Technology (IEEE CIST'20)},
  CONFERENCE_PLACE = {online},
  CONFERENCE_DATE = {05/06/2021},
}

% Conference abstract in Italian (XXI Congresso Internazionale di AItLA, 2021) on real-time reading assessment.
@INPROCEEDINGS{MARZI_2021_INPROCEEDINGS_MTFNP_445743,
  AUTHOR = {Marzi, C. and Taxitari, L. and Ferro, M. and Nadalini, A. and Pirrelli, V.},
  TITLE = {Valutare la lettura ``in tempo reale'': un esempio di integrazione tra linguistica computazionale e linguistica applicata},
  YEAR = {2021},
  ABSTRACT = {In anni recenti, linguistica computazionale e linguistica applicata hanno ampliato i loro rispettivi ambiti d'indagine, utilizzando l'ontologia formale della linguistica teorica e i modelli cognitivi della psicolinguistica per studiare le difficoltà che i parlanti incontrano nello svolgimento di "compiti" linguistici specifici. 
Nell'ambito della lettura, le tecnologie per il Trattamento Automatico del Linguaggio (TAL) si sono dimostrate capaci di classificare il livello di leggibilità di un testo, basandosi sulla distribuzione di alcuni parametri linguistici in testi pre-classificati per età dei lettori destinatari, o per grado di scolarità, o per livello di sviluppo cognitivo. Ad esempio, parole o frasi più lunghe, o parole più rare tendono a distribuirsi in testi di più difficile comprensione, o destinati a lettori più maturi. È possibile così assegnare a un testo, o a ogni singola frase, un punteggio di leggibilità in funzione (inversa) della complessità lessicale, morfologica, sintattica o pragmatica dell'unità testuale analizzata. In Linguistica Applicata (LA) la valutazione della difficoltà di lettura ha seguito un approccio funzionale. Nel modello semplice di lettura, ad esempio, la capacità di leggere un testo è analizzata come il prodotto dell'interazione tra decodifica e comprensione. Attraverso l'osservazione di un campione di bambini impegnati nella lettura, è possibile valutare la loro fluenza in decodifica, gli errori di decodifica e comprensione, e l'efficacia di percorsi educativi personalizzati. La piattaforma ReadLet è stata sviluppata con l'obiettivo di integrare l'approccio classificatorio del TAL con quello funzionale della LA. Il bambino legge un breve testo visualizzato sullo schermo di un tablet, ad alta voce o in modalità silente. In entrambi i casi, al bambino viene chiesto di "tenere il segno" con il dito sullo schermo nel corso della lettura. La traccia tattile è registrata e allineata con il testo visualizzato sullo schermo mediante un algoritmo di convoluzione. Al contempo, il testo è annotato automaticamente per tratti linguistici. Alla fine della sessione di lettura silente, il bambino risponde ad alcune semplici domande sul contenuto del testo. I dati raccolti consentono di valutare le difficoltà (rallentamenti o errori) che il bambino incontra nella lettura, e di mettere in relazione "in tempo reale" queste difficoltà con aspetti linguistici specifici del testo. Un'analisi preliminare dei dati raccolti da ReadLet su oltre 400 allievi di alcune scuole elementari toscane e della Svizzera italiana, ha evidenziato il differente "passo" di lettura tra lettori con sviluppo tipico e atipico, e il peso che variabili come lunghezza, frequenza e lessicalità hanno su profili di lettura individuali e aggregati. La possibilità di "controllare" automaticamente la distribuzione di queste variabili nel testo e di correlarle con le difficoltà del singolo bambino consente, infine, di somministrare testi con livelli di difficoltà gradualmente crescenti, rendendo possibili percorsi personalizzati di potenziamento.},
  KEYWORDS = {reading assessment, reading strategies, NLP, ICT mobile technologies},
  PAGES = {5},
  URL = {https://publications.cnr.it/doc/445743},
  VOLUME = {2021},
  CONFERENCE_NAME = {XXI Congresso Internazionale di AItLA},
  CONFERENCE_PLACE = {Bergamo (I)},
  CONFERENCE_DATE = {11-12/02/2021},
  BOOKTITLE = {Fare linguistica applicata con le {Digital Humanities}},
}

% Journal article (Quaderni di semantica, 2020): Italian title, English abstract, on morphology and NLP.
@ARTICLE{MASINI_2020_ARTICLE_MP_444782,
  AUTHOR = {Masini, F. and Pirrelli, V.},
  TITLE = {L'evidenza morfologica nell'era digitale: per un'integrazione di teoria e computazione},
  YEAR = {2020},
  ABSTRACT = {This article proposes a research perspective on morphological and lexical data based on an integrated approach that merges linguistic theory and computational analyses of a large quantity of textual data. 
Starting from a description of the units and processes of morphology, and of the issues they raise, we discuss to what extent these theoretical notions can be translated into the algorithmic procedures of Natural Language Processing (NLP) and what resources and methods are nowadays available to make morphological and lexical knowledge explicit within texts. At the same time, we explore the repercussions that the application of computational (but also psycho-/neuro-linguistic) techniques may have on our theoretical representations and on their plausibility.},
  KEYWORDS = {morphology, lexicon, categories, Natural Language Processing, Italian},
  PAGES = {77--126},
  URL = {https://publications.cnr.it/doc/444782},
  VOLUME = {VI},
  PUBLISHER = {Editrice CLUEB; [poi] Edizioni dell'Orso (Bologna; [poi] Alessandria, Italia)},
  ISSN = {0393-1226},
  JOURNAL = {Quaderni di semantica (Testo stampato)},
}

% Book chapter (De Gruyter, 2020) on inflection, in the volume edited by Pirrelli, Plag and Dressler.
@INCOLLECTION{MARZI_2020_INCOLLECTION_MBBP_421742,
  AUTHOR = {Marzi, C. and Blevins, J. P. and Booij, G. and Pirrelli, V.},
  TITLE = {Inflection at the morphology-syntax interface},
  YEAR = {2020},
  ABSTRACT = {What is inflection? Is it part of language morphology, syntax or both? What are the basic units of inflection and how do speakers acquire and process them? How do they vary across languages? Are some inflection systems somewhat more complex than others, and does inflectional complexity affect the way speakers process words? This chapter addresses these and other related issues from an interdisciplinary perspective. Our main goal is to map out the place of inflection in our current understanding of the grammar architecture. In doing that, we will embark on an interdisciplinary tour, which will touch upon theoretical, psychological, typological, historical and computational issues in morphology, with a view to looking for points of methodological and substantial convergence from a rather heterogeneous array of scientific approaches and theoretical perspectives. 
The main upshot is that we can learn more from this than just an additive medley of domain-specific results. In the end, a cross-domain survey can help us look at traditional issues in a surprisingly novel light.},
  KEYWORDS = {inflection, paradigmatic relations, word processing, word learning, inflectional complexity, family size, entropy},
  PAGES = {228--294},
  URL = {https://www.degruyter.com/view/book/9783110440577/10.1515/9783110440577-007.xml},
  VOLUME = {337},
  DOI = {10.1515/9783110440577-007},
  PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)},
  ISBN = {9783110440577},
  BOOKTITLE = {Word Knowledge and Word Usage. A Cross-Disciplinary Guide to the Mental Lexicon},
  EDITOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.},
}

% Book chapter (De Gruyter, 2020) on psycho-computational modelling, in the volume edited by Pirrelli, Plag and Dressler.
@INCOLLECTION{PIRRELLI_2020_INCOLLECTION_PMFCBM_421741,
  AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M. and Cardillo, F. A. and Baayen, H. R. and Milin, P.},
  TITLE = {Psycho-computational modelling of the mental lexicon},
  YEAR = {2020},
  ABSTRACT = {Over the last decades, a growing body of evidence on the mechanisms governing lexical storage, access, acquisition and processing has questioned traditional models of language architecture and word usage based on the hypothesis of a direct correspondence between modular components of grammar competence (lexicon vs. rules), processing correlates (memory vs. computation) and neuro-anatomical localizations (prefrontal vs. temporo-parietal perisylvian areas of the left hemisphere). In the present chapter, we explore the empirical and theoretical consequences of a distributed, integrative model of the mental lexicon, whereby words are seen as emergent properties of the functional interaction between basic, language-independent processing principles and the language-specific nature and organization of the input. 
From this perspective, language learning appears to be inextricably related to the way language is processed and internalized by the speakers, and key to an interdisciplinary understanding of such a way, in line with Tomaso Poggio's suggestion that the development of a cognitive skill is causally and ontogenetically prior to its execution (and sits "on top of it"). In particular, we discuss conditions, potential and prospects of the epistemological continuity between psycholinguistic and computational modelling of word learning, and illustrate the yet largely untapped potential of their integration. We use David Marr's hierarchy to clarify the complementarity of the two viewpoints. Psycholinguistic models are informative about how speakers learn to use language (interfacing Marr's levels 1 and 2). When we move from the psycholinguistic analysis of the functional operations involved in language learning to an algorithmic description of how they are computed, computer simulations can help us explore the relation between speakers' behavior and general learning principles in more detail. In the end, psycho-computational models can be instrumental to bridge Marr's levels 2 and 3, bringing us closer to understanding the nature of word knowledge in the brain.},
  KEYWORDS = {mental lexicon, word storage and processing, psycholinguistics, computational linguistics, connectionist models, discriminative learning},
  PAGES = {23--82},
  URL = {https://www.degruyter.com/view/book/9783110440577/10.1515/9783110440577-002.xml},
  VOLUME = {337},
  DOI = {10.1515/9783110440577-002},
  PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)},
  ISBN = {9783110440577},
  BOOKTITLE = {Word Knowledge and Word Usage. A Cross-Disciplinary Guide to the Mental Lexicon},
  EDITOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.},
}

% Introductory chapter (pages 1--20) of the same edited volume; its authors are the volume editors.
@INCOLLECTION{PIRRELLI_2020_INCOLLECTION_PPD_423388,
  AUTHOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.},
  TITLE = {Word knowledge in a cross-disciplinary world},
  YEAR = {2020},
  ABSTRACT = {This editorial project stemmed from a 4-year period of intense interdisciplinary research networking funded by the European Science Foundation within the framework of the NetWordS project (09-RNP-089). The project mission was to bring together experts of various research fields (from brain sciences and computing to cognition and linguistics) and of different theoretical inclinations, to advance the current awareness of theoretical, typological, psycholinguistic, computational and neurophysiological evidence on the structure and processing of words, with a view to promoting novel methods of research and assessment for grammar architecture and language usage. The unprecedented cross-disciplinary fertilization prompted by a wide range of scientific and educational initiatives (three international workshops, two summer schools, one main conference and over a hundred grants supporting short visits and multilateral exchanges) persuaded us to pursue this effort beyond the project lifespan, spawning the idea of an interdisciplinary handbook, where a wide range of central topics on word knowledge and usage are dealt with by teams of authors with common interests and different backgrounds. Unsurprisingly (with the benefit of the hindsight), the project turned out to be more challenging and time-consuming than initially planned. Cross-boundary talking and mutual understanding are neither short-term, nor immediately rewarding efforts, but part of a long-sighted, strategic vision, where stamina, motivation and planning ahead play a prominent role. We believe that this book, published as an open access volume, significantly sharpens the current understanding of issues of word knowledge and usage, and has a real potential for promoting novel research paradigms, and bringing up a new generation of language scholars.},
  KEYWORDS = {interdisciplinarity, word knowledge, word usage, language units, statistical and computer modeling, levels of understanding, between-level mapping, linking hypotheses, scale effects},
  PAGES = {1--20},
  URL = {https://doi.org/10.1515/9783110440577},
  VOLUME = {337},
  DOI = {10.1515/9783110440577},
  PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)},
  ISBN = {9783110440577},
  BOOKTITLE = {Word Knowledge and Word Usage. A Cross-Disciplinary Guide to the Mental Lexicon},
  EDITOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.},
}

% Record for the edited volume as a whole (pages 1--717).
% NOTE(review): EDITORIAL is not a standard BibTeX entry type, and the people stored in AUTHOR are the ones the
% chapter entries list as EDITOR; consider a BOOK entry with EDITOR -- confirm with the export tooling first.
% NOTE(review): the ISBN differs from the one used by the chapter entries of this volume -- confirm.
@EDITORIAL{PIRRELLI_2020_EDITORIAL_PPD_424203,
  AUTHOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.},
  TITLE = {Word knowledge and word usage: a cross-disciplinary guide to the mental lexicon},
  YEAR = {2020},
  ABSTRACT = {This editorial project stemmed from a 4-year period of intense interdisciplinary research networking funded by the European Science Foundation within the framework of the NetWordS project (09-RNP-089).},
  KEYWORDS = {interdisciplinarity, word knowledge, word usage, language units, statistical and computer modeling, levels of understanding, between-level mapping, linking hypotheses, scale effects},
  PAGES = {1--717},
  URL = {https://doi.org/10.1515/9783110440577},
  VOLUME = {337},
  DOI = {10.1515/9783110440577},
  PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)},
  ISBN = {978-3-11-051748-4},
}

% Conference paper (Italian Conference on Computational Linguistics 2020): finger-tracking and child reading strategies.
@INPROCEEDINGS{MARZI_2020_INPROCEEDINGS_MRNTP_438979,
  AUTHOR = {Marzi, C. and Rodella, A. and Nadalini, A. and Taxitari, L. 
and Pirrelli, V.},
  TITLE = {Does finger-tracking point to child reading strategies?},
  YEAR = {2020},
  ABSTRACT = {The movement of a child's index finger that points to a printed text while (s)he is reading may provide a proxy for the child's eye movements and attention focus. We validated this correlation by showing a quantitative analysis of patterns of "finger-tracking" of Italian early graders engaged in reading a text displayed on a tablet. A web application interfaced with the tablet monitors the reading behaviour by modelling the way the child points to the text while reading. The analysis found significant developmental trends in reading strategies, marking an interesting contrast between typically developing and atypically developing readers.},
  KEYWORDS = {reading assessment, reading strategies, mobile technology, special education needs},
  PAGES = {1--7},
  URL = {http://ceur-ws.org/Vol-2769/paper_60.pdf},
  VOLUME = {2769},
  PUBLISHER = {CEUR-WS.org (Aachen, DEU)},
  ISSN = {1613-0073},
  CONFERENCE_NAME = {Italian Conference on Computational Linguistics 2020},
  CONFERENCE_PLACE = {Bologna},
  CONFERENCE_DATE = {1-3/03/2021},
  BOOKTITLE = {Proceedings of the Seventh Italian Conference on Computational Linguistics},
  EDITOR = {Monti, J. and Dell'Orletta, F. and Tamburini, F.},
}

% Conference abstract (Words in the World International Conference, 2020): timing reading pace with finger movements.
@INPROCEEDINGS{PIRRELLI_2020_INPROCEEDINGS_PCCDFGMNT_442758,
  AUTHOR = {Pirrelli, V. and Cappa, C. and Crepaldi, D. and Del Pinto, V. and Ferro, M. and Giulivi, S. and Marzi, C. and Nadalini, A. and Taxitari, L.},
  TITLE = {Tracking the pace of reading with finger movements},
  YEAR = {2020},
  ABSTRACT = {Recent experimental evidence in visual perception analysis shows that eye and finger movements strongly correlate during scene exploration, at both individual and group levels. A familiar context which exploits this synergistic behaviour is when children learn to read, with the practice of finger-pointing to text as a support for their attention focus, directional movement and voice-print match. 
Using a tablet to display short texts, we collected evidence on the finger-pointing behaviour of 3rd-6th Italian graders engaged in both silent and oral reading. "Finger-tracking" data, sampled by the tablet and aligned with the text, made it possible to time a child's reading pace at word and sentence level. Results are shown to replicate established benchmarks in the reading literature, such as the difference in reading pace between age-matched typical and atypical readers as a function of word frequency and length, and neighbourhood entropy and Old20. Atypical readers show increasing difficulty with longer words, with a steeper time increment for word length > 6, integrating previous evidence. In addition, neighbourhood density plays a sparse facilitative role in atypical reading, with no significant interaction with neighbourhood entropy, pointing to a non trivial developmental interplay between sublexical reading and the richness of the Italian orthographic-phonological lexicon. Despite their different dynamics, optical and tactile strategies for text exploration prove to be highly congruent: this suggests that finger-tracking can be used as an ecological proxy for eye-tracking in reading assessment.}, KEYWORDS = {Reading, Finger tracking, Mental Lexicon, Word frequency, Word Length, Neighbourhood entropy}, PAGES = {1}, URL = {https://osf.io/hr62g/}, CONFERENCE_NAME = {Words in the World International Conference}, CONFERENCE_PLACE = {Montreal (Canada)}, CONFERENCE_DATE = {16-18/10/2020}, } @ARTICLE{MARZI_2019_ARTICLE_MFP_406277, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {A processing-oriented investigation of inflectional complexity}, YEAR = {2019}, ABSTRACT = {Due to the typological diversity of their inflectional processes, some languages are intuitively more difficult than other languages. Yet, finding a single measure to quantitatively assess the comparative complexity of an inflectional system proves an exceedingly difficult endeavor. 
In this paper we propose to investigate the issue from a processing-oriented standpoint, using data processed by a type of recurrent neural network to quantitatively model the dynamic of word processing and learning in different input conditions. We evaluate the relative complexity of a set of typologically different inflectional systems (Greek, Italian, Spanish, German, English and Standard Modern Arabic) by training a Temporal Self-Organizing Map (TSOM), a recurrent variant of Kohonen's Self-Organizing Maps, on a fixed set of verb forms from top-frequency verb paradigms, with no information about the morphosemantic and morphosyntactic content conveyed by the forms. After training, the behavior of each language-specific TSOM is assessed on different tasks, looking at self-organizing patterns of temporal connectivity and functional responses. Our simulations show that word processing is facilitated by maximally contrastive inflectional systems, where verb forms exhibit the earliest possible point of lexical discrimination. Conversely, word learning is favored by a maximally generalizable system, where forms are inferred from the smallest possible number of their paradigm companions. Based on evidence from the literature and our own data, we conjecture that the resulting balance is the outcome of the interaction between form frequency and morphological regularity. Big families of stem-sharing, regularly inflected forms are the productive core of an inflectional system. Such a core is easier to learn but slower to discriminate. In contrast, less predictable verb forms, based on alternating and possibly suppletive stems, are easier to process but are learned by rote. Inflection systems thus strike a balance between these conflicting processing and communicative requirements, while staying within tight learnability bounds, in line with Ackerman and Malouf's Low Conditional Entropy Conjecture. 
Our quantitative investigation supports a discriminative view of morphological inflection as a collective, emergent system, whose global self-organization rests on a surprisingly small handful of language-independent principles of word coactivation and competition.}, KEYWORDS = {Morphological complexity, Discriminative learning, Recurrent neural networks (RNNs), self-organization, emergence, processing uncertainty, stem-family size}, PAGES = {1-23}, URL = {https://www.frontiersin.org/articles/10.3389/fcomm.2019.00048/full}, VOLUME = {4}, DOI = {10.3389/fcomm.2019.00048}, PUBLISHER = {Frontiers Media (Lausanne, Svizzera)}, ISSN = {2297-900X}, JOURNAL = {Frontiers in communication}, } @INPROCEEDINGS{PIRRELLI_2019_INPROCEEDINGS_P_424205, AUTHOR = {Pirrelli, V.}, TITLE = {Investigating inflection as a complex system}, YEAR = {2019}, ABSTRACT = {From a cross-linguistic perspective, different inflection systems appear to apportion word processing costs differently, depending on when and where, in the full form, morpho-lexical and morpho-syntactic information is encoded. The resulting balance is the outcome of an interaction between form frequency and morphological productivity, responding to basic communicative requirements. Big families of stem-sharing inflected forms constitute the productive core of an inflection system. This core is easy to learn, as it requires memorization of one stem only, with all inflected forms being redundantly built upon it. Unsurprisingly, generalizable paradigms are less sensitive to token frequency effects, and tend to be located in the long, low-frequency tail of the Zipfian distribution of word forms. 
In contrast, the head of the Zipfian distribution mostly contains small families of alternating and possibly suppletive stems, which, however shorter, morpho-phonologically simpler and easier to process, require high token frequency to be learned and resist pressure towards regularization.}, KEYWORDS = {Morphological paradigms, Mental Lexicon, Inflectional morphology}, PAGES = {23-24}, URL = {https://publications.cnr.it/doc/424205}, CONFERENCE_NAME = {International Symposium of Morphology (ISMo) 2019}, CONFERENCE_PLACE = {Université de Paris, Paris}, CONFERENCE_DATE = {25-27/09/2019}, } @ARTICLE{CARDILLO_2018_ARTICLE_CFMP_396348, AUTHOR = {Cardillo, F. A. and Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {Deep Learning of Inflection and the Cell-Filling Problem}, YEAR = {2018}, ABSTRACT = {Machine learning offers two basic strategies for morphology induction: lexical segmentation and surface word relation. The first approach assumes that words can be segmented into morphemes. Inferring a novel inflected form requires identification of morphemic constituents and a strategy for their recombination. The second approach dispenses with segmentation: lexical representations form part of a network of associatively related inflected forms. Production of a novel form consists in filling in one empty node in the network. Here, we present the results of a task of word inflection by a recurrent LSTM network that learns to fill in paradigm cells of incomplete verb paradigms. 
Although the task does not require morpheme segmentation, we show that accuracy in carrying out the inflection task is a function of the model's sensitivity to paradigm distribution and morphological structure.}, KEYWORDS = {Deep Learning, LSTM, Cell-Filling Problem}, PAGES = {57-75}, URL = {https://publications.cnr.it/doc/396348}, VOLUME = {4}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{FERRO_2018_ARTICLE_FMP_397012, AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {Discriminative word learning is sensitive to inflectional entropy}, YEAR = {2018}, ABSTRACT = {Psycholinguistic evidence based on inflectional and derivational word families has emphasised the combined role of Paradigm Entropy and Inflectional Entropy in human word processing. Although the way frequency distributions affect behavioural evidence is clear in broad outline, we still miss a clear algorithmic model of how such a complex interaction takes place and why. The main challenge is to understand how the local interaction of learning and processing principles in morphology can result in global effects that require knowledge of the overall distribution of stems and affixes in word families. We show that principles of discriminative learning can shed light on this issue. We simulate learning of verb inflection with a discriminative recurrent network of specialised processing units, whose level of temporal connectivity reflects the frequency distribution of input symbols in context. 
We analyse the temporal dynamic with which connection weights are adjusted during discriminative learning, to show that self-organised connections are optimally functional to word processing when the distribution of inflected forms in a paradigm (Paradigm Entropy) and the distribution of their inflectional affixes across paradigms (Inflectional Entropy) diverge minimally.}, KEYWORDS = {discriminative learning, word processing, recurrent neural networks, relative entropy}, PAGES = {307-327}, URL = {https://www.rivisteweb.it/doi/10.1418/91871}, VOLUME = {XVII}, DOI = {10.1418/91871}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @INCOLLECTION{PIRRELLI_2018_INCOLLECTION_P_398877, AUTHOR = {Pirrelli, V.}, TITLE = {Morphological Theory And Computational Linguistics}, YEAR = {2018}, ABSTRACT = {For decades, processing issues have taken centre stage in the debate on the theoretical foundations of linguistic morphology. The present chapter provides a computer-based, algorithmic view on these issues, ranging from the encoding of input data to the structure of output representations, going through the basic operations of word splitting, storage, access, retrieval, and assembly of intermediate representations.}, KEYWORDS = {word processing, word storage, computational morphology, lexical modelling, machine language learning, finite state technology, artificial neural networks}, PAGES = {573-593}, URL = {http://www.oxfordhandbooks.com/view/10.1093/oxfordhb/9780199668984.001.0001/oxfordhb-9780199668984-e-32?rskey=qZuY8Z\&result=9}, DOI = {10.1093/oxfordhb/9780199668984.013.32}, PUBLISHER = {Oxford University Press (Oxford, GBR)}, ISBN = {978-0-19-966898-4}, BOOKTITLE = {The Oxford Handbook of Morphological Theory}, EDITOR = {Audring, J. and Masini, F.}, } @INPROCEEDINGS{FERRO_2018_INPROCEEDINGS_FCGMNCP_390504, AUTHOR = {Ferro, M. and Cappa, C. and Giulivi, S. and Marzi, C. and Nahli, O. and Cardillo, F. A. 
and Pirrelli, V.}, TITLE = {ReadLet: Reading for Understanding}, YEAR = {2018}, ABSTRACT = {This paper focuses on motivation, objectives, design issues and preliminary results of ReadLet, an ICT platform for assessing reading efficiency in primary school children. Test data are discussed on a sample of 200 early graders, reading French, Italian and Standard Modern Arabic (SMA).}, KEYWORDS = {Reading, text comprehension, Specific Learning Disorders, multimodal signal processing, cloud computing, portable assistive technology}, PAGES = {404-409}, URL = {https://publications.cnr.it/doc/390504}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-5386-4385-3}, CONFERENCE_NAME = {IEEE-CIST2018 LED-ICT}, CONFERENCE_PLACE = {Marrakech, Morocco}, CONFERENCE_DATE = {21-27/10/2018}, } @INPROCEEDINGS{MARZI_2018_INPROCEEDINGS_MFNBBP_388016, AUTHOR = {Marzi, C. and Ferro, M. and Nahli, O. and Belik, P. and Bompolas, S. and Pirrelli, V.}, TITLE = {Evaluating Inflectional Complexity Crosslinguistically: a Processing Perspective}, YEAR = {2018}, ABSTRACT = {The paper provides a cognitively motivated method for evaluating the inflectional complexity of a language, based on a sample of "raw" inflected word forms processed and learned by a recurrent self-organising neural network with fixed parameter setting. Training items contain no information about either morphological content or structure. This makes the proposed method independent of both meta-linguistic issues (e.g. format and expressive power of descriptive rules, manual or automated segmentation of input forms, number of inflectional classes etc.) and language-specific typological aspects (e.g. word-based, stem-based or template-based morphology). 
Results are illustrated by contrasting Arabic, English, German, Greek, Italian and Spanish.}, KEYWORDS = {paradigm-based morphology, inflectional complexity, prediction-based processing, recurrent self-organising networks, Statistical And Machine Learning Methods, Language Modelling}, PAGES = {3860-3866}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/summaries/745.html}, VOLUME = {2018}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{CAPPA_2018_INPROCEEDINGS_CFGMNCP_396593, AUTHOR = {Cappa, C. and Ferro, M. and Giulivi, S. and Marzi, C. and Nahli, O. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {ReadLet: piattaforma ICT per valutare l'efficienza di lettura}, YEAR = {2018}, ABSTRACT = {ReadLet è una piattaforma ICT pensata per valutare accuratamente l'efficienza di lettura nei bambini della scuola primaria. Combina tecnologia ICT portatile e cloud-computing con una serie di moduli software, specifici per modalità di somministrazione. Questi, implementati come servizi web, includono: i) valutazione dell'elaborazione del testo e della leggibilità; ii) valutazione della velocità di lettura (ad alta voce e silente) e delle sue fluttuazioni; iii) valutazione della correttezza della decodifica ad alta voce; iv) valutazione della comprensione del testo (in lettura silente e da ascolto). 
Un prototipo della tecnologia ReadLet è stato sperimentato su circa 200 alunni (8-11 anni), che variano per stato socio-economico, lingua (italiana, francese, araba) e area geografica (Italia, Svizzera, Marocco). L'utilizzo del tablet per la lettura è stato percepito dai bambini come un'esperienza coinvolgente e piacevole. Gli insegnanti hanno trovato lo strumento facile da utilizzare e in grado di fornire maggiori informazioni rispetto agli strumenti tradizionali.}, KEYWORDS = {leggere per capire, disturbi del linguaggio, screening}, URL = {https://www.airipa.it/congresso/pluginfile.php/2781/mod_resource/content/1/Programma%20Congresso%20AIRIPA_Arezzo_dettagliato-3.pdf}, CONFERENCE_NAME = {XXVII Congresso Nazionale AIRIPA}, CONFERENCE_PLACE = {Arezzo (Italy)}, CONFERENCE_DATE = {28-29/09/2018}, } @INPROCEEDINGS{FERRO_2018_INPROCEEDINGS_FCGMCP_396591, AUTHOR = {Ferro, M. and Cappa, C. and Giulivi, S. and Marzi, C. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {ReadLet: an ICT platform for the assessment of reading efficiency in early graders}, YEAR = {2018}, ABSTRACT = {Reading is not just word decoding, but the joint product of decoding and deep linguistic comprehension [ 1 , 2 ]. Effective linguistic comprehension relies on language skills such as semantic and syntactic awareness. Both decoding and linguistic comprehension are necessary for reading comprehension, and neither is by itself sufficient [ 2 ]. However, current protocols for reading assessment measure decoding (reading accuracy and speed) and reading comprehension separately [ 3 , 4 , 5 ]. This does not allow evaluation of reading efficiency [ 6 ], defined as the ability to fully understand connected texts by minimising reading time, a cognitive ability that lies at the roots of students' academic achievement [ 8 , 7 ]. 
ReadLet is an ICT platform specifically designed to provide accurate, evidence-based assessment of reading efficiency in early grade children, by offering an ecological, non-invasive protocol for extensive data elicitation, storage and analysis. With ReadLet, early graders at school can read a one or two page text displayed on a tablet touchscreen, either silently or aloud. Children are asked to slide their finger across the words as they read, to guide directional tracking. After reading, the child is prompted with a few multiple-answer questions on text content presented one at a time, while the text remains displayed on the screen for the child to be able to retrieve relevant information. In the process, the tablet keeps track of time-aligned multimodal data: voice recording, finger sliding time, time of reading, time of question answering, and number of correct answers. Data are recorded, stored locally, sent to the ReadLet server through an internet connection, and processed remotely by a battery of cloud-based services, analysing data automatically to produce a detailed quantitative signature of each reading session. A server-based database aggregates anonymised data to make them available for specialists. Also individual's longitudinal profiles are stored, for them to be queried and inspected upon authorised access. 
The platform combines portable ICT technology and cloud computing with a number of modality-specific software modules, implemented as web services including: i) a text processing and readability assessment service, consisting in a battery of tools for automated linguistic annotation of written texts and a machine-learning component assigning a readability score to annotated texts [ 9 ]; ii) a finger touch processing service aligning the child's finger sliding with the written text and measuring speed fluctuations; iii) a speech processing and decoding assessment service, aligning the acoustic record of child's reading with the written text and assessing correctness of recoding [ 10 ]. At the time of writing, the platform includes the first two modules only. Preliminary testing of a prototype version of ReadLet technology with a population of about 200 pupils aged 8 to 11, both male and female, varying for socio-economic status, language (Italian, French and Arabic) and geographical area (Italy and Morocco), showed that children are extremely responsive to using a tablet for reading, and very easy to engage in what they perceive as an enjoyable experience. We expect online databases of automatically classified cross-sectional and longitudinal data, accurate statistical modelling and developmental trends of reading literacy to help education professionals and clinical specialists assess the level of reading skills reached by the child, and decide which intervention programmes and measures are most appropriate. 
While information technology cannot and should not supplant the role and professional judgement of teachers and therapists, the project intends to provide portable tools, models and data for timely screening and daily management of reading difficulties and disorders.}, KEYWORDS = {reading efficiency, decoding, comprehension, language specific disorders}, PAGES = {61-61}, URL = {https://mentallexicon2018.ca/}, CONFERENCE_NAME = {11th International Conference on the Mental Lexicon}, CONFERENCE_PLACE = {Edmonton, Alberta (Canada)}, CONFERENCE_DATE = {25-28/09/2018}, } @INPROCEEDINGS{MARZI_2018_INPROCEEDINGS_MFP_396356, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Is inflectional irregularity dysfunctional to human processing?}, YEAR = {2018}, ABSTRACT = {Regularly inflected verb forms are classically associated with the formal transparency and predictability of their internal constituents [ 1 , 2 , 3 ]. Transparency ensures that full forms can be segmented uniquely into their internal constituents: as in walk-s/walk-ed. Predictability allows for a speaker to fill in an empty paradigm cell, using information from other known forms of the same lexical paradigm and its inflection macro-class. From this perspective, irregulars appear to be dysfunctional to the human processing system, as they make it hard to infer - say - bought from buy , or segment bought appropriately into its constituent parts. Likewise, an influential psycholinguistic tradition relegates irregulars to the lexical store, whereas regulars are segmented by rules into their simpler constituents [ 4 , 5 ]. Here, we offer a few reasons for questioning this view. First, transparency and predictability are not dichotomous notions. Secondly, their influence on processing is not unidirectional. 
Unpredictable stems in irregularly inflected forms of complex inflectional systems provide a lot of processing information, by dynamically constraining the number of possible alternative endings during serial processing. Thirdly, acquisition of word inflection does not consist in associating co-occurring cues and outcomes, but in discriminating between multiple cues that are constantly in competition for their predictive value for a given outcome. We present the results of a few computer simulations with Self-organising Recurrent Neural Networks (TSOMs, [ 8 , 9 ]) that learn how to inflect high-frequency verb paradigms in 6 languages: English, German, Italian, Modern Greek, Modern Standard Arabic and Spanish. After training, each TSOM was tested on a word recognition (serial recoding) and a word production (serial recall) task, and results were analysed with generalised regression models. Processing uncertainty is differently apportioned on regulars and irregulars, depending on the nature of the processing task. While irregulars are harder to produce when they are unknown because they typically have fewer neighbours than regulars have, they are readily accessed once they are acquired, for exactly the same reason. Our data are in line with psycholinguistic evidence [ 10 , 11 ] that lexical processing is paced by two types of uniqueness point: Marslen-Wilson's Uniqueness Point (UP), distinguishing unrelated onset-overlapping words [ 12 ], and the Complex Uniqueness Point (CUP), distinguishing paradigmatically-related words [ 11 ]. Late UPs are inhibitory and elicit prolonged reaction times in acoustic word recognition, explaining an early delay in word recognition of irregular stems. Similarly, late CUPs are inhibitory, and this accounts for a slowdown in the processing advantage of regulars, compared to irregulars, after UP. 
These structural factors interact in a variety of ways and concurrently affect human processing, to show that irregularly-inflected forms may in fact reflect communicative and processing constraints of the word processor. They provide strong evidence against a processing architecture that assumes compartmentalized, independent processing routes for some specific combinations of these factors (e.g. a rule-based route for a combination of transparency and predictability, and a memory-based route for all other combinations). In addition, they seem incompatible with Bayesian approaches to auditory word comprehension ignoring a word's internal structure [ 13 ]. We suggest that a different design of the human language processor, based on a computational architecture integrating memory and processing as two different dynamics of the same underlying mechanism, can shed light on the complexity of inflection, and vindicate the role of irregular inflection in the system.}, KEYWORDS = {inflectional processing, temporal self organizing maps, letter prediction, morpheme boundary}, PAGES = {60-60}, URL = {https://mentallexicon2018.ca/}, CONFERENCE_NAME = {11th International Conference on the Mental Lexicon}, CONFERENCE_PLACE = {Edmonton, Alberta (Canada)}, CONFERENCE_DATE = {25-28/09/2018}, } @INPROCEEDINGS{PIRRELLI_2018_INPROCEEDINGS_P_399032, AUTHOR = {Pirrelli, V.}, TITLE = {NLP-based assessment of reading efficiency in early grade children}, YEAR = {2018}, ABSTRACT = {Assessing reading skills is a laborious and time-consuming task, which requires monitoring a variety of interlocked abilities, ranging from accurate word rendering, reading fluency and lexical access, to linguistic comprehension, and interpretation, management and inference of complex events in working memory. 
No existing software, to our knowledge, is able to cover and integrate reading performance monitoring, instant feedback, personalised potentiation and intelligent decision support to teachers and speech therapists, assessment of response to intervention. NLP and ICT technologies can make such an ambitious platform an achievable target.}, KEYWORDS = {NLP-based methods, reading efficiency, early graders}, PAGES = {5-6}, URL = {http://dcl.bas.bg/clib/wp-content/uploads/2018/07/CLIB_2018_Proceedings_v2_final.pdf}, CONFERENCE_NAME = {Computational Linguistics in Bulgaria}, CONFERENCE_PLACE = {Sofia, Bulgaria}, CONFERENCE_DATE = {27-29/05/2018}, } @INPROCEEDINGS{PIRRELLI_2018_INPROCEEDINGS_PFMGSM_396353, AUTHOR = {Pirrelli, V. and Ferro, M. and Marzi, C. and Gagné, C. and Spalding, T. and Marelli, M.}, TITLE = {Processing compounds: what frequency (alone) cannot explain}, YEAR = {2018}, ABSTRACT = {Observed elevation in typing latency for the initial letter of the second constituent of an English compound, compared with the typing time of the final letter of the first constituent (Gagné \& Spalding 2016), suggests that both compounds ( snowball ) and pseudo-compounds ( carpet ) are decomposed but also that full form representations are available in the lexical store. To gain further insight into the lexical representations underlying typing, we used computational modelling. In particular, we used superpositional models of word memory, based on Self-Organising Recurrent Maps (TSOMs) (Ferro et al. 2016; Marzi et al. 2016), where both simple and compound words are processed (and stored) using the same pool of processing (and memory) resources, to model the elevation in typing time at the constituent boundary and the rate of typing. In addition, we also considered models based in the Compositional Distributional Semantics framework (CAOSS, Marelli et al. 2017), to simulate independent effects of semantic transparency on compound typing (Gagné \& Spalding 2016). 
Due to co-activation and competition between compounds and their constituent words in TSOMs, levels of activation of processing nodes per letter positions appear to reflect degrees of context-sensitive predictability: the higher the level, the more expected the letter in that position. In English compounds, activation levels appeared to exhibit a characteristically U-shaped pattern, with min values centred on the constituent boundary. A similar pattern was found for pseudo-compounds, which nonetheless present a less pronounced U-shaped pattern and a higher activation value at the morpheme boundary than compounds do. The difference is in line with the higher speed-up rate in typing pseudo-compounds than compounds reported in Gagné and Spalding (2016). TSOMs were trained on letter-based representations, so computer experiments could simulate peripheral effects of serial processing of compound structure before lexical access. To investigate post-lexical issues, we also tested computational models of generation of the meanings of novel compounds based on CAOSS, which proved to be able to account for well-established relational effects in compound processing (Gagné 2001; Gagné \& Shoben 1997) with an unsupervised data-driven framework (Marelli et al. 2017). We ran a mixed-effects regression analysis of the data in Gagné and Spalding (2016) using vector-semantics estimates and TSOM activation levels to predict typing time for the initial letter of the second constituent. There was a negative effect of TSOM letter activation levels: i.e. the more active a letter node is, the faster a subject is at typing the letter ( t =-2.7 p =.007). Also, there was a positive effect of CAOSS-based compositionality estimates: i.e. the more easily a compound's lexicalized meaning can be obtained through compositional operations on single constituent vectors, the slower participants were at typing the first letter of the second constituent ( t =2.4, p =.017). 
These results have interesting implications for an integrative computational architecture accounting for the whole range of experimental evidence reported by Gagné and Spalding (2016). In particular we will focus on evidence of a stronger competition (and longer typing time) in Transparent-Transparent and Transparent-Opaque compounds, vs. Opaque-Transparent compounds, which gives an indication of a non-trivial interaction between semantic compositionality and serial processing effects.}, KEYWORDS = {compound processing, Temporal Self-organizing Map, letter production latency, constituent boundary}, PAGES = {60-60}, URL = {https://mentallexicon2018.ca/}, CONFERENCE_NAME = {11th International Conference on the Mental Lexicon}, CONFERENCE_PLACE = {Edmonton (Canada)}, CONFERENCE_DATE = {25-28/09/2018}, } @ARTICLE{BOMPOLAS_2017_ARTICLE_BFMCP_380237, AUTHOR = {Bompolas, S. and Ferro, M. and Marzi, C. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {For a performance-oriented notion of regularity in inflection: the case of Modern Greek conjugation}, YEAR = {2017}, ABSTRACT = {Paradigm-based approaches to word processing/learning assume that word forms are not acquired in isolation, but through associative relations linking members of the same word family (e.g. a paradigm, or a set of forms filling the same paradigm cell). Principles of correlative learning offer a set of equations that are key to modelling this complex dynamic at a considerable level of detail. We use these equations to simulate acquisition of Modern Greek conjugation, and we compare the results with evidence from German and Italian. Simulations show that different Greek verb classes are processed and acquired differentially, as a function of their degrees of formal transparency and predictability. 
We relate these results to psycholinguistic evidence of Modern Greek word processing, and interpret our findings as supporting a view of the mental lexicon as an emergent integrative system.}, KEYWORDS = {paradigm-based morphology, gradient (ir)regularity, recurrent self-organising networks}, PAGES = {77-92}, URL = {http://www.ai-lc.it/IJCoL/v3n1/IJCOL_3_1_5_bompolas_et_al.pdf?v=2a47ad90f2ae}, VOLUME = {3}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @EDITORIAL{PIRRELLI_2017_EDITORIAL_PZ_381161, AUTHOR = {Pirrelli, V. and Zarghili, A.}, TITLE = {Arabic Natural Language Processing: Models, systems and applications}, YEAR = {2017}, KEYWORDS = {Natural Language Processing, Standard Modern Arabic}, PAGES = {A1-A3}, URL = {https://www.sciencedirect.com/science/article/pii/S1319157817301155}, VOLUME = {29}, DOI = {10.1016/j.jksuci.2017.04.004}, PUBLISHER = {Elsevier (Amsterdam, Paesi Bassi)}, ISSN = {2213-1248}, BOOKTITLE = {Journal of King Saud University. Computer and information sciences (Online)}, } @INPROCEEDINGS{CARDILLO_2017_INPROCEEDINGS_CFMP_381090, AUTHOR = {Cardillo, F. A. and Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {How "deep" is learning word inflection?}, YEAR = {2017}, ABSTRACT = {Machine learning offers two basic strategies for morphology induction: lexical segmentation and surface word relation. The first one assumes that words can be segmented into morphemes. Inducing a novel inflected form requires identification of morphemic constituents and a strategy for their recombination. The second approach dispenses with segmentation: lexical representations form part of a network of associatively related inflected forms. Production of a novel form consists in filling in one empty node in the network. Here, we present the results of a recurrent LSTM network that learns to fill in paradigm cells of incomplete verb paradigms. 
Although the process is not based on morpheme segmentation, the model shows sensitivity to stem selection and stem-ending boundaries.}, KEYWORDS = {LSTM, Morphology induction, Cognitive modelling}, PAGES = {77-82}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85037368972\&origin=inward}, VOLUME = {2006}, DOI = {10.4000/books.aaccademia.2314}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {978-88-99982-76-8}, CONFERENCE_NAME = {Fourth Italian Conference on Computational Linguistics}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {11-13/12/2017}, BOOKTITLE = {Proceedings of the Fourth Italian Conference on Computational Linguistics (CLiC-it 2017)}, EDITOR = {Basili, R. and Nissim, M. and Satta, G.}, } @INPROCEEDINGS{PIRRELLI_2017_INPROCEEDINGS_P_398875, AUTHOR = {Pirrelli, V.}, TITLE = {Co-activation and competition effects in lexical storage and processing}, YEAR = {2017}, ABSTRACT = {According to traditional wisdom in Linguistics, morphologically simple words reside in the mental lexicon, a kind of brain dictionary that contains unpredictable mappings between lexical features. Here I illustrate some of the defining features of an alternative view of the language architecture, where computation and storage are just the short-term and long-term dynamics of the same underlying process. Empirical results of a computational model of this view are reported and general implications for a theory of the lexicon are discussed.}, KEYWORDS = {Mental Lexicon, Morphology, Human Language Processing, artificial neural networks, lexical self-organization}, PAGES = {1-21}, URL = {https://picgl4.files.wordpress.com/2015/11/4-paper_1_pirrelli.pdf}, CONFERENCE_NAME = {4th Patras International Conference of Graduate Students in Linguistics}, CONFERENCE_PLACE = {Patras, Greece}, CONFERENCE_DATE = {20-22/05/2016}, } @INPROCEEDINGS{BOMPOLAS_2017_INPROCEEDINGS_BMFCPR_381125, AUTHOR = {Bompolas, S. and Marzi, C. and Ferro, M. 
and Cardillo, F. A. and Pirrelli, V. and Ralli, A.}, TITLE = {Transparency and predictability in Modern Greek conjugation: Implications for models of word processing}, YEAR = {2017}, ABSTRACT = {We argue that the Greek evidence calls for a substantial revision of the clear-cut interaction between transparency/predictability and regularity, to make room for a more process-oriented notion of regularity. According to this view, regularity is no longer an epiphenomenon of the design of the human language faculty and the purported dualism between rule-based and memory-based routes, but the graded result of the varying interaction of several structural factors concurrently affecting the human word processor.}, KEYWORDS = {Inflectional regularity, Word Processing, Modern Greek Conjugation}, PAGES = {17-19}, URL = {http://www.lilec.it/mmm/wp/wp-content/uploads/2017/02/Book-of-abstracts_MMM11_Final.pdf}, CONFERENCE_NAME = {MMM 11: 11th Mediterranean Morphology Meeting}, CONFERENCE_PLACE = {Cyprus}, CONFERENCE_DATE = {22-25/06/2017}, } @INPROCEEDINGS{PIRRELLI_2017_INPROCEEDINGS_P_381136, AUTHOR = {Pirrelli, V.}, TITLE = {Storage vs. Processing in Models of Word Inflection. A Neuro-computational Hebbian Perspective}, YEAR = {2017}, ABSTRACT = {The advent of connectionism in the 80's popularised the idea that the lexical processor consists of a network of parallel processing units selectively firing in response to sensory stimuli. In the light of these assumptions, the most important contribution of connectionism to the theoretical debate on lexical modelling at the time was the utter rejection of the widely accepted idea that word recognition and production require a dichotomous choice between storage and processing. However, in spite of the prima facie psycho-computational allure of this view of the lexicon, early connectionist models also embraced a number of unsatisfactory assumptions about word learning and processing. 
More recently, a growing number of approaches to inflection in both Psycholinguistics and Theoretical Linguistics developed the view that surface word relations represent a fundamental domain of morphological competence. Learning the morphology of a language amounts to acquiring relations between fully stored lexical forms, which are concurrently available in the speaker's mental lexicon and jointly facilitate processing of morphologically related forms through patterns of emergent self-organisation. This novel view presupposes an integrative language architecture, where storage and processing, far from being conceived of as insulated and poorly interacting modules, are the short-term and the long-term dynamics of the same underlying process of adaptive specialisation of synaptic connections. This view, upheld by recent evidence of the neuro-anatomical bases of short-term and long-term memory processes, crucially hinges on Hebbian principles of synaptic plasticity, which are, in turn, in keeping with mathematical models of discriminative learning. I contend that integrative computer models of Hebbian language learning represent an exciting way forward in current neuro-computational research on word processing, and a persistently fertile legacy of the connectionist revolution.}, KEYWORDS = {Hebbian Learning, Recurrent Neural Networks, Word Inflection}, PAGES = {19-19}, URL = {https://indico.sissa.it/event/12/abstract-book.pdf}, CONFERENCE_NAME = {International Morphological Processing Conference (MoProc)}, CONFERENCE_PLACE = {Trieste}, CONFERENCE_DATE = {22-24/06/2017}, } @INPROCEEDINGS{PIRRELLI_2017_INPROCEEDINGS_PMFC_381117, AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M. and Cardillo, F. 
A.}, TITLE = {Paradigm Relative Entropy and Discriminative Learning}, YEAR = {2017}, ABSTRACT = {In the present contribution, we show that principles of discriminative learning of symbolic time series go a long way in accounting for these effects, thus making an important contribution to our understanding of the human lexical processor and its sensitivity to word distributions both within and across paradigms.}, KEYWORDS = {Paradigm Entropy, Discriminative Learning, Mental Lexicon, Verb Inflection}, PAGES = {5}, URL = {http://w3.erss.univ-tlse2.fr/ParadigMo2017/program.html}, CONFERENCE_NAME = {ParadigMo 2017: First Workshop on Paradigmatic Word Formation Modeling}, CONFERENCE_PLACE = {Toulouse}, CONFERENCE_DATE = {19-20/06/2017}, } @ARTICLE{MARZI_2016_ARTICLE_MFCP_360723, AUTHOR = {Marzi, C. and Ferro, M. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {Effects of frequency and regularity in an integrative model of word storage and processing}, YEAR = {2016}, ABSTRACT = {Considerable evidence has accrued on the role of paradigms as both theoretical and cognitive structures regimenting the way words are processed and acquired. The evidence supports a view of the lexicon as an emergent integrative system, where word forms are concurrently and competitively stored as repeatedly successful processing patterns, and on-line processing crucially depends on the internal organisation of stored patterns.}, KEYWORDS = {Lexical access, word recall, serial processing, parallel activation, inflectional paradigms, mental lexicon}, PAGES = {79-114}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84986550295\&origin=inward}, VOLUME = {28}, PUBLISHER = {Pacini (Ospedaletto, Italia)}, ISSN = {1120-2726}, JOURNAL = {Rivista di Linguistica}, } @EDITORIAL{MARZI_2016_EDITORIAL_MP_360724, AUTHOR = {Marzi, C. 
and Pirrelli, V.}, TITLE = {Word knowledge and word usage: A Foreword}, YEAR = {2016}, ABSTRACT = {This special issue, together with its companion issue to appear in Lingue e Linguaggio, stems from the NetWordS Final Conference Word knowledge and word usage: representations and processes in the mental lexicon.* The conference, held on the 30th and 31st of March, and the 1st of April 2015 in Pisa, concluded the 4-year NetWordS project, the European Network of Word Structure funded by the European Science Foundation within the Research Networking Programme. In line with the highly multidisciplinary profile of NetWordS agenda, the conference offered a comprehensive and inclusive forum focussing on two main lines of lexical inquiry: (i) usage-based approaches to bootstrapping word form and structure (morpho-phonological and morpho-syntactic issues), including: acquisition of lexical categories, emergence of morphological structure, lexical memories, anticipatory prediction-based mechanisms of word recognition, word production, frequency-based models of lexical productivity, word encoding, models of lexical architecture, family-based effects in word processing, word reading and writing; (ii) usage-based approaches to word meanings (lexical semantics and pragmatics in morphologically simple and complex words), including: distributional semantics, compound interpretation, concept composition and coercion, conceptualization of perception and action, time and space in the lexicon, metonymy and metaphor, lexico-semantic relations, perceptual grounding and embodied cognition, context-based and encyclopedic knowledge, semantic association and categorization. 
The multidisciplinary focus on word knowledge and word usage promoted by the Conference led participants to openly discuss an impressive range of approaches and empirical data: priming and lexical decision in a number of contexts, distributional semantics and models of semantic composition, neural networks, machine learning and mathematical modelling of empirical evidence, as well as their neuro-biological and neuro-functional correlates.}, KEYWORDS = {word knowledge, word usage, mental lexicon, interdisciplinary approach, NetWordS}, PAGES = {3-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84986558643\&origin=inward}, VOLUME = {28. 1}, PUBLISHER = {Pacini (Pisa, ITA)}, } @EDITORIAL{MARZI_2016_EDITORIAL_MP_360725, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {Word knowledge and word usage: A foreword}, YEAR = {2016}, ABSTRACT = {This special issue, together with its companion issue to appear in Italian Journal of Linguistics, stems from the NetWordS Final Conference "Word knowledge and word usage: representations and processes in the mental lexicon". The conference, held on the 30th and 31st of March, and the 1st of April 2015 in Pisa, concluded the 4-year NetWordS project, the European Network of Word Structure funded by the European Science Foundation within the Research Networking Programme. 
In line with the highly multidisciplinary profile of NetWordS agenda, the conference offered a comprehensive and inclusive forum focussing on two main lines of lexical inquiry: (i) usage-based approaches to bootstrapping word form and structure (morpho-phonological and morpho-syntactic issues), including: acquisition of lexical categories, emergence of morphological structure, lexical memories, anticipatory prediction-based mechanisms of word recognition, word production, frequency-based models of lexical productivity, word encoding, models of lexical architecture, family-based effects in word processing, word reading and writing; (ii) usage-based approaches to word meanings (lexical semantics and pragmatics in morphologically simple and complex words), including: distributional semantics, compound interpretation, concept composition and coercion, conceptualization of perception and action, time and space in the lexicon, metonymy and metaphor, lexico-semantic relations, perceptual grounding and embodied cognition, context-based and encyclopedic knowledge, semantic association and categorization. The multidisciplinary focus on word knowledge and word usage promoted by the Conference led participants to openly discuss an impressive range of approaches and empirical data: priming and lexical decision in a number of contexts, distributional semantics and models of semantic composition, neural networks, machine learning and mathematical modelling of empirical evidence, as well as their neuro-biological and neuro-functional correlates. It is widely acknowledged that looking at the same problem from different angles has an additive effect on the impact of current language research. Certainly more can be achieved, however, if, rather than simply adding more perspectives on the same subject, with individual research efforts staying within the boundaries of single knowledge domains, scholars manage to integrate them into a boundary-shifting methodological perspective. 
When psycholinguistic evidence from humans is successfully replicated algorithmically through a computational model implementing a few well-understood principles of time-series processing, we are in a position to empirically assess what input conditions favour memorisation and acquisition of symbolic strings by the model, and test these algorithmic predictions back on human subjects, thus going full circle. This may have a multiplicative effect on current research, providing not only mathematical modelling of present behavioural evidence, but amounting to fully explanatory mechanisms. Our current understanding of WHERE and WHEN some cognitive processes are implemented in the brain will be complemented by knowledge of WHAT information they rely on and HOW they integrate it. Other compelling examples of the full potential of cross-disciplinary integration can be found in the present volume and in the twin issue of Italian Journal of Linguistics. As a general point, we contend that only by putting single-domain acquisitions into the wider context of human communication, and developing an interdisciplinary framework whereby each specialist will take advantage of insights from other disciplines, we can make substantial progress in our understanding of the lexical roots of human verbal communication in real contexts. The edited selection of papers presented here provides a representative sample of the range of approaches debated at the NetWordS Pisa Conference, by way of illustration of how aspects of knowledge integration and methodological innovation can be put at the service of a better understanding of broad lexical issues.}, KEYWORDS = {word knowledge, word usage, interdisciplinary approach, mental lexicon, NetWordS}, PAGES = {3-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84978285090\&origin=inward}, VOLUME = {XV. 
1}, DOI = {10.1418/83651}, PUBLISHER = {Il Mulino (Bologna, ITA)}, ISBN = {978-88-15-26226-4}, } @INPROCEEDINGS{BOMPOLAS_2016_INPROCEEDINGS_BMFCP_362297, AUTHOR = {Bompolas, S. and Marzi, C. and Ferro, M. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {Reassessing inflectional regularity in Modern Greek conjugation}, YEAR = {2016}, ABSTRACT = {Paradigm-based approaches to word processing/learning assume that word forms are not acquired in isolation, but through associative relations linking members of the same word family (e.g. a paradigm, or a set of forms filling the same paradigm cell). Principles of correlative learning offer a set of dynamic equations that are key to modelling this complex dynamic at a considerable level of detail. We use these dynamic equations to simulate acquisition of Modern Greek conjugation, and we compare the results with evidence from German and Italian. Simulations show that different Greek verb classes are processed and acquired differentially, depending on their degrees of formal transparency and predictability. We relate these results to psycholinguistic evidence on Modern Greek word processing, and interpret our findings as supporting a view of the mental lexicon as an emergent integrative system.}, KEYWORDS = {word processing, paradigm-based learning, morphological processing, Greek stem allomorphy, Temporal Self-Organising Map}, PAGES = {72-77}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85009242702\&origin=inward}, VOLUME = {1749}, DOI = {10.4000/books.aaccademia.1721}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {978-88-99982-08-9}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics (CLiC-it 2016) \& Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. 
Final Workshop (EVALITA 2016)}, CONFERENCE_PLACE = {Napoli, Italy}, CONFERENCE_DATE = {05-07/12/2016}, BOOKTITLE = {CLiC-it \& EVALITA 2016-Proceedings of Third Italian Conference on Computational Linguistics (CLiC-it 2016) \& Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop (EVALITA 2016)}, EDITOR = {Basile, P. and Corazza, A. and Montemagni, S. and Nissim, M. and Patti, V. and Semeraro, G. and Sprugnoli, R.}, } @INPROCEEDINGS{FERRO_2016_INPROCEEDINGS_FCPGS_362349, AUTHOR = {Ferro, M. and Cardillo, F. A. and Pirrelli, V. and Gagné, C. L. and Spalding, T. L.}, TITLE = {Written word production and lexical self-organisation: evidence from English (pseudo)compounds}, YEAR = {2016}, ABSTRACT = {Elevation in typing latency for the initial letter of the second constituent of an English compound, relative to the latency for the final letter of the first constituent of the same compound, provides evidence that implementation of a motor plan for written compound production involves smaller constituents, in both semantically transparent and semantically opaque compounds. We investigate here the implications of this evidence for algorithmic models of lexical organisation, to show that effects of differential perception of the internal structure of compounds and pseudo-compounds can also be simulated as peripheral stages of lexical access by a self-organising connectionist architecture, even in the absence of morphosemantic information. 
This complementary evidence supports a maximization-of-opportunity approach to lexical modelling, accounting for the integration of effects of pre-lexical and lexical access.}, KEYWORDS = {compound, pseudo-compound, written word production, lexical self-organisation, temporal self organising map}, PAGES = {146-151}, URL = {http://ceur-ws.org/Vol-1749/}, VOLUME = {1749}, DOI = {10.4000/books.aaccademia.1775}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {9788899982546}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics (CLiC-it 2016)}, CONFERENCE_PLACE = {Napoli (Italia)}, CONFERENCE_DATE = {5-6/12/2016}, BOOKTITLE = {Proceedings CLiC-it 2016}, EDITOR = {Basile, P. and Corazza, A. and Cutugno, F. and Montemagni, S. and Nissim, M. and Patti, V. and Semeraro, G. and Sprugnoli, R.}, } @ARTICLE{MARZI_2015_ARTICLE_MP_346413, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {A Neuro-Computational Approach to Understanding the Mental Lexicon}, YEAR = {2015}, ABSTRACT = {Human lexical knowledge does not appear to be organised to minimise storage, but rather to maximise processing efficiency. The way lexical information is stored reflects the way it is dynamically processed, accessed and retrieved. A detailed analysis of the way words are memorised, of the dynamic interaction between lexical representations and distribution and degrees of regularity in input data, can shed some light on the emergence of structures and relations within fully-stored words. We believe that a bottom-up investigation of low-level memory and processing functions can help understand the cognitive mechanisms that govern word processing in the mental lexicon. Neuro-computational models can play an important role in this inquiry, as they help understand the dynamic nature of lexical representations by establishing an explanatory connection between lexical structures and processing models dictated by the micro-functions of human brain. 
Starting from some linguistic, psycholinguistic and neuro-physiological evidence supporting a dynamic view of the mental lexicon as an integrative system, we illustrate Temporal Self-Organising Maps (TSOMs), artificial neural networks that can model such a view by memorising time series of symbolic units (words) as routinized patterns of short-term node activation. On the basis of a simple pool of principles of adaptive Hebbian synchronisation, TSOMs can perceive possible surface relations between word forms and store them by partially overlapping activation patterns, reflecting gradient levels of lexical specificity, from holistic to decompositional lexical representations. We believe that TSOMs offer an algorithmic model of the emergence of high-level, global and language-specific morphological structure through the working of low-level, language-aspecific processing functions, thus promising to bridge the persisting gap between high-level principles of grammar architecture (lexicon vs. rules), computational correlates (storage vs. processing) and low-level principles and localisations of brain functions. Extensions of the current TSOM architecture are envisaged and their theoretical implications are discussed.}, KEYWORDS = {Mental lexicon, dynamic storage, parallel distributed processing, hebbian learning, temporal self-organising maps}, PAGES = {493-535}, URL = {http://jcs.snu.ac.kr/jcs/issue/vol16/no4/05+Marzi+and+Pirrelli.pdf}, VOLUME = {16}, PUBLISHER = {Institute for cognitive science, Seoul national university (Seoul, Corea del Sud)}, ISSN = {1976-6939}, JOURNAL = {Journal of cognitive science (Seoul. Online)}, } @INCOLLECTION{PIRRELLI_2015_INCOLLECTION_PFM_330234, AUTHOR = {Pirrelli, V. and Ferro, M. 
and Marzi, C.}, TITLE = {Computational complexity of abstractive morphology}, YEAR = {2015}, ABSTRACT = {Abstractive and constructive approaches to word structure make radically different assumptions concerning nature and role of the building blocks that make up a speaker's morphological competence. In this contribution, we show that the two views are also computationally different. In particular, we contend that a number of problems arising in connection with a subsymbolic implementation of the constructive view (as epitomised by classical multi-layered perceptrons) are tackled effectively, or disappear altogether, in a neurally-inspired implementation of associative networks, resting on key-notions such as self-organization and emergence. A particular variant of Kohonen's Self-Organizing Map is introduced as a model to explore and assess the implications of an abstractive approach in terms of its computational complexity. Details of the model (Temporal Self-Organizing Map, TSOM) and experimental data are shown to illustrate the interplay between processing and storage in language acquisition.}, KEYWORDS = {Word processing, computational complexity, mental lexicon, dynamic memories, self-organisation, word structure, morphology}, PAGES = {141-166}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84938781714\&origin=inward}, DOI = {10.1093/acprof:oso/9780198723769.003.0008}, PUBLISHER = {Oxford University Press (Oxford, GBR)}, ISBN = {978-0-19-872376-9}, BOOKTITLE = {Understanding and Measuring Morphological Complexity}, EDITOR = {Baerman, M. and Brown, D. and Corbett, G. G.}, } @EDITORIAL{PIRRELLI_2015_EDITORIAL_PMF_329357, AUTHOR = {Pirrelli, V. and Marzi, C. 
and Ferro, M.}, TITLE = {Proceedings of the NetWordS Final Conference on Word Knowledge and Word Usage: Representations and Processes in the Mental Lexicon}, YEAR = {2015}, ABSTRACT = {The international conference "Word Knowledge and Word Usage: Representations and processes in the mental lexicon" is the final outcome of 4 years of intense multi-disciplinary research networking and cooperation funded by the European Science Foundation within the framework of the NetWordS programme (May 2011 - April 2015). NetWordS' mission was to bring together experts of various research fields (from brain sciences and computing to cognition and linguistics) and of different theoretical inclinations, to advance the current awareness of theoretical, typological, psycholinguistic, computational and neurophysiological evidence on the structure and processing of words, with a view to developing novel research paradigms and bringing up a new generation of language scholars. The conference was intended to provide a first forum for assessing current progress of crossdisciplinary research on language architecture and usage, and discussing prospects of future synergy. People are known to memorise, parse and access words in a context-sensitive and opportunistic way, by caching their most habitual and productive processing patterns into routinized behavioural schemes. Speakers not only take advantage of token-based information such as frequency of individual, holistically stored words, but they are also able to organise stored words through paradigmatic structures (or word families) whose overall size and frequency is an important determinant of ease of lexical access and interpretation. Accordingly, lexical organisation is not necessarily functional to descriptive economy and minimisation of storage, but to more performance-oriented factors such as efficiency of memorisation, access and recall. 
Usage-based approaches to word processing lend support to this view, to promote explanatory frameworks that aim to investigate the stable correlation patterns linking distributional entrenchment of lexical units with productivity, internal structure and ease of interpretation. Ultimately, this is intended to establish a deep interconnection between performance-oriented, low-level lexical functions such as memorisation, rehearsal, access and recall, and their neuroanatomical correlates.}, KEYWORDS = {mental lexicon, linguistics, brain sciences, psycholinguistics, computing, cognition}, PAGES = {1-189}, URL = {http://ceur-ws.org/Vol-1347/}, VOLUME = {1347}, PUBLISHER = {CEUR-WS.org (Aachen, DEU)}, } @INPROCEEDINGS{FERRO_2015_INPROCEEDINGS_FMP_331183, AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {Lexical parsability and morphological structure}, YEAR = {2015}, ABSTRACT = {A classical tenet in the psycholinguistic literature on the mental lexicon is that a parsed affix presents high activation levels (and thus contributes to activation spreading to other words with the same affix), and that such levels are tightly correlated with the affix productivity. In a number of influential papers, it has been suggested that parsability criteria interact with frequency to define morphological productivity in the lexicon. For example, the frequency of a derivative (e.g. government) relative to its base (govern) is shown to be a good predictor for parsability/productivity. The higher the frequency ratio, the more likely the morphological structure to be perceived, and the associated affix to be used productively. The present contribution intends to offer a computational explanatory basis for this correlational evidence, and assess its applicability to the acquisition of complex inflectional paradigms. 
In those languages, like Italian and German, whose inflection is stem-based rather than word-based, there is often no single paradigmatic form which can act as a base by being properly contained in all other inflected variants. Yet, it seems intuitive to suggest that verbs that are inflected for one paradigm cell only (e.g. neighbouring), are learned earlier and more easily but exhibit lower levels of perceived inflectional structure than verbs with richer paradigms. This appears to be in good accord with experimental evidence of time latencies in lexical decision, which are shown to correlate negatively with token frequency, paradigm size and paradigm entropy. Our simulations, based on Temporal Self-Organizing Maps (TSOMs) allow us to establish an interesting connection between inflectional parsability, frequency-based paradigm structure, and acquisitional constraints on the interaction between the human processor and working memory. Self-organising topological models of the mental lexicon can mimic the spatial and temporal organization of memory structures supporting the processing of symbolic sequences, and can provide an interesting framework for testing integrative accounts of lexical processing/acquisition as the complex result of general-purpose operations on word stimuli (e.g. working memory, long-term storage, sensory-motor mapping, rehearsal, unit integration, unit analysis, executive control, time-series processing), in line with recent acquisitions on the neuro-functional architecture of the perisylvian language network in the left hemisphere of human brain. Simulations of the incremental acquisition of "mini-paradigms" (small islands of morphological contrast encompassing up to three different forms for the same verb) support the hypothesis that perception of structure (parsability) and morphological productivity strongly correlate in the inflectional lexica of German and Italian. 
In particular, by monitoring longitudinal progress in storage and generalisation of differently distributed inflectional paradigms in the two languages, we show that: i) high-frequency forms are stored and accessed significantly earlier than low-frequency forms; ii) deeply entrenched but paradigmatically isolated forms tend to block usage of other forms in the same paradigm; iii) low-frequency evenly distributed (highly entropic) intra-paradigmatic forms are acquired later but are easily extended. Our investigation credits the proposed computational framework with psycholinguistic plausibility, and grounds parsability-based models of morphological productivity on a specific, explicit proposal of lexical architecture. This provides an explanatory basis for both psycholinguistic and linguistic accounts of morphological structure, and offers an intermediate framework for scientific inquiry bridging the gap between linguistic units and functional units in neurosciences. Finally, it makes the interesting suggestion that principles of morpheme-based organisation of the mental lexicon are compatible with a learning strategy requiring memorisation of full forms.}, KEYWORDS = {morphological structure, word processing, token/type frequency}, PAGES = {22-37}, URL = {http://mmm.lis.upatras.gr/index.php/mmm/issue/view/293/showToc}, PUBLISHER = {Università degli Studi di Bologna (Bologna, Italia)}, ISSN = {1826-7491}, CONFERENCE_NAME = {Morphology and Semantics-Ninth Mediterranean Morphology Meeting}, CONFERENCE_PLACE = {Dubrovnik (Croatia)}, CONFERENCE_DATE = {15-18/09/2013}, BOOKTITLE = {Morphology and Semantics}, EDITOR = {Audring, J. and Koutsoukos, N. and Masini, F. and Raffaelli, I.}, } @INPROCEEDINGS{MARZI_2015_INPROCEEDINGS_MFP_329352, AUTHOR = {Marzi, C. and Ferro, M. 
and Pirrelli, V.}, TITLE = {Lexical emergentism and the "frequency-by-regularity" interaction}, YEAR = {2015}, ABSTRACT = {In spite of considerable converging evidence of the role of inflectional paradigms in word acquisition and processing, little efforts have been put so far into providing detailed, algorithmic models of the interaction between lexical token frequency, paradigm frequency, paradigm regularity. We propose a neurocomputational account of this interaction, and discuss some theoretical implications of preliminary experimental results.}, KEYWORDS = {morphological structure, frequency distribution, temporal self-organising maps}, PAGES = {37-41}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84927156830\&origin=inward}, VOLUME = {1347}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {NetWordS Final Conference on Word Knowledge and Word Usage: Representations and Processes in the Mental Lexicon}, CONFERENCE_PLACE = {Pisa (Italy)}, CONFERENCE_DATE = {30-31/03 01/04 2015}, BOOKTITLE = {Word Knowledge and Word Usage 2015}, EDITOR = {Pirrelli, V. and Marzi, C. and Ferro, M.}, } @INPROCEEDINGS{PIRRELLI_2015_INPROCEEDINGS_PNBDM_333414, AUTHOR = {Pirrelli, V. and Nahli, O. and Boschetti, F. and Del Gratta, R. and Marzi, C.}, TITLE = {Computational Linguistics and Language Physiology: Insights from Arabic NLP and Cooperative Editing}, YEAR = {2015}, ABSTRACT = {Computer processing of written Arabic raises a number of challenges to traditional parsing architectures on many levels of linguistic analysis. In this contribution, we review some of these core issues and the demands they make, to suggest different strategies to successfully tackle them. In the end, we assess these issues in connection with the behaviour of neuro-biologically inspired lexical architectures known as Temporal Self-Organising Maps. 
We show that, far from being language-specific problems, issues in Arabic processing can shed light on some fundamental characteristics of the human language processor, such as structure-based lexical recoding, concurrent, competitive activation of output candidates and dynamic selection of optimal solutions.}, KEYWORDS = {Non-concatenative morphology, Optical Character Recognition, WordNet, Temporal Self-organising Maps, Mental Lexicon, Language neuro-physiology}, PAGES = {1-8}, URL = {http://dl.acm.org/citation.cfm?id=2802612}, DOI = {10.1145/2802612.2802637}, ISBN = {978-1-4503-3295-8}, CONFERENCE_NAME = {Third AIUCD Annual Conference-Humanities and Their Methods in the Digital Ecosystem}, CONFERENCE_PLACE = {Bologna (IT)}, CONFERENCE_DATE = {18-19/09/2014}, BOOKTITLE = {Third AIUCD Annual Conference-Humanities and Their Methods in the Digital Ecosystem}, EDITOR = {Tomasi, F. and Del Turco, R. R. and Tammaro, A. M.}, } @ARTICLE{CHERSI_2014_ARTICLE_CFPP_283372, AUTHOR = {Chersi, F. and Ferro, M. and Pezzulo, G. and Pirrelli, V.}, TITLE = {Topological Self-Organization and Prediction Learning Support Both Action and Lexical Chains in the Brain}, YEAR = {2014}, ABSTRACT = {A growing body of evidence in cognitive psychology and neuroscience suggests a deep interconnection between sensory-motor and language systems in the brain. Based on recent neurophysiological findings on the anatomo-functional organization of the fronto-parietal network, we present a computational model showing that language processing may have reused or co-developed organizing principles, functionality, and learning mechanisms typical of premotor circuit. The proposed model combines principles of Hebbian topological self-organization and prediction learning. Trained on sequences of either motor or linguistic units, the network develops independent neuronal chains, formed by dedicated nodes encoding only context-specific stimuli. 
Moreover, neurons responding to the same stimulus or class of stimuli tend to cluster together to form topologically connected areas similar to those observed in the brain cortex. Simulations support a unitary explanatory framework reconciling neurophysiological motor data with established behavioral evidence on lexical acquisition, access, and recall.}, KEYWORDS = {Motor chains, Lexical chains, Serial working memory, Computational modeling, Self-organizing maps, Somatotopic organization, Prediction}, PAGES = {476-491}, URL = {http://onlinelibrary.wiley.com/doi/10.1111/tops.12094/abstract?deniedAccessCustomisedMessage=\&userIsAuthenticated=false}, VOLUME = {6}, DOI = {10.1111/tops.12094}, PUBLISHER = {Cognitive Science Society, Inc (Hoboken, NJ, Stati Uniti d'America)}, ISSN = {1756-8757}, JOURNAL = {Topics in cognitive science (Print)}, } @ARTICLE{MARZI_2014_ARTICLE_MFP_287289, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Morphological structure through lexical parsability}, YEAR = {2014}, ABSTRACT = {The emergence of morphological structure in lexical acquisition is analysed in the computational framework of Temporal Self-Organising Maps (TSOMs), to provide an explanatory basis for both psycholinguistic and linguistic accounts of lexical parsability. The investigation we propose is grounded on the hypothesis that perception of morphological structure (parsability) and frequency strongly correlate in the acquisition of inflectional paradigms. Analysis of experimental results of word acquisition obtained by artificially varying training conditions, allows us to understand developmental competition between fully-inflected word forms, and to investigate a hierarchy of frequency effects. 
The computational and theoretical implications of such a memory-based view of the relationship between frequency and perception, and its potential to account}, KEYWORDS = {inflectional paradigms, morphological structure, token/type frequency, word processing}, PAGES = {263-290}, URL = {http://www.rivisteweb.it/doi/10.1418/78410}, VOLUME = {XIII}, DOI = {10.1418/78410}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{PIRRELLI_2014_ARTICLE_P_288043, AUTHOR = {Pirrelli, V.}, TITLE = {Review of "Computational Paralinguistics: Emotion, Affect and Personality in Speech and Language Processing" (by Schuller \& Batliner, Wiley Publishing 2013)}, YEAR = {2014}, KEYWORDS = {Paralinguistics, Pragmatics, Language usage}, URL = {http://www.computingreviews.com/review/review_review.cfm?review_id=142608}, PUBLISHER = {Association for Computing Machinery (New York, N. Y, Stati Uniti d'America)}, ISSN = {1530-6585}, JOURNAL = {Computing reviews (Online)}, } @EDITORIAL{ELMOHAJIR_2014_EDITORIAL_EACAEPZE_330677, AUTHOR = {El Mohajir, M. and Al Achhab, M. and Chahhou, M. and Arioua, M. and El Mohajir, B. and Pirrelli, V. and Zarghili, A. and El Far, M.}, TITLE = {Proceedings of IEEE-CiST14-Third IEEE International Colloquium in Information Science and Technology (CIST)}, YEAR = {2014}, ABSTRACT = {The 3rd international IEEE Colloquium on Information Science and Technology (CIST'14) is part of the IEEE CONFERENCE SERIES that are held in Morocco, and is sponsored by the IEEE Morocco Section and the IEEE Morocco Computer \& Communication Joint Chapter, and the UAE IEEE Student Branch. The 2014 edition was organized in collaboration with the Faculty of Sciences of Tetuan, the national school of applied sciences of Tetuan and the University of Abdelmalek Essaadi. 
IEEE CIST is emerging as a key annual event that aims to serve as a forum to promote the exchange of the latest advances achieved by IT researchers, IT decision makers, IT managers, application designers and software engineers in the domain of information science and related technology. Computing challenges, models, applications and IT solutions will be discussed from the perspectives of academia, industry and government. In addition to the main conference topics, IEEE CIST will also provide a platform for supporting innovative and original contributions in three complementary disciplines that are: Arabic natural language processing, Information and multimedia processing and Internet of Things. We would like to extend our most sincere thanks and gratitude to the keynote speakers of IEEE CIST'14 for their important added value to this edition and to the Scientific Committee Members who helped us in the review process. We would like also to express our thanks to the IEEE Computer Society for their support through their Distinguished Lecturers Programs. We are also very glad to express our most sincere gratitude for the organizing committee members for their full dedication and professional organization of this edition. The success of this colloquium will be mainly attributed to the authors who contributed with their posters and talks. We hope that CIST will continue to offer a privileged context for participants to develop new ways and methods to achieve our objectives in advancing our research and projects. We can together achieve more and face more efficiently the challenges of the current millennium.}, PAGES = {440}, URL = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6996097}, VOLUME = {CFP1467R-ART}, DOI = {10.1109/CIST.2014.7016582}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-4799-5979-2}, } @EDITORIAL{PIRRELLI_2014_EDITORIAL_PR_300048, AUTHOR = {Pirrelli, V. 
and Raffaelli, I.}, TITLE = {Special Issue of Suvremena Lingvistika}, YEAR = {2014}, PAGES = {127-235}, URL = {https://publications.cnr.it/doc/300048}, PUBLISHER = {Croatian Philological Society (Zagreb, HRV)}, } @INPROCEEDINGS{LYDING_2014_INPROCEEDINGS_LSBBCDDLP_289308, AUTHOR = {Lyding, V. and Stemle, E. and Borghetti, C. and Brunello, M. and Castagnoli, S. and Dell'Orletta, F. and Dittmann, H. and Lenci, A. and Pirrelli, V.}, TITLE = {The PAISÀ Corpus of Italian Web Texts}, YEAR = {2014}, ABSTRACT = {PAISÀ is a Creative Commons licensed, large web corpus of contemporary Italian. We describe the design, harvesting, and processing steps involved in its creation.}, PAGES = {36-43}, URL = {http://aclweb.org/anthology/W14-04}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, KEYWORDS = {Corpus annotation, Tree-bank, Corpus design, Corpus harvesting}, CONFERENCE_NAME = {9th Web as Corpus Workshop (WaC-9)}, CONFERENCE_PLACE = {Gothenburg, Sweden}, CONFERENCE_DATE = {April 26, 2014}, BOOKTITLE = {Proceedings of the 9th Web as Corpus Workshop (WaC-9)}, EDITOR = {Bildhauer, F. and Schäfer, R.}, } @INPROCEEDINGS{PIRRELLI_2014_INPROCEEDINGS_PMF_290601, AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M.}, TITLE = {Two-dimensional Wordlikeness Effects in Lexical Organisation}, YEAR = {2014}, ABSTRACT = {The main focus of research on wordlikeness has been on how serial processing strategies affect perception of similarity and, ultimately, the global network of associative relations among words in the mental lexicon. Comparatively little effort has been put so far, however, into an analysis of the reverse relationship: namely, how global organisation effects influence the speakers' perception of word similarity and of words' internal structure. 
In this paper, we explore the relationship between the two dimensions of wordlikeness (the "syntagmatic" and the "paradigmatic" one), to suggest that the same set of principles of memory organisation can account for both dimensions.}, KEYWORDS = {wordlikeness, lexical access, word processing, frequency, memory}, PAGES = {301-305}, URL = {http://clic.humnet.unipi.it/it/atti.html}, VOLUME = {1}, DOI = {10.12871/CLICIT2014158}, ISBN = {978-8-86741-472-7}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics CLiC-it 2014 \& Fourth International Workshop EVALITA 2014}, CONFERENCE_PLACE = {Pisa, Italy}, CONFERENCE_DATE = {9-11/12/2014}, BOOKTITLE = {The First Italian Conference on Computational Linguistics-Proceedings}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BDMNP_288372, AUTHOR = {Boschetti, F. and Del Gratta, R. and Marzi, C. and Nahli, O. and Pirrelli, V.}, TITLE = {Modelli, metodi e strumenti per il trattamento automatico della lingua araba e per l'editing in ambienti collaborativi}, YEAR = {2014}, ABSTRACT = {La linguistica computazionale ha portato negli ultimi vent'anni a un profondo mutamento nello studio delle lingue e delle loro testimonianze scritte, spostando l'accento della ricerca da aspetti linguistico-formali all'uso linguistico in contesti comunicativi reali. Il presente contributo illustra l'impatto di questo cambio di prospettiva sullo studio della lingua araba, attraverso una rassegna di alcune attività di ricerca in corso presso l'Istituto di Linguistica Computazionale del CNR di Pisa: I. acquisizione dei testi arabi tramite Optical Character Recognition (OCR) e sviluppo di strumenti per la correzione manuale del testo in ambienti collaborativi; II. sviluppo di algoritmi e strumenti per l'analisi morfologica della lingua araba; III. analisi delle dinamiche di acquisizione del lessico arabo mediante architetture bio-computazionali; IV. 
sviluppo della WordNet dell'Arabo collegata a Princeton WordNet, ItalWordNet, LatinWordNet e alla nascente AncientGreek WordNet. Queste attività sono rivolte sia all'analisi delle caratteristiche linguistiche dell'arabo che allo studio della produzione letteraria araba e dei suoi rapporti storico-culturali con altre lingue. In particolare, il contributo intende illustrare la fertilità di un approccio metodologico che metta in relazione le dinamiche di acquisizione del lessico arabo, con la messa a punto di procedure di analisi ed edizione critica del testo e con i principi di organizzazione ontologica di una lingua ad alta produttività derivazionale.}, URL = {http://aiucd2014.unibo.it/book-of-abstracts.pdf}, CONFERENCE_NAME = {AIUCD 3rd annual conference}, CONFERENCE_PLACE = {Bologna}, CONFERENCE_DATE = {18-19 settembre 2014}, BOOKTITLE = {La metodologia della ricerca umanistica nell'ecosistema digitale-AIUCD 2014 Terzo convegno annuale}, EDITOR = {Rossi, F. and Tomasi, F.}, } @INPROCEEDINGS{PIRRELLI_2014_INPROCEEDINGS_P_288013, AUTHOR = {Pirrelli, V.}, TITLE = {Psycho-computational modelling of lexical access and organisation: what can we learn from Arabic?}, YEAR = {2014}, ABSTRACT = {Arabic morphology raises a formidable challenge to Markovian computational models of word processing, based on fixed-order memory chaining. Computational models of human short-term and long-term memory can help us considerably to shed light on the issues involved at the level of peripheral word processing (access lexical representations). According to this view, word processing (short-term activation) and lexical representations (entrenchment of habitual activation chains) are two sides of the same coin, as they involve the same levels of brain circuitry on a different time scale. 
Such an "integrative" view of the lexicon as a dynamic system will be possible only if we are able to foster an increasing synergy of perspectives and scientific domains of inquiry: neurosciences, (psycho)linguistics and computing. Conventions of Arabic script are no accident (maliciously intended to trip up computer algorithms)! They rather reflect some fundamental dynamics of the way human brain processes language.}, URL = {https://publications.cnr.it/doc/288013}, CONFERENCE_NAME = {IEEE CIST'14-Innovative Systems and Technologies for the Future}, CONFERENCE_PLACE = {Tetouan, Marocco}, CONFERENCE_DATE = {20-22 ottobre 2014}, } @INPROCEEDINGS{PIRRELLI_2014_INPROCEEDINGS_P_288044, AUTHOR = {Pirrelli, V.}, TITLE = {Modèles psycho-computationnels du lexique mentale}, YEAR = {2014}, ABSTRACT = {Over the last decades, a growing body of evidence on the mechanisms governing lexical storage, access, acquisition and processing has raised a considerable challenge to traditional models of language architecture and word usage. By pulling together cognitive, neurofunctional and psycho-computational implications of these mechanisms, a new view of the lexicon-grammar architecture emerges, based on the dynamic interaction between storage and processing. We call this an "integrative" view of the mental lexicon}, KEYWORDS = {Mental Lexicon, Language Acquisition, Memory}, URL = {https://publications.cnr.it/doc/288044}, CONFERENCE_NAME = {Workshop International en Traitement Automatique de la Langue Arabe}, CONFERENCE_PLACE = {Fès, Marocco}, CONFERENCE_DATE = {8 maggio 2014}, } @INPROCEEDINGS{MARZI_2013_INPROCEEDINGS_MFP_287555, AUTHOR = {Marzi, C. and Ferro, M. 
and Pirrelli, V.}, TITLE = {Lexical parsability and morphological structure}, YEAR = {2013}, ABSTRACT = {A classical tenet in the psycholinguistic literature on the mental lexicon is that a parsed affix presents high activation levels (and thus contributes to activation spreading to other words with the same affix), and that such levels are tightly correlated with the affix productivity. In a number of influential papers, it has been suggested that parsability criteria interact with frequency to define morphological productivity in the lexicon. For example, the frequency of a derivative (e.g. government) relative to its base (govern) is shown to be a good predictor for parsability/productivity. The higher the frequency ratio, the more likely the morphological structure to be perceived, and the associated affix to be used productively. The present contribution intends to offer a computational explanatory basis for this correlational evidence, and assess its applicability to the acquisition of complex inflectional paradigms. In those languages, like Italian and German, whose inflection is stem-based rather than word-based, there is often no single paradigmatic form which can act as a base by being properly contained in all other inflected variants. Yet, it seems intuitive to suggest that verbs that are inflected for one paradigm cell only (e.g. neighbouring), are learned earlier and more easily but exhibit lower levels of perceived inflectional structure than verbs with richer paradigms. This appears to be in good accord with experimental evidence of time latencies in lexical decision, which are shown to correlate negatively with token frequency, paradigm size and paradigm entropy. Our simulations, based on Temporal Self-Organizing Maps (TSOMs) allow us to establish an interesting connection between inflectional parsability, frequency-based paradigm structure, and acquisitional constraints on the interaction between the human processor and working memory. 
Self-organising topological models of the mental lexicon can mimic the spatial and temporal organization of memory structures supporting the processing of symbolic sequences [8-10], and can provide an interesting framework for testing integrative accounts of lexical processing/acquisition as the complex result of general-purpose operations on word stimuli (e.g. working memory, long-term storage, sensory-motor mapping, rehearsal, unit integration, unit analysis, executive control, time-series processing), in line with recent acquisitions on the neuro-functional architecture of the perisylvian language network in the left hemisphere of human brain. Simulations of the incremental acquisition of "mini-paradigms" (small islands of morphological contrast encompassing up to three different forms for the same verb) support the hypothesis that perception of structure (parsability) and morphological productivity strongly correlate in the inflectional lexica of German and Italian. In particular, by monitoring longitudinal progress in storage and generalisation of differently distributed inflectional paradigms in the two languages, we show that: i) high-frequency forms are stored and accessed significantly earlier than low-frequency forms; ii) deeply entrenched but paradigmatically isolated forms tend to block usage of other forms in the same paradigm; iii) low-frequency evenly distributed (highly entropic) intra-paradigmatic forms are acquired later but are easily extended. Our investigation credits the proposed computational framework with psycholinguistic plausibility, and grounds parsability-based models of morphological productivity on a specific, explicit proposal of lexical architecture. This provides an explanatory basis for both psycholinguistic and linguistic accounts of morphological structure, and offers an intermediate framework for scientific inquiry bridging the gap between linguistic units and functional units in neurosciences. 
Finally, it makes the interesting suggestion that principles of morpheme-based organisation of the mental lexicon are compatible with a learning strategy requiring memorisation of full forms.}, KEYWORDS = {morphological structure, word paradigms, frequency, human processor}, PAGES = {33-34}, URL = {http://mmm9.ffzg.unizg.hr/wp-content/uploads/2012/10/MMM_PROGRAM4.pdf}, CONFERENCE_NAME = {9th Mediterranean Morphology Meeting on "Morphology and Semantics" (9th MMM)}, CONFERENCE_PLACE = {Dubrovnik, Croatia}, CONFERENCE_DATE = {15-18/09/2013}, BOOKTITLE = {Morphology and Semantics-Books of Abstracts}, } @TECHREPORT{MARZI_2013_TECHREPORT_MP_287848, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {NetWordS: the European Network on Word Structure (2011-2015) ESF RNP Mid-Term Report (2011-2013)}, YEAR = {2013}, ABSTRACT = {By networking experts of various research fields (including but not limited to Theoretical Linguistics, Cognition, Brain Sciences and Computing) and of different theoretical inclinations, NetWordS has set itself the fundamental goal of advancing the current awareness of theoretical, typological, psycholinguistic, computational and neurophysiological evidence on the structure and processing of words, with a view to promoting novel methods of research and assessment for grammar architecture and language physiology. The programme is pursued through knowledge sharing, dissemination and transfer, organised over a four year period, from May 2011 to April 2015. Thanks to its highly interdisciplinary profile, the programme promotes training of young scientists through short visits, exchange grants and Summer Schools. 
It encourages the novel integration of existing methodologies, sets common research priorities, and fosters virtual cross-disciplinary laboratories, partnerships and research infrastructures.}, KEYWORDS = {Mental lexicon, Interdisciplinary approach, word representation, word processing}, URL = {https://publications.cnr.it/doc/287848}, } @ARTICLE{MARZI_2012_ARTICLE_MFP_217399, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Word alignment and paradigm induction}, YEAR = {2012}, ABSTRACT = {The variety of morphological processes attested in inflectional system of average complexity calls for adaptive strategies of word alignment. Prefixation, suffixation, stem alternation and combinations thereof pose severe problems to unsupervised algorithms of morphology induction. The paper analyses morphological generalisation as a by-product of flexible memory self-organisation strategies for word recoding. Our model endorses the hypothesis that lexical forms are memorised as full units. At the same time, lexical units are paradigmatically organised. We show that the overall amount of redundant morphological structure emerging from paradigm-based self-organisation has a clear impact on generalisation. This supports the view that issues of word representation and issues of word processing are mutually implied in lexical acquisition.}, KEYWORDS = {Morphological Generalisation, Morphological Paradigms, Self-Organising Memory, Word coding and Processing}, PAGES = {251-274}, URL = {http://www.rivisteweb.it/doi/10.1418/38789}, VOLUME = {XI}, DOI = {10.1418/38789}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{MARZI_2012_ARTICLE_MP_217391, AUTHOR = {Marzi, C. 
and Pirrelli, V.}, TITLE = {Understanding the Architecture of the Mental Lexicon}, YEAR = {2012}, ABSTRACT = {The present collection stems from the 1st NetWordS Workshop "Understanding the architecture of the mental lexicon: Integration of existing approaches", held in the Pisa Research Area of the Italian National Research Council, in November 2011. "NetWordS: the European network on Word Structure in the languages of Europe" is the Research Networking Programme of the European Science Foundation launched in May 2011 with the ambitious goal of paving the way to the European interdisciplinary research agenda on the Mental Lexicon, with particular emphasis on the following three main challenges: - lexicon and rules in the grammar, - word knowledge and word use, - words and meanings.}, KEYWORDS = {Mental Lexicon, interdisciplinary approach}, PAGES = {101-105}, URL = {https://publications.cnr.it/doc/217391}, VOLUME = {XI}, DOI = {10.1418/38780}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @INCOLLECTION{PIRRELLI_2012_INCOLLECTION_PFC_136472, AUTHOR = {Pirrelli, V. and Ferro, M. and Calderone, B.}, TITLE = {Learning Paradigms in Time and Space: Computational Evidence from Romance Languages}, YEAR = {2012}, ABSTRACT = {In the linguistic literature, paradigms have enjoyed a hybrid status, half-way between entrenched patterns of lexical organization and processing structures enforcing global constraints on the output of traditional inflection rules. 
We describe here an original computational model of the mental lexicon where paradigmatic structures emerge through learning as the by-product of the endogenous dynamics of lexical memorization as competitive self-organization, based on the complementary principles of formal contrast (in space) and association biuniqueness (in time).}, KEYWORDS = {Computational model, Lexical memorization, Mental lexicon, Processing structures, Self-organizing maps}, PAGES = {135-157}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84921732430\&origin=inward}, DOI = {10.1093/acprof:oso/9780199589982.003.0008}, PUBLISHER = {Oxford University Press (Oxford, GBR)}, ISBN = {978-0-19-958998-2}, BOOKTITLE = {Morphological Autonomy: Perspectives for Romance Inflectional Morphology}, EDITOR = {Maiden, M. and Smith, J. C. and Goldbach, M. and Hinzelin, M.}, } @EDITORIAL{MARZI_2012_EDITORIAL_MP_287395, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {Understanding the Architecture of the Mental Lexicon}, YEAR = {2012}, ABSTRACT = {The present collection stems from the 1st NetWordS Workshop "Understanding the architecture of the mental lexicon: Integration of existing approaches", held in the Pisa Research Area of the Italian National Research Council, in November 2011. 
"NetWordS: the European network on Word Structure in the languages of Europe" is the Research Networking Programme of the European Science Foundation launched in May 2011 with the ambitious goal of paving the way to the European interdisciplinary research agenda on the Mental Lexicon, with particular emphasis on the following three main challenges: - lexicon and rules in the grammar, - word knowledge and word use, - words and meanings.}, KEYWORDS = {Mental Lexicon, interdisciplinary approach}, PAGES = {101-274}, URL = {https://publications.cnr.it/doc/287395}, VOLUME = {XI}, PUBLISHER = {Il Mulino (Bologna, ITA)}, ISBN = {978-88-15-23601-2}, } @INPROCEEDINGS{CALDERONE_2012_INPROCEEDINGS_CP_288012, AUTHOR = {Calderone, B. and Pirrelli, V.}, TITLE = {Apprendimento morfologico, relazioni base-derivato e topologie paradigmatiche. Evidenze psico-computazionali a confronto}, YEAR = {2012}, ABSTRACT = {Il presente lavoro è volto a esplorare alcune dinamiche acquisizionali relative ai processi di maturazione della competenza morfologica in apprendenti bambini. In quest'ottica, sono riportate due differenti simulazioni computazionali dei processi di apprendimento della morfologia flessiva in Italiano e in Inglese. La prima simulazione, propria di un quadro connessionista classico, dà conto in modo inadeguato delle differenti scale temporali nell'apprendimento di alcune forme flesse verbali in inglese e italiano. La letteratura sull'argomento (Pizzuto \& Caselli 1992, Noccetti 2003) documenta in modo convergente una maggiore rapidità nell'apprendimento delle forme del presente indicativo da parte dei bambini italiani rispetto al ritmo di acquisizione delle forme verbali corrispondenti (la forma di base e la terza persona singolare in -s) da parte di bambini di madre lingua inglese. 
La seconda simulazione, basata su un modello di memorie associative "a cascata" addestrate tramite protocollo non-supervisionato, rende conto in maniera non banale del paradosso acquisizionale, confermato su base inter-linguistica da un recente studio di Dressler e colleghi (Bittner et al., 2003), secondo cui sistemi flessivi più complessi e completi sono appresi con maggiore facilità di sistemi flessivi più semplici ed estesamente sincretici.}, KEYWORDS = {Lessico Mentale, apprendimento morfologico, paradigmi flessionali}, PAGES = {17}, URL = {https://publications.cnr.it/doc/288012}, PUBLISHER = {Bulzoni Editore (Roma, ITA)}, CONFERENCE_NAME = {XLII Convegno della Società di Linguistica Italiana}, CONFERENCE_PLACE = {Scuola Normale Superiore, Pisa}, CONFERENCE_DATE = {25-27 settembre 2008}, BOOKTITLE = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, EDITOR = {Bambini, V. and Ricci, I. and Bertinetto, P. M.}, } @INPROCEEDINGS{GIRAUDO_2012_INPROCEEDINGS_GMP_84808, AUTHOR = {Giraudo, H. and Montermini, F. and Pirrelli, V.}, TITLE = {Processi cognitivi nell'analisi delle classi verbali dell'italiano: un approccio sperimentale}, YEAR = {2012}, ABSTRACT = {L'analisi della flessione, soprattutto verbale, nelle lingue romanze ha ricevuto un notevole impulso negli ultimi anni, in particolare dall'apporto alla ricerca in linguistica teorica di discipline come la psicolinguistica o le scienze cognitive. In questo articolo intendiamo riesaminare la ripartizione dei verbi italiani in classi, e osservare come la teoria morfologica e l'analisi sperimentale possano dare risultati convergenti e contribuire a mettere in luce i processi mentali che costituiscono la base della competenza morfologica dei parlanti (cf. Pirrelli 2007a; 2007b e, per un'illustrazione Bonami et al. 
2008).}, KEYWORDS = {Morphology, Word Processing, Word Learning, Mental Lexicon, L1}, URL = {https://publications.cnr.it/doc/84808}, VOLUME = {2 (CD ROM)}, PUBLISHER = {Bulzoni Editore (Roma, ITA)}, ISBN = {978-88-7870-652-1}, CONFERENCE_NAME = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, CONFERENCE_PLACE = {Scuola Normale Superiore, Pisa}, CONFERENCE_DATE = {25-27 settembre 2008}, BOOKTITLE = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, EDITOR = {Bambini, V. and Ricci, I. and Bertinetto, P. M.}, } @INPROCEEDINGS{MARZI_2012_INPROCEEDINGS_MFCP_287129, AUTHOR = {Marzi, C. and Ferro, M. and Caudai, C. and Pirrelli, V.}, TITLE = {Evaluating Hebbian Self-Organizing Memories for Lexical Representation and Access}, YEAR = {2012}, ABSTRACT = {The lexicon is the store of words in long-term memory. Any attempt at modelling lexical competence must take issues of string storage seriously. In the present contribution, we discuss a few desiderata that any biologically-inspired computational model of the mental lexicon has to meet, and detail a multi-task evaluation protocol for their assessment. The proposed protocol is applied to a novel computational architecture for lexical storage and acquisition, the "Topological Temporal Hebbian SOMs" (T2HSOMs), which are grids of topologically organised memory nodes with dedicated sensitivity to time-bound sequences of letters. 
These maps can provide a rigorous and testable conceptual framework within which to provide a comprehensive, multi-task protocol for testing the performance of Hebbian self-organising memories, and a comprehensive picture of the complex dynamics between lexical processing and the acquisition of morphological structure.}, KEYWORDS = {Mental Lexicon, Morphology Acquisition, Self-Organizing Maps}, PAGES = {886-893}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/index.html}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {8th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {23-25/05/2012}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Uğur Doğan, M. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{MARZI_2012_INPROCEEDINGS_MFP_219553, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Prediction and Generalisation in Word Processing and Storage}, YEAR = {2012}, ABSTRACT = {Word storage and processing have traditionally been modelled according to different computational paradigms, in line with the classical corner-stone of "dual-route" models of word structure assuming a sharp dissociation between memory and computation (Clahsen 1999, Di Sciullo \& Williams 1987, Pinker \& Prince 1988, Prasada \& Pinker 1993). Even the most radical alternative to dual-route thinking, connectionist one-route models, challenged the lexicon-grammar dualism only by providing a neurally-inspired mirror image of classical base-to-inflection rules, while largely neglecting issues of lexical storage (Rumelhart \& McClelland 1986, McClelland \& Patterson 2002, Seidenberg \& McClelland 1989). Recent psycho- and neuro-linguistic evidence, however, supports a less deterministic and modular view of the interaction between stored word knowledge and on-line processing (Baayen et al. 1997, Hay 2001, Maratsos 2000, Stemberger \& Middleton 2003, Tabak et al. 
2005, Ford et al. 2003, Post et al. 2008). The view entails simultaneous activation of distributed patterns of cortical connectivity encoding redundant distributional regularities in language data. Furthermore, recent developments in morphological theorising question the primacy of grammar rules over lexical storage, arguing that word regularities emerge from independent principles of lexical organisation, whereby lexical units and constructions are redundantly stored and mutually related through entailment relations (Matthews 1991, Corbett \& Fraser 1993, Pirrelli 2000, Burzio 2004, Booij 2010). We endorse here such a non modular view on Morphology to investigate two basic behavioural aspects of human word processing: morphological prediction and generalisation. The investigation is based on a computer model of morphology acquisition supporting the hypothesis that they both derive from a common pool of principles of lexical organisation.}, KEYWORDS = {Morphological generalisation, Word processing, Self-organising memory}, PAGES = {114-131}, URL = {http://mmm.lingue.unibo.it/}, CONFERENCE_NAME = {Eighth Mediterranean Morphology Meeting on "Morphology and the architecture of the grammar" (MMM8)}, CONFERENCE_PLACE = {Cagliari, Italy}, CONFERENCE_DATE = {14-17 September 2011}, EDITOR = {Ralli, A. and Booij, G. and Scalise, S. and Karasimos, A.}, } @INPROCEEDINGS{PIRRELLI_2012_INPROCEEDINGS_PG_84785, AUTHOR = {Pirrelli, V. and Guevara, E.}, TITLE = {Understanding NN Compounds}, YEAR = {2012}, ABSTRACT = {In this paper we intend to pursue two basic objectives: i) point out a substantial convergence between classification criteria for compounding that have developed independently from largely complementary perspectives and methodological stances, and ii) assess the important empirical consequences of this convergence and their potential impact on recent linguistic analyses of lexical compounds as either lexical (and specifically morphological) or syntactic phenomena. 
These two points are brought home by focusing on a particular class of Italian compounds, namely endocentric NN compounds such as ufficio reclami ('complaint office') or pesce palla ('ball fish') that prove to be increasingly productive in contemporary Italian (cf. Dardano 1978, Bisetto 2004).}, KEYWORDS = {Morphological composition, Word Processing, Word Learning, Mental Lexicon}, PAGES = {17}, URL = {https://publications.cnr.it/doc/84785}, VOLUME = {2 (CD ROM)}, PUBLISHER = {Bulzoni Editore (Roma, ITA)}, ISBN = {978-88-7870-652-1}, CONFERENCE_NAME = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, CONFERENCE_PLACE = {Scuola Normale Superiore, Pisa}, CONFERENCE_DATE = {25-27 settembre 2008}, BOOKTITLE = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, EDITOR = {Bambini, V. and Ricci, I. and Bertinetto, P. M.}, } @INPROCEEDINGS{PIRRELLI_2012_INPROCEEDINGS_P_288047, AUTHOR = {Pirrelli, V.}, TITLE = {At the core of lexical processing: computational and neurocognitive issues}, YEAR = {2012}, ABSTRACT = {The lexicon lies at the root of our linguistic competence and represents a fundamental interface domain between language and our conceptualisation of the outside world. In traditional conceptions of the language architecture, the lexicon has been generally characterised as a declarative memory store of static building blocks, with rules providing the basic principles and constraints on their on-line procedural combination. 
The talk deals with some recent computational models of self-organising memories and neuroimaging evidence of the connectivity of the perisylvian network for language processing and working memory located in the left hemisphere of the human brain, to suggest a different conception of the mental lexicon and its role in the architecture of language.}, KEYWORDS = {Memory, Mental Lexicon, Neurocognitive correlates}, URL = {http://hnk.ffzg.hr/fassbl2012/}, CONFERENCE_NAME = {8th International Conference Formal Approaches to South Slavic and Balkan Languages (FASSBL-8)}, CONFERENCE_PLACE = {Dubrovnik, Croatia}, CONFERENCE_DATE = {19-21 settembre 2012}, } @INPROCEEDINGS{PIRRELLI_2012_INPROCEEDINGS_P_288106, AUTHOR = {Pirrelli, V.}, TITLE = {Hebbian Self-Organizing Memories for Lexical Recoding and Processing}, YEAR = {2012}, ABSTRACT = {Hebbian self-organizing memories (Pirrelli et al. 2010, Ferro et al. 2011, Koutnik 2007) can provide a rigorous and testable conceptual framework within which to unify diverse functional hypotheses for lexical acquisition and processing, and to clarify how these hypotheses may be explained computationally. I discuss a few desiderata that any biologically-inspired computational model of the mental lexicon has to meet, and report on how well such desiderata are met by different types of Hebbian self-organizing memories, exhibiting empirically different maturational trends in lexical acquisition.}, KEYWORDS = {Self-organising Maps, Memory, Word Processing}, URL = {https://publications.cnr.it/doc/288106}, CONFERENCE_NAME = {Workshop on Exo-lexical variables in monolingual and bilingual morphological processing, IMM15}, CONFERENCE_PLACE = {Vienna}, CONFERENCE_DATE = {February 9-12, 2012}, } @ARTICLE{CHERSI_2011_ARTICLE_CFPP_205122, AUTHOR = {Chersi, F. and Ferro, M. and Pezzulo, G. 
and Pirrelli, V.}, TITLE = {Time, Language and Action-A Unified Long-Term Memory Model for Sensory-Motor Chains and Word Schemata}, YEAR = {2011}, ABSTRACT = {Action and language are known to be organized as closely-related brain subsystems. An Italian CNR project implemented a computational neural model where the ability to form chains of goal-directed actions and chains of linguistic units relies on a unified memory architecture obeying the same organizing principles.}, PAGES = {27-28}, URL = {http://ercim-news.ercim.eu/images/stories/EN84/EN84-web.pdf}, VOLUME = {84}, PUBLISHER = {ERCIM (Le Chesnay)}, ISSN = {0926-4981}, JOURNAL = {ERCIM news}, } @ARTICLE{FERRO_2011_ARTICLE_FMP_205180, AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {A Self-Organizing Model of Word Storage and Processing: Implications for Morphology Learning}, YEAR = {2011}, ABSTRACT = {In line with the classical cornerstone of "dual-route" models of word structure, assuming a sharp dissociation between memory and computation, word storage and processing have traditionally been modelled according to different computational paradigms. Even the most popular alternative to dual-route thinking - connectionist one-route models - challenged the lexicon-grammar dualism only by providing a neurally-inspired mirror image of classical base-to-inflection rules, while largely neglecting issues of lexical storage. Recent psycho- and neuro-linguistic evidence, however, supports a less deterministic and modular view of the interaction between stored word knowledge and on-line processing. 
We endorse here such a non modular view on morphology to offer a computer model supporting the hypothesis that they are both derivative of a common pool of principles for memory self-organization.}, KEYWORDS = {Lexical Processing, Self Organizing Maps, Morphological Structure, Serial Memory}, PAGES = {209-226}, URL = {http://www.rivisteweb.it/doi/10.1418/35840}, VOLUME = {2}, DOI = {10.1418/35840}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @INPROCEEDINGS{FERRO_2011_INPROCEEDINGS_FMP_205490, AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {T2HSOM: Understanding the Lexicon by Simulating Memory Processes for Serial Order}, YEAR = {2011}, ABSTRACT = {Over the last several years, both theoretical and empirical approaches to lexical knowledge and encoding have prompted a radical reappraisal of the traditional dichotomy between lexicon and grammar. The lexicon is not simply a large waste basket of exceptions and sub-regularities, but a dynamic, possibly redundant repository of linguistic knowledge whose principles of relational organization are the driving force of productive generalizations. In this paper, we overview a few models of dynamic lexical organization based on neural network architectures that are purported to meet this challenging view. In particular, we illustrate a novel family of Kohonen self-organizing maps (T2HSOMs) that have the potential of simulating competitive storage of symbolic time series while exhibiting interesting properties of morphological organization and generalization. 
The model, tested on training samples of as morphologically diverse languages as Italian, German and Arabic, shows sensitivity to manifold types of morphological structure and can be used to bootstrap morphological knowledge in an unsupervised way.}, KEYWORDS = {Mental Lexicon, Self-organizing Maps, Morphology}, PAGES = {32-41}, URL = {http://alpage.inria.fr/~sagot/woler2011/WoLeR2011/Program_%26_Proceedings.html}, CONFERENCE_NAME = {First International Workshop on Lexical Resources}, CONFERENCE_PLACE = {Ljubljana, Slovenia}, CONFERENCE_DATE = {1-5 Agosto 2011}, BOOKTITLE = {First International Workshop on Lexical Resources}, EDITOR = {Sagot, B.}, } @MISC{PIRRELLI_2011_MISC_P_288014, AUTHOR = {Pirrelli, V.}, TITLE = {ESF Research Networking Programme: "The European Network on Word Structure. Cross-disciplinary approaches to understanding word structure in the languages of Europe" (NetWordS)}, YEAR = {2011}, URL = {https://publications.cnr.it/doc/288014}, } @ARTICLE{FERRO_2010_ARTICLE_FOPP_64549, AUTHOR = {Ferro, M. and Ognibene, D. and Pezzulo, G. and Pirrelli, V.}, TITLE = {Reading as active sensing: a computational model of gaze planning in word recognition}, YEAR = {2010}, ABSTRACT = {We offer a computational model of gaze planning during reading that consists of two main components: a lexical representation network, acquiring lexical representations from input texts (a subset of the Italian CHILDES database), and a gaze planner, designed to recognize written words by mapping strings of characters onto lexical representations. The model implements an active sensing strategy that selects which characters of the input string are to be fixated, depending on the predictions dynamically made by the lexical representation network. We analyze the developmental trajectory of the system in performing the word recognition task as a function of both increasing lexical competence, and correspondingly increasing lexical prediction ability. 
We conclude by discussing how our approach can be scaled up in the context of an active sensing strategy applied to a robotic setting.}, KEYWORDS = {Reading, Language Learning, Mental Lexicon}, PAGES = {1-16}, URL = {https://publications.cnr.it/doc/64549}, VOLUME = {4}, PUBLISHER = {Frontiers Research Foundation (Lausanne, Svizzera)}, ISSN = {1662-5218}, JOURNAL = {Frontiers in neurorobotics}, } @ARTICLE{FERRO_2010_ARTICLE_FPP_64553, AUTHOR = {Ferro, M. and Pezzulo, G. and Pirrelli, V.}, TITLE = {Morphology, Memory and the Mental Lexicon}, YEAR = {2010}, ABSTRACT = {Recent experimental evidence on morphological learning and processing has prompted a less deterministic and modular view of the interaction between stored word knowledge and on-line processing. Storing a word in the mental lexicon does not simply entail keeping a faithful memory image of that word in the most compact way. It also requires encoding and manipulating such image through topological structures that are optimally adapted to word production and comprehension. Temporal Self-Organizing Maps (THSOMs) are a novel model of artificial neural network that keeps time serial information through predictive activation chains of receptors encoding both spatial and temporal information of input stimuli. 
The impact of this model on issues of lexical organization and morphological processing is investigated in detail through a series of simulations shedding light on the dynamics between short-term memory (activation), long-term memory (learning) and morphological organization of stored word forms (topology).}, KEYWORDS = {Morphology, Word Processing, Word Learning, Mental Lexicon}, PAGES = {203-242}, URL = {https://publications.cnr.it/doc/64553}, VOLUME = {2}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{PIRRELLI_2010_ARTICLE_P_157483, AUTHOR = {Pirrelli, V.}, TITLE = {Interdisciplinary Approaches to Understanding Word Processing and Storage}, YEAR = {2010}, ABSTRACT = {The present collection of papers originates from a successful application to the European Science Foundation Exploratory Workshop Programme for the "Words in Action" workshop. The workshop, convened in Pisa on the 12th and 13th of October 2009, brought together experts of various scientific domains and theoretical inclinations to advance the current awareness of theoretical, typological, psycholinguistic, computational and neuro-physiological issues in word processing and storage, with a view to promoting novel methods of research and assessment for grammar architecture and language physiology.}, KEYWORDS = {Morphology, Word Processing, Word Learning, Mental Lexicon}, PAGES = {91-95}, URL = {https://publications.cnr.it/doc/157483}, VOLUME = {IX}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @INCOLLECTION{PIRRELLI_2010_INCOLLECTION_PGB_136469, AUTHOR = {Pirrelli, V. and Guevara, E. 
and Baroni, M.}, TITLE = {Computational issues in compound processing}, YEAR = {2010}, ABSTRACT = {Understanding compounds is a challenging computational task, cutting across multiple levels of linguistic analysis and touching upon intricate issues of representation, grammar architecture and algorithmic processing. At the same time, compounds raise all these problems in the most direct and exemplar way. From this perspective, they are an ideal probe into core issues of language architecture, making us pause about the need for advanced processing models and multi-disciplinary approaches to long-lasting linguistic cruces. The paper reviews some of the lessons that can be learned from reading twenty years of computational literature on the topic and assesses them against the background of germane theoretical and cognitive issues.}, KEYWORDS = {Morphology, Compounding, Natural Language Processing, Mental Lexicon}, PAGES = {271-285}, URL = {https://publications.cnr.it/doc/136469}, PUBLISHER = {John Benjamins (Amsterdam, NLD)}, ISBN = {9789027248275}, BOOKTITLE = {Cross-disciplinary issues in compounding}, EDITOR = {Scalise, S. and Vogel, I.}, } @EDITORIAL{PIRRELLI_2010_EDITORIAL_P_273429, AUTHOR = {Pirrelli, V.}, TITLE = {Interdisciplinary Approaches to Understanding Word Processing and Storage}, YEAR = {2010}, ABSTRACT = {The present collection of papers originates from a successful application to the European Science Foundation Exploratory Workshop Programme for the "Words in Action" workshop. 
The workshop, convened in Pisa on the 12th and 13th of October 2009, brought together experts of various scientific domains and theoretical inclinations to advance the current awareness of theoretical, typological, psycholinguistic, computational and neuro-physiological issues in word processing and storage, with a view to promoting novel methods of research and assessment for grammar architecture and language physiology.}, PAGES = {91-240}, URL = {https://publications.cnr.it/doc/273429}, PUBLISHER = {Società Editrice il Mulino (Bologna, ITA)}, } @TECHREPORT{PIRRELLI_2010_TECHREPORT_PLMDGM_367784, AUTHOR = {Pirrelli, V. and Lenci, A. and Montemagni, S. and Dell'Orletta, F. and Giovannetti, E. and Marchi, S.}, TITLE = {ConnectToLife (modulo semantico)-Rapporto tecnico finale}, YEAR = {2010}, ABSTRACT = {Il presente documento costituisce il rapporto tecnico finale del progetto Connect-To-Life (modulo semantico) relativo alle attività svolte dall'unità ILC-CNR.}, KEYWORDS = {annotazione linguistica, estrazione di termini, clustering semantico, trattamento automatico della lingua, costruzione di ontologie}, PAGES = {16}, URL = {https://publications.cnr.it/doc/367784}, } @MISC{FERRO_2010_MISC_FMP_157477, AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {Word self-organization in time and space? Algorithms and evaluation}, YEAR = {2010}, ABSTRACT = {Words are time-bound signals and are amenable to temporal processing. The human brain has an innate ability to encode serial events into spatial patterns of neural activity (David Beiser \& James Houk, 1998). Temporal Hebbian SOMs (THSOMs) allow us to take the two assumptions seriously. They provide a novel computational framework accounting for many paradigm-based generalizations in a natural and insightful way. 
This claim is validated on inflectional data from German, English and Italian.}, KEYWORDS = {Morphology, Word Processing and Learning, Mental Lexicon, L1, SOMs}, URL = {https://publications.cnr.it/doc/157477}, } @INCOLLECTION{DELLORLETTA_2009_INCOLLECTION_DLMMP_184585, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-2-Knowledge: una piattaforma linguistico-computazionale per l'estrazione di conoscenza da testi}, YEAR = {2009}, ABSTRACT = {The paper describes the automatic extraction of domain knowledge from Italian document collections and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.}, KEYWORDS = {Term extraction, Ontology Learning}, PAGES = {285-300}, URL = {https://publications.cnr.it/doc/184585}, PUBLISHER = {Bulzoni (Roma, ITA)}, ISBN = {978-88-7870-469-5}, EDITOR = {Ferrari, G. and Benatti, R. and Mosca, M.}, } @INCOLLECTION{LENCI_2009_INCOLLECTION_LMP_186141, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Annotazione sintattica di corpora: aspetti metodologici}, YEAR = {2009}, ABSTRACT = {Un assunto sempre più condiviso nell'ambito degli studi sull'acquisizione sia di L1 che di L2 è che l'evidenza empirica privilegiata debba essere rappresentata da corpora di produzioni scritte o orali degli apprendenti, estensivamente annotate a molteplici livelli di rappresentazione linguistica. Più in generale, corpora lemmatizzati e annotati a livello morfosintattico fanno ormai parte dello strumentario comune del linguista. 
Accanto ad essi, si fa però strada l'esigenza di disporre di risorse testuali più sofisticate dal punto di vista delle modalità di esplorazione linguistica, come ad esempio corpora annotati a livello sintattico (le cosiddette treebank). Questi consentono infatti di osservare i processi di convergenza degli apprendenti verso la lingua "obiettivo" anche a livello di specifici tratti grammaticali astratti o di macro-strutture linguistiche. L'articolo propone uno schema di annotazione sintattica caratterizzato da un doppio livello di codifica. Si tratta di un approccio originale che differisce dalla maggior parte degli schemi di annotazione sintattica esistenti per due aspetti: 1. la separazione della dimensione relazionale da quella a costituenti, che sono trattati a livelli di annotazione indipendenti, ma al tempo stesso correlati, in modo tale che lo stesso testo è simultaneamente interrogabile ai due livelli; 2. la rappresentazione a costituenti fornisce una rappresentazione del testo come sequenza di proto-costituenti sintagmatici non ricorsivi. Questa strategia di annotazione permette una fattorizzazione di diversi aspetti e dimensioni della struttura sintattica che risulta promettente da un lato per l'annotazione di corpora di lingua "non-standard" come quelli contenenti produzioni di apprendenti di L1 o L2, sia come punto di partenza per successivi processi di estrazione di informazione linguistica dal testo. Dopo aver illustrato le motivazioni sottostanti allo schema proposto, ciascun livello di rappresentazione (chunking e dipendenze funzionali) viene illustrato in dettaglio, mostrandone anche la possibilità di combinazione sullo stesso testo. 
L'articolo si chiude con la discussione di prospettive di uso di corpora annotati secondo lo schema di annotazione proposto.}, KEYWORDS = {Corpora annotati, annotazione sintattica}, PAGES = {25-46}, URL = {https://publications.cnr.it/doc/186141}, PUBLISHER = {Guerra Edizioni (Perugia, ITA)}, ISBN = {978-88-557-0168-6}, BOOKTITLE = {CORPORA DI ITALIANO L2: TECNOLOGIE, METODI, SPUNTI TEORICI}, EDITOR = {Andorno, C. and Rastelli, S.}, } @INCOLLECTION{LENCI_2009_INCOLLECTION_LMPV_136465, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Ontology learning from Italian legal texts}, YEAR = {2009}, ABSTRACT = {The paper reports on the methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. Evaluated results, however preliminary, show the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.}, KEYWORDS = {Ontology Learning, document management, legal knowledge extraction}, PAGES = {75-94}, URL = {https://publications.cnr.it/doc/136465}, VOLUME = {188}, DOI = {10.3233/978-1-58603-942-4-75}, ISBN = {978-1-58603-942-4}, BOOKTITLE = {Law, Ontologies and the Semantic Web-Channelling the Legal Information Flood}, EDITOR = {Breuker, J. and Casanovas, P. and Klein, M. C. A. 
and Francesconi, E.}, } @INPROCEEDINGS{PIRRELLI_2009_INPROCEEDINGS_P_288113, AUTHOR = {Pirrelli, V.}, TITLE = {Comprendere un documento con il computer}, YEAR = {2009}, ABSTRACT = {In this talk, I shall deal with the complex process of computer reading and understanding of text documents as the result of the interleaving of a number of levels of processing, both linguistic and extra-linguistic, such as parsing, classifying, learning and knowing. In real tasks, there exists no parsing without classifying, no classifying without learning, no learning without knowing and eventually no knowing without "doing things with words". Only through robust integration and co-operation of less than optimal components and inter-disciplinary cross-fertilization we can hope to develop general and comprehensive solutions which are more than the sums of their parts.}, KEYWORDS = {Gestione documentale, annotazione linguistica, indicizzazione}, URL = {https://publications.cnr.it/doc/288113}, CONFERENCE_NAME = {Documentazione, terminologia e Scienze dell'Informazione}, CONFERENCE_PLACE = {CNR, Roma}, CONFERENCE_DATE = {12 giugno 2009}, } @TECHREPORT{PIRRELLI_2009_TECHREPORT_PM_176379, AUTHOR = {Pirrelli, V. and Marzi, C.}, TITLE = {Words In Action: Interdisciplinary Approaches to Understanding Word Processing and Storage}, YEAR = {2009}, ABSTRACT = {Almost all levels of language knowledge and processing (from phonology, to syntax and semantics) are known to be affected by knowledge of word structure at varying degrees. A better understanding of the human strategies involved in learning and processing word structure thus lies at the heart of our comprehension of the basic mechanisms serving both language and cognition and is key to addressing some fundamental challenges for the study of the physiology of grammar. 
On the 12th and 13th of October 2009, in the Research Area of the Italian National Research Council (CNR) in Pisa, 26 scholars from Europe, Canada and the United States were convened to take part in the European Science Foundation Exploratory Workshop "Words in Action: Interdisciplinary Approaches To Understanding Word Processing And Storage". The workshop brought together experts of various scientific domains and different theoretical inclinations to advance the current awareness of theoretical, historical, psycholinguistic, computational and neurophysiological issues in morphological processing and learning, with a view to assessing levels of research convergence and exploring the potential for synergy and strategic co-operation.}, KEYWORDS = {Morphology, Word Processing, Word Learning, Mental Lexicon}, URL = {https://publications.cnr.it/doc/176379}, } @ARTICLE{DELLORLETTA_2008_ARTICLE_DLMMPV_64541, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio}, YEAR = {2008}, ABSTRACT = {The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. 
Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.}, KEYWORDS = {Natural Language Processing, Machine Learning, Knowledge extraction from texts, Ontology learning, Legal ontologies}, PAGES = {197-218}, URL = {https://publications.cnr.it/doc/64541}, VOLUME = {26}, PUBLISHER = {Aida (Roma, Italia)}, ISSN = {1594-2201}, JOURNAL = {Aida Informazioni (Online)}, } @INPROCEEDINGS{DELLORLETTA_2008_INPROCEEDINGS_DLMMPV_84707, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio}, YEAR = {2008}, ABSTRACT = {The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.}, KEYWORDS = {Natural Language Processing, Machine Learning, Knowledge extraction from texts, Ontology learning, Legal ontologies}, PAGES = {197-218}, URL = {http://www.assiterm91.it/wp-content/uploads/2010/11/Convegno-2008.pdf}, VOLUME = {Anno 26, numero 1-2}, PUBLISHER = {Aida (Roma, Italia)}, ISSN = {1594-2201}, CONFERENCE_NAME = {Atti del Convegno Nazionale Ass. I. 
Term}, CONFERENCE_PLACE = {Arcavacata di Rende (CS)}, CONFERENCE_DATE = {5-7/06/2008}, BOOKTITLE = {Terminologia analisi testuale e documentazione nella città digitale}, } @INPROCEEDINGS{DELLORLETTA_2008_INPROCEEDINGS_DLMMPV_84698, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Marchi, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Acquiring Legal Ontologies from Domain-specific Texts}, YEAR = {2008}, ABSTRACT = {The paper reports on methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts in the environmental domain. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. Evaluated results, however preliminary, are very encouraging, showing the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.}, KEYWORDS = {Ontology learning, Document management, knowledge extraction from texts, Natural Language Processing}, PAGES = {98-101}, URL = {https://publications.cnr.it/doc/84698}, CONFERENCE_NAME = {LangTech 2008}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {28-29/02/2008}, } @INPROCEEDINGS{LENCI_2008_INPROCEEDINGS_LMPM_84730, AUTHOR = {Lenci, A. and McGillivray, B. and Pirrelli, V. 
and Montemagni, S.}, TITLE = {Unsupervised Acquisition of Verb Subcategorization Frames from Shallow-Parsed Corpora}, YEAR = {2008}, KEYWORDS = {Acquisition, Machine Learning, Corpus (creation, annotation, etc.), Lexicon, Lexical database}, URL = {https://publications.cnr.it/doc/84730}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{PIRRELLI_2008_INPROCEEDINGS_P_288118, AUTHOR = {Pirrelli, V.}, TITLE = {Morphology Learning as Paradigm Learning: Developmental and Computational Evidence from Romance Languages}, YEAR = {2008}, ABSTRACT = {In a comprehensive comparison of the developmental stages in the acquisition of inflection in nearly two dozen languages (in the Indo-European, Ugro-Finnic and Semitic families plus Turkish), Bittner et al. (2003) arrive at the conclusion that the transition from lexical processing to morphological patterning is not the automatic outcome of rote lexical storage, but rather the result of an active construction of the child, crucially conditioned by typological factors such as richness, uniformity and transparency of inflectional paradigms. In the present talk I intend to assess this hypothesis by observing the dynamics of a purely morphological acquisition of Romance verb paradigms through a family of Artificial Neural Networks known as Self-Organizing Maps (Kohonen 2002). I shall show that the interplay between built-in principles of acquisition of time-coded sequences and morphology-specific principles of organization of inflectional paradigms can go a long way in accounting for the typological trends highlighted in Bittner et al. (2003). Reported results allow us to draw some general conclusions concerning the process of morphology acquisition as paradigm-based learning and lead to a reappraisal of the traditional one-route vs. 
dual-route debate in morphology processing and learning.}, URL = {http://www.mod-langs.ox.ac.uk/romance-morphology/oxmorph1.html}, CONFERENCE_NAME = {First Oxford Workshop on Romance Verb Morphology}, CONFERENCE_PLACE = {Trinity College, Oxford, UK}, CONFERENCE_DATE = {28 August 2008}, } @MISC{PIRRELLI_2008_MISC_PM_151569, AUTHOR = {Pirrelli, V. and Montemagni, S.}, TITLE = {AnITA}, YEAR = {2008}, KEYWORDS = {NLP Tools}, URL = {https://publications.cnr.it/doc/151569}, } @ARTICLE{BARONI_2007_ARTICLE_BGP_64535, AUTHOR = {Baroni, M. and Guevara, E. and Pirrelli, V.}, TITLE = {NN Compounds in Italian: Modelling Category Induction and Analogical Extension}, YEAR = {2007}, KEYWORDS = {Morphology, Compounding, Mental Lexicon, Lexical Semantics}, PAGES = {263-290}, URL = {https://publications.cnr.it/doc/64535}, VOLUME = {2}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{CALDERONE_2007_ARTICLE_CHP_64536, AUTHOR = {Calderone, B. and Herreros, I. and Pirrelli, V.}, TITLE = {Learning Inflection: The Importance of Starting Big}, YEAR = {2007}, ABSTRACT = {Perchè i sistemi verbali morfologicamente più "ricchi" vengono appresi da un bambino con maggiore facilità di sistemi più "poveri", caratterizzati da maggiore suppletivismo e da un minor numero di marcatori flessionali? Studi recenti condotti nel quadro della Morfologia Naturale (Bittner et al. 2003) hanno evidenziato il ruolo centrale svolto in questo apparente paradosso dal "contrasto morfologico" e dalla relazione biunivoca tra forma e contenuto all'interno del paradigma flessionale. Il presente lavoro illustra da questo punto di vista il comportamento di un modello originale di reti neurali artificiali auto-organizzanti con architettura "a cascata" e apprendimento asincrono, addestrato su forme verbali codificate fonologicamente. 
Il modello addestrato è in grado di memorizzare sia configurazioni morfologiche astratte, corrispondenti alle terminazioni flessionali di forme verbali regolari e irregolari, sia forme flesse piene, in funzione della loro frequenza per tipo e per unità nel corpus di addestramento. Il comportamento del modello è valutato su due differenti corpora di addestramento, italiano e inglese, entrambi campionati dal database CHILDES. L'analisi della topologia delle informazioni memorizzate dal modello addestrato consente di trarre alcune conclusioni generali sull'interazione tra processi di acquisizione di sequenze fonotattiche e principi di acquisizione paradigmatica. Le implicazioni teoriche dei risultati vengono inoltre discusse alla luce del tradizionale dibattito tra modelli "a meccanismo singolo" e "a meccanismo doppio" di acquisizione morfologica.},
  PAGES = {175--200},
  URL = {https://publications.cnr.it/doc/64536},
  VOLUME = {2},
  PUBLISHER = {Il Mulino, Bologna (Italia)},
  ISSN = {1720-9331},
  JOURNAL = {Lingue e linguaggio},
}
@ARTICLE{DELLORLETTA_2007_ARTICLE_DFLMP_64537,
  AUTHOR = {Dell'Orletta, F. and Federico, M. and Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Maximum Entropy for Italian PoS Tagging},
  YEAR = {2007},
  ABSTRACT = {L'articolo illustra le prestazioni del ILC-UniPi MaxEnt PoS Tagger in Evalita 2007.
The report contains a description of the ILC-UniPi MaxEnt PoS Tagger performance in Evalita 2007.},
  PAGES = {10--11},
  URL = {https://publications.cnr.it/doc/64537},
  VOLUME = {IV(2)},
}
@ARTICLE{PIRRELLI_2007_ARTICLE_P_64534,
  AUTHOR = {Pirrelli, V.},
  TITLE = {Psycho-Computational Issues in Morphology Learning and Processing: An Overture},
  YEAR = {2007},
  PAGES = {131--138},
  URL = {https://publications.cnr.it/doc/64534},
  VOLUME = {2},
}
@ARTICLE{PIRRELLI_2007_ARTICLE_P_64539,
  AUTHOR = {Pirrelli, V.},
  TITLE = {Lingue e Linguaggio},
  YEAR = {2007},
  PAGES = {130--300},
  URL = {https://publications.cnr.it/doc/64539},
  VOLUME = {2},
}
@INPROCEEDINGS{BARONI_2007_INPROCEEDINGS_BGP_84669,
  AUTHOR = {Baroni, M. and Guevara, E. and Pirrelli, V.},
  TITLE = {Sulla tipologia dei composti N N in italiano: principi categoriali ed evidenza distribuzionale a confronto},
  YEAR = {2007},
  KEYWORDS = {Morphology, Compounding, Mental Lexicon, Lexical Semantics},
  URL = {https://publications.cnr.it/doc/84669},
  ISBN = {978-88-7870-469-5},
  CONFERENCE_NAME = {XL Congresso Internazionale di Studi della Società di Linguistica Italiana (SLI 2006)},
  CONFERENCE_PLACE = {Vercelli},
  CONFERENCE_DATE = {settembre 2006},
  BOOKTITLE = {Linguistica e modelli tecnologici della ricerca},
  EDITOR = {Ferrari, G. and Benatti, R. and Mosca, M.},
}
@INPROCEEDINGS{DELLORLETTA_2007_INPROCEEDINGS_DFLMP_84696,
  AUTHOR = {Dell'Orletta, F. and Federico, M. and Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Maximum Entropy for Italian PoS Tagging},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/84696},
  CONFERENCE_NAME = {Evaluation of NLP Tools for Italian-EVALITA 2007},
  CONFERENCE_PLACE = {Roma},
}
@INPROCEEDINGS{DELLORLETTA_2007_INPROCEEDINGS_DLMMP_84687,
  AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S.
and Pirrelli, V.},
  TITLE = {Text-2-Knowledge: una piattaforma linguistico-computazionale per l'estrazione di conoscenza da testi},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/84687},
  CONFERENCE_NAME = {XL Congresso Internazionale di Studi della Società di Linguistica Italiana (SLI 2006)},
  CONFERENCE_PLACE = {Vercelli},
}
@INPROCEEDINGS{LENCI_2007_INPROCEEDINGS_LMPV_84693,
  AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V. and Venturi, G.},
  TITLE = {NLP-based ontology learning from legal texts. A case study},
  YEAR = {2007},
  ABSTRACT = {The paper reports on the methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts in the environmental domain. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. Evaluated results, however preliminary, are very encouraging, showing the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.},
  PAGES = {113--129},
  URL = {https://publications.cnr.it/doc/84693},
  CONFERENCE_NAME = {II Workshop on Legal Ontologies and Artificial Intelligence Techniques (LOAIT'07)},
  CONFERENCE_PLACE = {Stanford},
  CONFERENCE_DATE = {4 giugno 2007},
}
@INPROCEEDINGS{PIRRELLI_2007_INPROCEEDINGS_P_84688,
  AUTHOR = {Pirrelli, V.},
  TITLE = {On the cognitive autonomy of morphological processing},
  YEAR = {2007},
  ABSTRACT = {Does morphological knowledge define an autonomous domain of grammar or is it rather the by-product of syntax-based principles and representations?
We address this question by tapping a large body of cognitive language evidence, focusing on what is known about the way speakers learn, structure, access and use their mental morphological lexicon to parse and produce words. In line with the assumption that empirical evidence of concrete language usage can shed light on issues of domain-specificity in grammar, we conclude that it is difficult to reconcile usage-based language facts with the view that morphology is the syntax of morphemes. However, it would be equally misleading and logically unnecessary to characterise the functional autonomy of morphology from syntax in terms of processing modularity.},
  KEYWORDS = {Theoretical Morphology, Mental Lexicon, Language Learning, Self-Organizing Maps},
  PAGES = {245--269},
  URL = {https://publications.cnr.it/doc/84688},
  VOLUME = {37},
  PUBLISHER = {LINCOM academic publishers (LINCOM GmbH) (München, DEU)},
  ISBN = {9783895865046},
  CONFERENCE_NAME = {Actes du colloque international de Morphologie 4èmes Décembrettes},
  CONFERENCE_PLACE = {Toulouse},
  CONFERENCE_DATE = {4-5 Dicembre 2005},
  BOOKTITLE = {Morphologie à Toulouse},
  EDITOR = {Hathout, N. and Montermini, F.},
}
@INPROCEEDINGS{PIRRELLI_2007_INPROCEEDINGS_PH_84689,
  AUTHOR = {Pirrelli, V. and Herreros, I.},
  TITLE = {Learning Inflection by Itself},
  YEAR = {2007},
  ABSTRACT = {The paper reports on a few experimental results of a computer simulation of learning the verb morphology of Italian, English and Arabic with the same type of neural architecture based on Kohonen's self-organizing maps. Issues of the mental organization of the resulting morphological lexica are explored in some detail and discussed in the light of the differential distribution of regular and irregular inflections in the three languages.
It is shown that typologically diverse, non trivial aspects of the underlying paradigmatic structure of the three verb systems effectively emerge through sheer exposure to realistic distributions of verb forms devoid of morpho-syntactic content. We argue that these results go a long way towards explaining how global organization effects in the mental morphological lexicon may eventually result from local word processing steps.},
  KEYWORDS = {Theoretical Morphology, Mental Lexicon, Language Learning, Self-Organizing Maps},
  PAGES = {269--290},
  URL = {http://mmm.lingue.unibo.it/},
  PUBLISHER = {Università degli Studi di Bologna (Bologna, Italia)},
  ISSN = {1826-7491},
  CONFERENCE_NAME = {V Mediterranean Morphology Meeting},
  CONFERENCE_PLACE = {Fréjus, France},
  CONFERENCE_DATE = {2005},
  BOOKTITLE = {Proceedings of the Fifth Mediterranean Morphology Meeting},
  EDITOR = {Booij, G. and Ducceschi, L. and Fradin, B. and Guevara, E. and Ralli, A. and Scalise, S.},
}
@INPROCEEDINGS{SORIA_2007_INPROCEEDINGS_SBLMP_84682,
  AUTHOR = {Soria, C. and Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Automatic Extraction of Semantics in Law Documents},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/84682},
  CONFERENCE_NAME = {V Legislative XML Workshop},
  CONFERENCE_PLACE = {Firenze},
  CONFERENCE_DATE = {2007},
}
@TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157412,
  AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.},
  TITLE = {Segmentazione di un Testo Italiano in Token},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157412},
}
@TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157413,
  AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A.
and Pirrelli, V.},
  TITLE = {Language Recognition Tool, Specifiche di Implementazione},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157413},
}
@TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157414,
  AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.},
  TITLE = {Analisi Morfosintattica per l'Italiano},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157414},
}
@TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157415,
  AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.},
  TITLE = {Specifiche di Chunking per l'Italiano},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157415},
}
@TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157416,
  AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.},
  TITLE = {Specifiche di Named Entity Recognition per l'Italiano},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157416},
}
@TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157417,
  AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.},
  TITLE = {Segmentazione di un Testo Inglese in Token},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157417},
}
@TECHREPORT{SASAKI_2007_TECHREPORT_SMAPMMP_157423,
  AUTHOR = {Sasaki, Y. and McNaught, J. and Ananiadou, S. and Pezik, P. and McGillivray, B. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Augmented Version of Bio-Lexicon},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157423},
}
@INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BCGLMPRS_84608,
  AUTHOR = {Bartolini, R. and Caracciolo, C. and Giovannetti, E. and Lenci, A. and Marchi, S. and Pirrelli, V. and Renso, C.
and Spinsanti, L.},
  TITLE = {Creation and Use of Lexicons and Ontologies for NL Interfaces to Databases},
  YEAR = {2006},
  URL = {https://publications.cnr.it/doc/84608},
  CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation (LREC)},
  CONFERENCE_PLACE = {Genova},
  CONFERENCE_DATE = {2006},
}
@INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BCGLMPRS_91313,
  AUTHOR = {Bartolini, R. and Caracciolo, C. and Giovannetti, E. and Lenci, A. and Marchi, S. and Pirrelli, V. and Renso, C. and Spinsanti, L.},
  TITLE = {Creation and use of lexicons and ontologies for natural language interface to databases},
  YEAR = {2006},
  ABSTRACT = {In this paper we present an original approach to natural language query interpretation which has been implemented within the FuLL (Fuzzy Logic and Language) Italian project of BC S.r.l. In particular, we discuss here the creation of linguistic and ontological resources, together with the exploitation of existing ones, for natural language-driven database access and retrieval. Both the database and the queries we experiment with are Italian, but the methodology we broach naturally extends to other languages.},
  KEYWORDS = {Natural language processing, ontologies, gis, databases},
  PAGES = {6},
  URL = {https://publications.cnr.it/doc/91313},
  CONFERENCE_NAME = {LREC Conference},
  CONFERENCE_PLACE = {Genova},
  CONFERENCE_DATE = {24-26/05/2006},
  BOOKTITLE = {LREC 2006},
}
@INPROCEEDINGS{DELLORLETTA_2006_INPROCEEDINGS_DLMP_84660,
  AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Probing the space of grammatical variation: induction of cross-lingual grammatical constraints from treebanks},
  YEAR = {2006},
  ABSTRACT = {The paper reports on a detailed quantitative analysis of distributional language data of both Italian and Czech, highlighting the relative contribution of a number of distributed grammatical factors to sentence-based identification of subjects and direct objects.
The work uses a Maximum Entropy model of stochastic resolution of conflicting grammatical constraints and is demonstrably capable of putting explanatory theoretical accounts to the test of usage-based empirical verification.},
  PAGES = {21--28},
  URL = {https://publications.cnr.it/doc/84660},
  PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)},
  ISBN = {1-932432-78-7},
  CONFERENCE_NAME = {Coling/ACL 2006},
  CONFERENCE_PLACE = {Sydney (Australia)},
  CONFERENCE_DATE = {22 July 2006},
  BOOKTITLE = {Proceedings of the Workshop on Frontiers in Linguistically Annotated Corpora 2006 (LAC 06)},
}
@INPROCEEDINGS{PIRRELLI_2006_INPROCEEDINGS_P_112917,
  AUTHOR = {Pirrelli, V.},
  TITLE = {Parlare per sapere: la lingua come accesso alla conoscenza},
  YEAR = {2006},
  URL = {https://publications.cnr.it/doc/112917},
  CONFERENCE_NAME = {Conferenza TAL 2006: Uomini e macchine, un colloquio possibile},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {2006},
}
@INPROCEEDINGS{PIRRELLI_2006_INPROCEEDINGS_PLM_112916,
  AUTHOR = {Pirrelli, V. and Lenci, A. and Montemagni, S.},
  TITLE = {Probing the space of grammatical variation: induction of cross-lingual grammatical constraints from treebanks},
  YEAR = {2006},
  URL = {https://publications.cnr.it/doc/112916},
  CONFERENCE_NAME = {Language resources and language research: typology, second language acquisition, English Linguistics},
  CONFERENCE_PLACE = {Pavia},
  CONFERENCE_DATE = {2006},
}
@MISC{BARTOLINI_2006_MISC_BDLMMP_151563,
  AUTHOR = {Bartolini, R. and Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Text-to-Knowledge (T2K) Versione 2},
  YEAR = {2006},
  ABSTRACT = {Versione 2. Text-to-Knowledge (T2K) è una piattaforma software di supporto avanzato alla gestione documentale per la creazione dinamica di repertori terminologici e ontologie di dominio a partire da testi e per l'indicizzazione concettuale di documenti.
Il sistema T2K si propone di offrire una batteria integrata di strumenti avanzati di analisi linguistica del testo, analisi statistica e apprendimento automatico del linguaggio, destinati a offrire una rappresentazione accurata del contenuto di una base documentale non strutturata, per scopi di indicizzazione avanzata e navigazione intelligente. I risultati di questo processo di acquisizione sono annotati in forma di metadati XML, offrendo in tal modo la prospettiva di una sempre crescente e diretta interoperabilità con sistemi automatici per la produzione di contenuti digitali selezionati e strutturati dinamicamente su misura, per diversi profili di utenza. Versioni prototipali di T2K sono già operative su alcuni portali della pubblica amministrazione e sono state applicate per l'indicizzazione di contenuti didattici multimediali. E' in corso l'integrazione della tecnologia T2K nel sistema di gestione informatica di documentazione scientifica del CNR.},
  KEYWORDS = {text to knowledge, nlp, estrazione terminologica, ontology learning, indicizzazione terminologica},
  URL = {https://publications.cnr.it/doc/151563},
}
@ARTICLE{LENCI_2005_ARTICLE_LMP_64502,
  AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Acquiring and Representing Meaning: Theoretical and Computational Perspectives},
  YEAR = {2005},
  PAGES = {19--66},
  URL = {https://publications.cnr.it/doc/64502},
  VOLUME = {22-23},
}
@BOOK{LENCI_2005_BOOK_LMP_136436,
  AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Acquiring and Representing Word Meaning: Computational perspectives},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/136436},
  PUBLISHER = {Istituti Editoriali e Poligrafici Internazionali (Pisa-Roma, ITA)},
  ISBN = {88-8147-413-1},
}
@BOOK{LENCI_2005_BOOK_LMP_136437,
  AUTHOR = {Lenci, A. and Montemagni, S.
and Pirrelli, V.},
  TITLE = {Testo e computer-Elementi di linguistica computazionale},
  YEAR = {2005},
  ABSTRACT = {In che modo il computer può aiutarci a comprendere come funziona la nostra lingua? Cosa significa analizzare un testo con l'aiuto di un calcolatore? In che misura possiamo estendere le potenzialità del computer rendendolo capace di interagire con gli utenti umani nella loro lingua? Queste e altre domande sono l'oggetto di indagine della linguistica computazionale, una disciplina che ha al suo centro proprio il rapporto tra lingua e computer. Il libro fornisce gli elementi di base della linguistica computazionale partendo da un interesse primario per il testo, la sua struttura e il suo contenuto. Il volume propone una sintesi equilibrata e accessibile tra sapere e fare, nozioni di base e loro applicazione, ed è destinato in primo luogo agli studenti delle facoltà umanistiche e scientifiche interessati all'interazione tra scienze umane e informatica, ma anche agli studiosi che vogliano imparare a usare il computer come strumento di ricerca sul linguaggio.},
  KEYWORDS = {Linguistica Computazionale},
  PAGES = {255},
  URL = {https://publications.cnr.it/doc/136437},
  PUBLISHER = {Carocci (Roma, ITA)},
  ISBN = {8843034251},
}
@EDITORIAL{PIRRELLI_2005_EDITORIAL_PM_146069,
  AUTHOR = {Pirrelli, V. and Montemagni, S.},
  TITLE = {Acquisition and Representation of Word Meaning: Theoretical and computational perspectives},
  YEAR = {2005},
  KEYWORDS = {Lexical semantics, Distributional semantics, Lexicon acquisition},
  URL = {https://publications.cnr.it/doc/146069},
  VOLUME = {XXII-XXIII},
  PUBLISHER = {Istituti Editoriali e Poligrafici Internazionali (Pisa-Roma, ITA)},
  ISBN = {88-8147-413-1},
}
@INPROCEEDINGS{BARTOLINI_2005_INPROCEEDINGS_BGLMP_84576,
  AUTHOR = {Bartolini, R. and Giorgetti, D. and Lenci, A. and Montemagni, S.
and Pirrelli, V.},
  TITLE = {Automatic Incremental Term Acquisition from Domain Corpora},
  YEAR = {2005},
  ABSTRACT = {We describe a technique for the acquisition of terms from Italian domain text corpora, which relies both on sophisticated linguistic analysis and on statistical measures applied to linguistically processed text rather than to raw text as it is usually the case. The main advantage of this technique is that minimal a priori knowledge of term structure is required, thus allowing to explore and discover terms in a given domain without imposing a strict pattern matching structure on them, and also to easily extend it to different domains. The approach we present in this paper is incremental as it may be iterated to discover terms of increasing complexity built on top of terms discovered in the previous iteration. The reason why it is convenient to adopt such an incremental approach is that it allows to "clean" data from noise in the first step, elicitating the constituent terms, and then to refine term acquisition on "skimmed" term data.},
  PAGES = {293--300},
  URL = {https://publications.cnr.it/doc/84576},
  CONFERENCE_NAME = {7th International conference on Terminology and Knowledge Engineering (TKE2005)},
  CONFERENCE_PLACE = {Copenhagen},
  CONFERENCE_DATE = {2005},
  BOOKTITLE = {Proceedings of TKE 2005-7th International Conference on Terminology and Knowledge Engineering},
}
@INPROCEEDINGS{DELLORLETTA_2005_INPROCEEDINGS_DLMP_84579,
  AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S.
and Pirrelli, V.},
  TITLE = {Climbing the path to grammar: a maximum entropy model of subject/object learning},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/84579},
  CONFERENCE_NAME = {Psychocomputational Models of Human Language Acquisition (PsychoCompLA-2005)},
  CONFERENCE_PLACE = {Ann Arbor (USA)},
}
@INPROCEEDINGS{PIRRELLI_2005_INPROCEEDINGS_P_112918,
  AUTHOR = {Pirrelli, V.},
  TITLE = {On the cognitive autonomy of morphological processing},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112918},
  CONFERENCE_NAME = {4èmes Décembrettes},
  CONFERENCE_PLACE = {Toulouse},
  CONFERENCE_DATE = {2005},
}
@INPROCEEDINGS{PIRRELLI_2005_INPROCEEDINGS_PH_112921,
  AUTHOR = {Pirrelli, V. and Herreros, I.},
  TITLE = {Learning Morphology by Itself},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112921},
  CONFERENCE_NAME = {5th Mediterranean Morphology Meeting},
  CONFERENCE_PLACE = {Fréjus, France},
  CONFERENCE_DATE = {2005},
}
@INPROCEEDINGS{PIRRELLI_2005_INPROCEEDINGS_PL_112922,
  AUTHOR = {Pirrelli, V. and Lenci, A.},
  TITLE = {Dalla raccolta dati alla diagnostica: prospettive per una},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112922},
  CONFERENCE_NAME = {XIII Congresso della Società Italiana di Psicofisiologia},
  CONFERENCE_PLACE = {Marina di Carrara},
  CONFERENCE_DATE = {2005},
}
@TECHREPORT{BARTOLINI_2005_TECHREPORT_BCLMP_157365,
  AUTHOR = {Bartolini, R. and Caracciolo, C. and Lenci, A. and Marchi, S. and Pirrelli, V.},
  TITLE = {Motore semantico.
Documento di progettazione e sviluppo},
  YEAR = {2005},
  ABSTRACT = {Il presente documento descrive architettura, funzionalità e algoritmo di un componente software dedicato, designato come "Motore Semantico", che ha lo scopo di produrre rappresentazioni logico-concettuali, ontologicamente interpretate, di interrogazioni in linguaggio naturale su una base di dati di tipo anche GIS.},
  KEYWORDS = {NLP},
  PAGES = {1--42},
  URL = {https://publications.cnr.it/doc/157365},
}
@TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMMP_157367,
  AUTHOR = {Bartolini, R. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Personalizzazione degli Italian NLP tools},
  YEAR = {2005},
  ABSTRACT = {Il presente documento intende offrire criteri e risultati della fase di personalizzazione dei moduli per l'analisi automatica del testo (Italian NLP tools o "AnITA") all'interno dell'architettura prevista nell'ambito del progetto FuLL.},
  KEYWORDS = {NLP},
  PAGES = {13},
  URL = {https://publications.cnr.it/doc/157367},
}
@TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMP_157369,
  AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Modellazione del motore sintattico e delle strutture dati di supporto},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157369},
}
@TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMMP_157370,
  AUTHOR = {Bartolini, R. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Text-2-Knowledge: Acquisizione semi-automatica di ontologie per l'indicizzazione semantica di documenti},
  YEAR = {2005},
  ABSTRACT = {Text-2-Knowledge, Acquisizione semi-automatica di ontologie per l'indicizzazione semantica di documenti},
  KEYWORDS = {nlp, terminology extraction},
  URL = {https://publications.cnr.it/doc/157370},
}
@TECHREPORT{LENCI_2005_TECHREPORT_LMP_157381,
  AUTHOR = {Lenci, A. and Marchi, S. and Pirrelli, V.},
  TITLE = {Motore del dialogo.
Documento di progettazione e sviluppo},
  YEAR = {2005},
  ABSTRACT = {Il presente documento intende offrire i criteri generali e le funzionalità di base relativi alla progettazione del motore di dialogo nell'ambito del progetto FuLL},
  KEYWORDS = {NLP},
  URL = {https://publications.cnr.it/doc/157381},
}
@TECHREPORT{LENCI_2005_TECHREPORT_LPS_157382,
  AUTHOR = {Lenci, A. and Pirrelli, V. and Soria, C.},
  TITLE = {Modellazione del motore di dialogo e delle strutture dati di supporto},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157382},
}
@MISC{BARTOLINI_2005_MISC_BDGMLMP_151548,
  AUTHOR = {Bartolini, R. and Dell'Orletta, F. and Giorgetti, D. and Marchi, S. and Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Text-to-Knowledge (T2K)},
  YEAR = {2005},
  ABSTRACT = {Piattaforma di estrazione e indicizzazione terminologica.},
  KEYWORDS = {NLP, estrazione terminologica},
  URL = {https://publications.cnr.it/doc/151548},
}
@MISC{BARTOLINI_2005_MISC_BMLMP_151550,
  AUTHOR = {Bartolini, R. and Marchi, S. and Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {NLPtools},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151550},
}
@MISC{GIORGOLO_2005_MISC_GHP_151551,
  AUTHOR = {Giorgolo, G. and Herreros, I. and Pirrelli, V.},
  TITLE = {SOM-ware},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151551},
}
@ARTICLE{FURFARI_2004_ARTICLE_FSPSB_173367,
  AUTHOR = {Furfari, F. and Soria, C. and Pirrelli, V. and Signore, O. and Bianchi Bandinelli, R.},
  TITLE = {NICHE: Natural Interaction in Computerised Home Environments},
  YEAR = {2004},
  ABSTRACT = {Future technologies will provide users with increasing control over surrounding devices embedded in a common home environment. Somewhat paradoxically, this could result in an increase rather than a reduction in complexity if support for high-level interfacing is not introduced.
This concern prompted the launching of a medium-term project aimed at promoting natural user-home interaction along the lines of the Ambient Intelligence vision.},
  KEYWORDS = {HCI, Home Automation, Smart Home},
  PAGES = {55--56},
  URL = {http://www.ercim.org/publication/Ercim_News/enw58/furfari.html},
  VOLUME = {58},
  PUBLISHER = {ERCIM (Le Chesnay)},
  ISSN = {0926-4981},
  JOURNAL = {ERCIM news},
}
@INCOLLECTION{BARTOLINI_2004_INCOLLECTION_BLMPS_30867,
  AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V. and Soria, C.},
  TITLE = {Automatic Classification and Analysis of Provisions in Italian Legal Texts: A Case Study},
  YEAR = {2004},
  ABSTRACT = {In this paper we address the problem of automatically enriching legal texts with semantic annotation, an essential pre-requisite to effective indexing and retrieval of legal documents. This is done through illustration of SALEM (Semantic Annotation for LEgal Management), a computational system developed for automated semantic annotation of (Italian) law texts. SALEM is an incremental system using Natural Language Processing techniques to perform two tasks: i) classify law paragraphs according to their regulatory content, and ii) extract relevant text fragments corresponding to specific semantic roles that are relevant for the different types of regulatory content. The paper sketches the overall architecture of SALEM and reports results of a preliminary case study on a sample of Italian law texts.},
  KEYWORDS = {Annotazione semantica, Classificazione automatica},
  PAGES = {593--604},
  URL = {https://rdcu.be/dftjm},
  VOLUME = {3292},
  DOI = {10.1007/978-3-540-30470-8_72},
  PUBLISHER = {Springer (Berlin, DEU)},
  ISBN = {978-3-540-23664-1},
  BOOKTITLE = {On the Move to Meaningful Internet Systems 2004: OTM 2004 Workshops. OTM 2004},
  EDITOR = {Meersman, R. and Tari, Z. and Corsaro, A.},
}
@INPROCEEDINGS{BARTOLINI_2004_INPROCEEDINGS_BLMP_84570,
  AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S.
and Pirrelli, V.},
  TITLE = {Hybrid Constraints for Robust Parsing: First Experiments and Evaluation},
  YEAR = {2004},
  URL = {https://publications.cnr.it/doc/84570},
  CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Lisbon, Portugal},
  CONFERENCE_DATE = {2004},
}
@INPROCEEDINGS{BARTOLINI_2004_INPROCEEDINGS_BLMPS_84571,
  AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V. and Soria, C.},
  TITLE = {Semantic Mark-up of Italian Legal Texts Through NLP-based Techniques},
  YEAR = {2004},
  URL = {https://publications.cnr.it/doc/84571},
  ISBN = {2-9517408-1-6},
  CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Lisbon, Portugal},
  CONFERENCE_DATE = {2004},
}
@INPROCEEDINGS{PIRRELLI_2004_INPROCEEDINGS_PAM_112920,
  AUTHOR = {Pirrelli, V. and Allegrini, P. and Montemagni, S.},
  TITLE = {Classifying text through time: a complexity science approach to dynamic web page filtering},
  YEAR = {2004},
  URL = {https://publications.cnr.it/doc/112920},
  CONFERENCE_NAME = {International Conference on Text Mining (CIFT)},
  CONFERENCE_PLACE = {La Rochelle, Francia},
  CONFERENCE_DATE = {2004},
}
@INPROCEEDINGS{PIRRELLI_2004_INPROCEEDINGS_PLM_112923,
  AUTHOR = {Pirrelli, V. and Lenci, A. and Montemagni, S.},
  TITLE = {The lexicon in context: distributional evidence and representational issues},
  YEAR = {2004},
  URL = {https://publications.cnr.it/doc/112923},
  CONFERENCE_NAME = {International Colloquium: Word Structure and Lexical Systems: models and applications},
  CONFERENCE_PLACE = {Pavia},
  CONFERENCE_DATE = {2004},
}
@TECHREPORT{BARTOLINI_2004_TECHREPORT_BGLMP_157375,
  AUTHOR = {Bartolini, R. and Giorgetti, D. and Lenci, A. and Montemagni, S.
and Pirrelli, V.},
  TITLE = {Text-2-Knowledge: Acquisizione automatica di ontologie per l'indicizzazione semantica di documenti},
  YEAR = {2004},
  URL = {https://publications.cnr.it/doc/157375},
}
@ARTICLE{ALLEGRINI_2003_ARTICLE_AMP_64466,
  AUTHOR = {Allegrini, P. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Example-based automatic induction of semantic classes through entropic scores},
  YEAR = {2003},
  ABSTRACT = {Abstract - The paper deals in some detail with the application of example-based machine learning techniques to the task of automatically acquiring semantic information from functionally annotated texts. Special emphasis is placed on the use of “analogical proportions” as a means of structuring the knowledge embodied in attested examples, and weighing up their contribution to a variety of lexico-semantic classification tasks. Careful quantitative analysis of automatically acquired information proves to shed considerable light on the semantic inter-connectivity of input data, their structure and organising principles.},
  PAGES = {1--45},
  URL = {https://publications.cnr.it/doc/64466},
  VOLUME = {16-17},
}
@ARTICLE{LENCI_2003_ARTICLE_LMP_64476,
  AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Chunk-it. An Italian shallow parser for robust syntactic annotation},
  YEAR = {2003},
  PAGES = {353--386},
  URL = {https://publications.cnr.it/doc/64476},
  VOLUME = {16-17},
}
@ARTICLE{MONTEMAGNI_2003_ARTICLE_MBBCCLPZFMRBPSZMPD_64477,
  AUTHOR = {Montemagni, S. and Barsotti, F. and Battista, M. and Calzolari, N. and Corazzari, O. and Lenci, A. and Pirrelli, V. and Zampolli, A. and Fanciulli, F. and Massetani, M. and Raffaelli, R. and Basili, R. and Pazienza, M. T. and Saracino, D. and Zanzotto, F. and Mana, N. and Pianesi, F. and Delmonte, R.},
  TITLE = {The syntactic-semantic Treebank of Italian.
An Overview},
  YEAR = {2003},
  PAGES = {461--492},
  URL = {https://publications.cnr.it/doc/64477},
  VOLUME = {16-17},
}
@ARTICLE{PIRRELLI_2003_ARTICLE_PB_64462,
  AUTHOR = {Pirrelli, V. and Battista, M.},
  TITLE = {Syntagmatic and paradigmatic issues in computational morphology},
  YEAR = {2003},
  ABSTRACT = {Abstract - In this paper some germane theoretical issues in inflectional morphology will be addressed from a computational point of view. In particular we shall focus on the proper treatment of verb stem allomorphy in Italian conjugation and discuss several different formal solutions in some detail. To put our discussion on a more computational footing, all our examples are illustrated by using the DATR formalism as our metalanguage. This allows us to combine the advantages of the advanced expressive power and flexibility of DATR with the further bonus of offering a running piece of program code that actually works on the discussed examples. The upshot of the paper is that a computational treatment of Italian conjugation can considerably benefit from recent theoretical advances in word and paradigm morphology, as this level of description allows the rule writer to capture generalizations which would otherwise completely elude a purely syntagmatic approach to allomorphy.},
  PAGES = {679--701},
  URL = {https://publications.cnr.it/doc/64462},
  VOLUME = {18-19},
}
@ARTICLE{SORIA_2003_ARTICLE_SP_64470,
  AUTHOR = {Soria, C. and Pirrelli, V.},
  TITLE = {A multi-level annotation meta-scheme for dialogue acts},
  YEAR = {2003},
  ABSTRACT = {Abstract - This article describes a new principled framework for comparison, design and standardization of annotation schemes for dialogue acts. Previous attempts at comparing existing schemes in order to identify a common core of generally agreed-upon dialogue acts share the assumption that tags belonging to different schemes and describing the same general phenomena can always be related through hypo- or hyperonymy relationships.
Consequently, general-purpose schemes have often been the result of a merger of different tag sets. In this article, we show the extent to which comparability of different annotation schemes is prevented by the very limited tag inter-translatability. We thus describe an alternative approach to the comparison of dialogue act taxonomies based on a compositional analysis of tags according to independent classificatory dimensions. The framework takes a recognition-based approach to dialogue tagging and defines four independent taxonomies of tags, one for each orthogonal dimension of linguistic and contextual analysis assumed to have a bearing on identification of dialogue acts. We also show how the same framework can be used to design a generalpurpose annotation scheme which combines the features of generality and expressivity by exploiting a modular structure. The advantages and limitations of this proposal over other previous attempts are discussed and concretely exemplified.}, KEYWORDS = {dialogue acts, annotation scheme, pragmatics}, PAGES = {925-952}, URL = {https://publications.cnr.it/doc/64470}, VOLUME = {18-19}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @INCOLLECTION{ALLEGRINI_2003_INCOLLECTION_ALMP_136427, AUTHOR = {Allegrini, P. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Le forme del significato. 
Acquisizione e rappresentazione dell'informazione semantica}, YEAR = {2003}, KEYWORDS = {Acquisizione, Semantica Lessicale, Ontologia, Machine Learning}, URL = {https://publications.cnr.it/doc/136427}, } @INCOLLECTION{PIRRELLI_2003_INCOLLECTION_P_136424, AUTHOR = {Pirrelli, V.}, TITLE = {Machine language learning meets information technology}, YEAR = {2003}, KEYWORDS = {Apprendimento, Sistemi integrati, Semantic web, Machine Learning}, URL = {https://publications.cnr.it/doc/136424}, PUBLISHER = {Angeli (Milano, ITA)}, } @INPROCEEDINGS{PIRRELLI_2003_INPROCEEDINGS_PL_112893, AUTHOR = {Pirrelli, V. and Lenci, A.}, TITLE = {Modelli computazionali dell'apprendimento del linguaggio}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/112893}, CONFERENCE_NAME = {XI Congresso della Società Italiana di Psicofisiologia}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {2003}, } @TECHREPORT{HEID_2003_TECHREPORT_HMPS_157341, AUTHOR = {Heid, U. and Maci, E. and Pirrelli, V. and Soria, C.}, TITLE = {NITE Interim Evaluation Report}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157341}, } @BOOK{PIRRELLI_2002_BOOK_P_136412, AUTHOR = {Pirrelli, V.}, TITLE = {Per un superamento della dicotomia Lessico-Grammatica. Aspetti di composizionalità "debole" del linguaggio}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/136412}, } @ARTICLE{PIRRELLI_2000_ARTICLE_PB_274338, AUTHOR = {Pirrelli, V. and Battista, M.}, TITLE = {The Paradigmatic Dimension of Stem Allomorphy in Italian Verb Inflection}, YEAR = {2000}, ABSTRACT = {This paper is concerned with a detailed analysis of stem allomorphy in Italian Conjugation, carried out from a phonological and paradigmatic perspective. In theory, one would expect these two complementary viewpoints to take care of neatly separable classes of phenomena. In fact, the two dimensions turn out to be interlocked in a complex way, to define a grammatical continuum ranging from minor phonological processes to full suppletion. 
A formal descriptive framework is proposed here, whereby several insights into the structure of inflectional paradigms (Matthews 1974, Carstairs 1987, Wurzel 1989, Stump 1991, Aronoff 1994) are dealt with from a unifying, purely morphological perspective. In this framework, the structure of a verb paradigm is characterised in terms of a distribution of slots into a number of equivalence classes, or set partition, where each equivalence class is associated with a morphologically distinct stem root. It is shown that, in Italian, a few set partitions account for the structure of all Italian verb paradigms, whether regular or less regular. Moreover, all these partitions are mutually related homomorphically. This well-behaved family of distributions tightly constrains stem allomorphy at an appropriate level of abstraction, independently of whether the origin of allomorphy is morpho-phonological or purely morphological, showing the superiority of the obtained generalisations over more traditional syntagmatic accounts.}, KEYWORDS = {Morfologia, allomorfia, paradigmi flessionali}, PAGES = {307-379}, URL = {https://publications.cnr.it/doc/274338}, VOLUME = {12}, PUBLISHER = {Pacini (Ospedaletto, Italia)}, ISSN = {1120-2726}, JOURNAL = {Rivista di Linguistica}, } @ARTICLE{PIRRELLI_1999_ARTICLE_PY_273631, AUTHOR = {Pirrelli, V. and Yvon, F.}, TITLE = {The hidden dimension: a paradigmatic view of data-driven NLP}, YEAR = {1999}, ABSTRACT = {Many tasks in language analysis are described as the maximally economic mapping of one level of linguistic representation onto another such level. Over the past decade, many different machine-learning strategies have been developed to automatically induce such mappings directly from data. In this paper, we contend that the way most learning algorithms have been applied to problems of language analysis reflects a strong bias towards a compositional (or biunique) model of interlevel mapping. 
Although this is justified in some cases, we contend that biunique inter-level mapping is not a jack of all trades. A model of analogical learning, based on a paradigmatic reanalysis of memorized data, is presented here. The methodological pros and cons of this approach are discussed in relation to a number of germane linguistic issues and illustrated in the context of three case studies: word pronunciation, word analysis, and word sense disambiguation. The evidence produced here seems to suggest that the brain is not designed to carry out the logically simplest and maximally economic way of relating form and function in language. Rather we propose a radical shift of emphasis in language learning from syntagmatic inter-level mapping to paradigmatically-constrained intra-level mapping.}, KEYWORDS = {data-driven NLP, memory-based machine learning, analogical language learning}, PAGES = {391-408}, URL = {https://publications.cnr.it/doc/273631}, VOLUME = {11}, PUBLISHER = {Taylor \& Francis (London, Regno Unito)}, ISSN = {1362-3079}, JOURNAL = {Journal of experimental and theoretical artificial intelligence (Online)}, }