@ARTICLE{MARZI_2026_ARTICLE_MFNPTDSKK_574721, AUTHOR = {Marzi, C. and Ferro, M. and Nadalini, A. and Pirrelli, V. and Todorova, M. and Dimitrova, T. and Stefanova, V. and Kukova, H. and Koeva, S.}, TITLE = {Comparable reading development in Bulgarian and Italian: cross-linguistic insights from a finger-tracking study}, YEAR = {2026}, ABSTRACT = {Transparent orthographies, such as Bulgarian and Italian, feature highly consistent grapheme-phoneme correspondences, enabling rapid acquisition of decoding skills. Despite belonging to different language families and using distinct scripts (i. e., Cyrillic vs. Latin), these languages provide an ideal framework to investigate whether orthographic transparency can outweigh script differences in shaping reading development. We conducted a cross-sectional study with primary school children from Grades 2 to 5 in Bulgaria and Italy. Reading performance was recorded using a novel finger-tracking technique, which allows the capture of temporal dynamics of reading in a portable, low-cost, and classroom-friendly format. Measures of reading time and text comprehension accuracy were compared across grades and languages. Developmental trajectories for both speed and comprehension accuracy showed remarkable similarity across Bulgarian and Italian, with both languages exhibiting steady improvement from grade 2 to grade 5. Our cross-linguistic results showed that reading development in primary school children follows both universal and language-specific trajectories. While broad developmental trajectories were similar, cross-linguistic differences emerged in the impact of morphological complexity, pointing to both universal and language-specific mechanisms. Our findings indicate that orthographic transparency may exert a stronger influence on early reading development than script type, even across languages from different families. The study also highlights the potential of finger-tracking for large-scale literacy research. 
Establishing comparable developmental benchmarks in transparent orthographies may inform cross-linguistic screening tools and early interventions}, KEYWORDS = {reading development, transparent orthographies, finger-tracking, cross-linguistic comparison, Bulgarian, Italian}, PAGES = {1-30}, URL = {https://www.mdpi.com/2226-471X/11/4/70}, VOLUME = {11 (4)}, DOI = {10.3390/languages11040070}, ISSN = {2226-471X}, JOURNAL = {LANGUAGES}, } @ARTICLE{SRIVASTAVA_2026_ARTICLE_SFPC_561481, AUTHOR = {Srivastava, M. and Ferro, M. and Pirrelli, V. and Coro, G.}, TITLE = {Enhancing token boundary detection in disfluent speech}, YEAR = {2026}, ABSTRACT = {This paper presents an open-source Automatic Speech Recognition (ASR) pipeline optimised for disfluent Italian read speech, designed to enhance both transcription accuracy and token boundary precision in low-resource settings. The study aims to address the difficulty that conventional ASR systems face in capturing the temporal irregularities of disfluent reading, which are crucial for psycholinguistic and clinical analyses of fluency. Building upon the WhisperX framework, the proposed system replaces the neural Voice Activity Detection module with an energy-based segmentation algorithm designed to preserve prosodic cues such as pauses and hesitations. A dual-alignment strategy integrates two complementary phoneme-level ASR models to correct onset–offset asymmetries, while a bias-compensation post-processing step mitigates systematic timing errors. Evaluation on the READLET (child read speech) and CLIPS (adult read speech) corpora shows consistent improvements over baseline systems, confirming enhanced robustness in boundary detection and transcription under disfluent conditions. The results demonstrate that the proposed architecture provides a general, language-independent framework for accurate alignment and disfluency-aware ASR. 
The approach can support downstream analyses of reading fluency and speech planning, contributing to both computational linguistics and clinical speech research}, KEYWORDS = {Automatic Speech Recognition, Statistical analysis, Disfluencies, Voice Activity Detection}, PAGES = {14}, URL = {https://www.sciencedirect.com/science/article/pii/S2667305325001401}, VOLUME = {29}, DOI = {10.1016/j.iswa.2025.200614}, ISSN = {2667-3053}, JOURNAL = {INTELLIGENT SYSTEMS WITH APPLICATIONS}, } @INPROCEEDINGS{MARZI_2026_INPROCEEDINGS_MBTNADBFTGP_580325, AUTHOR = {Marzi, C. and Boni, N. and Todesco, A. and Nadalini, A. and Albertin, G. and Dolciotti, C. and Bongioanni, P. and Ferro, M. and Tamburini, F. and Gagliardi, G. and Pirrelli, V.}, TITLE = {Reading Dynamics and Comprehension in Cognitive Aging: A Multimodal Language Resource}, YEAR = {2026}, ABSTRACT = {We introduce a novel Italian language resource for the study of reading and comprehension in aging populations, combining behavioural and linguistic data from healthy controls (HC), individuals with subjective cognitive decline (SCI), participants with Mild Cognitive Impairment (MCI), and patients with mild dementia (CDR1). Reading performance was recorded through a finger-tracking based application during both silent and oral reading, enabling fine-grained temporal analyses at the text, token and character level. Comprehension was assessed via multiple question types (wh -, inferential, referential, and lexical). Descriptive and non-linear regression analyses informed a feature selection process, yielding temporal and comprehension-based measures that capture individual reading dynamics. These features were explored through unsupervised clustering and supervised classification to investigate their discriminative and predictive potential across cognitive profiles. 
The resource supports research on reading and cognitive decline, offers a reproducible protocol for large-scale data collection, and provides a foundation for developing early cognitive screening and monitoring tools for aging populations}, KEYWORDS = {reading behaviour, cognitive aging, finger-tracking, language resources for assistive technologies, cluster analysis, automatic classification}, PAGES = {2608-2618}, URL = {http://www.lrec-conf.org/proceedings/lrec2026/}, VOLUME = {15}, DOI = {10.63317/3wjy3a8cwnw8}, PUBLISHER = {ELRA (Paris, FRA)}, ISBN = {978-2-493814-49-4}, CONFERENCE_NAME = {Language Resources and Evaluation Conference (LREC 2026)}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {Proceedings of the Fifteenth Language Resources and Evaluation Conference (LREC 2026)}, } @ARTICLE{MARZI_2025_ARTICLE_MNLSTPF_549321, AUTHOR = {Marzi, C. and Nadalini, A. and Lento, A. and Srivastava, M. and Todesco, A. and Pirrelli, V. and Ferro, M.}, TITLE = {Oral text reading as a multi-sensory task}, YEAR = {2025}, ABSTRACT = {Reading aloud involves the complex interplay of visual, motor and lexical processes. While eye movements have been extensively investigated in the reading literature, less is known about the coordination of voice, eye and finger movements in oral and finger-point reading. Here we propose a multimodal perspective on these dynamics, emphasising the contribution of integrating eye-tracking, finger-tracking, and voice recording to a more comprehensive understanding of reading proficiency. Our results show that finger and eye movements are strongly coupled in early readers. Conversely, skilled readers show a more flexible coordination of sensorimotor signals and a more adaptive sensitivity to prosodic structures, with voice articulation slowing at key structural points, such as chunk heads and sentence-final boundaries. 
These findings provide novel insights into how multimodal coordination evolves with reading expertise, contributing to a more fine-grained understanding of reading fluency}, KEYWORDS = {reading development, multimodal integration, eye-voice span, finger-voice span, adaptive reading.}, PAGES = {141-156}, URL = {https://www.rivisteweb.it/doi/10.1418/117447}, VOLUME = {XXIV (1)}, DOI = {10.1418/117447}, ISSN = {1720-9331}, JOURNAL = {LINGUE E LINGUAGGIO}, } @ARTICLE{OKOYE_2025_ARTICLE_OCPFNCCCDFFGPRRSSZMBFNAAABBBBBBCCCCCCCCCCDDDDFGGGGILLLLLMMMMMMNOPPPPQRRSSSSSSTTTTVVZ_564586, AUTHOR = {Okoye, C. and Cuffaro, L. and Pozzi, F. E. and Ferrara, M. C. and Noale, M. and Calciolari, S. and Chicco, D. and Cincotti, F. and Daini, R. and Finazzi, A. and Francioso, L. and Gasparini, F. and Pagan, E. and Ribino, P. and Romeo, Z. and Sala, G. and Solfrizzi, V. and Zambon, A. and Maggi, S. and Bellelli, G. and Ferrarese, C. and Null, N. and Airoldi, C. and Aloisi, A. and Appollonio, I. and Bazzini, C. and Bochicchio, M. A. and Bologna, M. and Brattico, E. and Bruno, G. and Bulgari, M. and Canevelli, M. and Capone, S. and Ceolin, C. and Chiaradonna, F. and Colamarino, E. and Conti, E. and Corsonello, A. and Cortelessa, G. and Crudele, L. and Custodero, C. and De Luca, A. and Delussi, M. and Di Napoli, C. and Dibello, V. and Franchi, M. and Ganora, D. and Gesualdo, L. and Goldin, E. and Grossi, A. and Isella, V. and Lenti, R. and Leone, A. and Locati, S. and Logrieco, A. and Logroscino, G. and Macchia, E. and Mantuano, P. and Massimi, A. and Matteini, P. and Messina, G. and Moretti, L. and Natalello, A. and Orlandi, I. and Paragliola, G. and Paparella, G. and Pegoraro, S. and Pirrelli, V. and Quaranta, N. A. A. and Riccardi, G. and Romano, D. and Saibene, A. and Sala, A. and Sciurti, E. and Serino, L. and Siciliano, P. and Silanos, F. and Tamburrano, A. and Tosi, G. and Tratsevich, A. and Tremolizzo, L. and Villari, P. and Vezzoso, A. and Zoia, C. 
P.}, TITLE = {Multicomponent interventions and technologies to reduce the burden of frailty, functional, and cognitive decline: insights from the Age-It Research Program}, YEAR = {2025}, ABSTRACT = {Objectives Preventing age-related complications is a critical priority for health systems. Within the Age-It program, Spoke 8 aims to evaluate scalable, multicomponent, technology-assisted interventions to prevent frailty and mitigate functional and cognitive decline in older adults across different care settings. Methods Spoke 8 includes three clinical studies conducted in community, hospital, and long-term care settings, supported by cross-cutting work packages on digital infrastructure, technology development, and economic evaluation. The intervention model integrates physical, cognitive, nutritional, and psychosocial components, supported by digital tools, biomarkers of aging, and a centralized data platform. Results The project is expected to generate evidence on the effectiveness, feasibility, and cost-effectiveness of multidomain interventions implemented across diverse real-world settings, including community, hospital, and long-term care. Technology-assisted strategies-such as wearable sensors and digital cognitive tools-may enhance adherence and enable remote monitoring, while also supporting more personalized care delivery. The integration of artificial intelligence will facilitate the interpretation of complex clinical and biological data, improving risk stratification and the early identification of individuals most likely to benefit from targeted interventions. Together, these approaches may help reduce hospitalizations, delay functional decline, and promote aging in place. Discussion This initiative supports the transition toward more integrated and equitable care models for older adults. 
Through the implementation of scalable, person-centered interventions within routine services, the project offers policy-relevant strategies to address frailty and functional decline-contributing to the redesign of aging care in Italy and providing insights applicable across diverse health systems facing the challenges of population aging countries}, KEYWORDS = {Dementia, Frailty, Multidomain intervention, Prevention, Technology}, URL = {https://iris.cnr.it/handle/20.500.14243/564586}, VOLUME = {80 (Supplement_2)}, DOI = {10.1093/geronb/gbaf186}, ISSN = {1079-5014}, JOURNAL = {JOURNALS OF GERONTOLOGY SERIES B-PSYCHOLOGICAL SCIENCES AND SOCIAL SCIENCES}, } @INPROCEEDINGS{FERRO_2024_INPROCEEDINGS_FMNTLP_501843, AUTHOR = {Ferro, M. and Marzi, C. and Nadalini, A. and Taxitari, L. and Lento, A. and Pirrelli, V.}, TITLE = {ReadLet: a Dataset for Oral, Visual and Tactile Text Reading Data of Early and Mature Readers}, YEAR = {2024}, ABSTRACT = {The paper presents the design and construction of a time-stamped multimodal dataset for reading research, including multiple time-aligned temporal signals elicited with four experimental trials of connected text reading by both child and adult readers. We present the experimental protocols, as well as the data acquisition process and the post-processing phase of data annotation/augmentation. To evaluate the potential and usefulness of a time-aligned multimodal dataset for reading research, we present a few statistical analyses showing the correlation and complementarity of multimodal time-series of reading data, as well as some results of modelling adults’ reading data by integrating different modalities. The total dataset size amounts to about 2. 
5 GByte in compressed format and is available through the CLARIN infrastructure}, KEYWORDS = {text reading, eye movements, finger movements, eye-finger span, synchronisation, parallel processing, multimodality}, PAGES = {13595-13609}, URL = {https://aclanthology.org/volumes/2024.lrec-main/}, PUBLISHER = {ELRA Language Resources Association (ELRA) (Parigi, FRA)}, ISBN = {978-2-493814-10-4}, CONFERENCE_NAME = {2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)}, CONFERENCE_PLACE = {Parigi}, BOOKTITLE = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)}, } @INPROCEEDINGS{LENTO_2024_INPROCEEDINGS_LNFMPDKSTK_513008, AUTHOR = {Lento, A. and Nadalini, A. and Ferro, M. and Marzi, C. and Pirrelli, V. and Dimitrova, T. and Kukova, H. and Stefanova, V. and Todorova, M. and Koeva, S.}, TITLE = {Assessing Reading Literacy of Bulgarian Pupils with Finger-tracking}, YEAR = {2024}, ABSTRACT = {The paper reports on the first steps in developing a time-stamped multimodal dataset of reading data by Bulgarian children. Data are being collected, structured and analysed by means of ReadLet, an innovative infrastructure for multimodal language data collection that uses a tablet as a reader’s front-end. The overall goal of the project is to quantitatively analyse the reading skills of a sample of early Bulgarian readers collected over a two-year period, and compare them with the reading data of early readers of Italian, collected using the same protocol. We illustrate design issues of the experimental protocol, as well as the data acquisition process and the post-processing phase of data annotation/augmentation. To evaluate the potential and usefulness of the Bulgarian dataset for reading research, we present some preliminary statistical analyses of our recently collected data. 
They show robust convergence trends between Bulgarian and Italian early reading development stages}, KEYWORDS = {Finger Tracking, Reading, Learning, Text Comprehension.}, PAGES = {140-149}, URL = {https://dcl.bas.bg/clib/proceedings/}, CONFERENCE_NAME = {Sixth International Conference on Computational Linguistics in Bulgaria (CLIB 2024)}, CONFERENCE_PLACE = {BGR}, BOOKTITLE = {Proceedings of the Sixth International Conference on Computational Linguistics in Bulgaria (CLIB 2024)}, } @INPROCEEDINGS{LENTO_2024_INPROCEEDINGS_LNKPMF_519724, AUTHOR = {Lento, A. and Nadalini, A. and Khlif, N. and Pirrelli, V. and Marzi, C. and Ferro, M.}, TITLE = {Comparative Evaluation of Computational Models Predicting Eye Fixation Patterns During Reading: Insights from Transformers and Simpler Architectures}, YEAR = {2024}, ABSTRACT = {Eye tracking records of natural text reading are known to provide significant insights into the cognitive processes underlying word processing and text comprehension, with gaze patterns, such as fixation duration and saccadic movements, being modulated by morphological, lexical, and higher-level structural properties of the text being read. Although some of these effects have been simulated with computational models, it is still not clear how accurately computational modelling can predict complex fixation patterns in connected text reading. State-of-the-art neural architectures have shown promising results, with pre-trained transformer-based classifiers having recently been claimed to outperform other competitors, achieving beyond 95% accuracy. However, transformer-based models have neither been compared with alternative architectures nor adequately evaluated for their sensitivity to the linguistic factors affecting human reading. Here we address these issues by evaluating the performance of a pool of neural networks in classifying eye-fixation English data as a function of both lexical and contextual factors. 
We show that i) accuracy of transformer-based models has largely been overestimated, ii) other simpler models make comparable or even better predictions, iii) most models are sensitive to some of the major lexical factors accounting for at least 50% of human fixation variance, iv) most models fail to capture some significant context-sensitive interactions, such as those accounting for spillover effects in reading. The work shows the benefits of combining accuracy-based evaluation metrics with non-linear regression modelling of fixed and random effects on both real and simulated eye-tracking data}, KEYWORDS = {eye-tracking, eye fixation time prediction, neural network, contextual word embeddings, lexical features}, PAGES = {10}, URL = {https://ceur-ws.org/Vol-3878/}, VOLUME = {VOL-3878}, PUBLISHER = {CEUR (Aachen, DEU)}, ISBN = {979-12-210-7060-6}, CONFERENCE_NAME = {Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Aachen}, BOOKTITLE = {Proceedings of the Tenth Italian Conference on Computational Linguistics (CLiC-it 2024)}, } @ARTICLE{MARZI_2023_ARTICLE_MP_462114, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {A discriminative information-theoretical analysis of the regularity gradient in inflectional morphology}, YEAR = {2023}, ABSTRACT = {Over the last decades, several independent lines of research in morphology have questioned the hypothesis of a direct correspondence between sublexical units and their mental correlates. Word and paradigm models of morphology shifted the fundamental part-whole relation in an inflection system onto the relation between individual inflected word forms and inflectional paradigms. In turn, the use of artificial neural networks of densely interconnected parallel processing nodes for morphology learning marked a radical departure from a morpheme-based view of the mental lexicon. 
Lately, in computational models of Discriminative Learning, a network architecture has been combined with an uncertainty reducing mechanism that dispenses with the need for a one-to-one association between formal contrasts and meanings, leading to the dissolution of a discrete notion of the morpheme. The paper capitalises on these converging lines of development to offer a unifying information-theoretical, simulation-based analysis of the costs incurred in processing (ir)regularly inflected forms belonging to the verb systems of English, German, French, Spanish and Italian. Using Temporal Self-Organising Maps as a computational model of lexical storage and access, we show that a discriminative, recurrent neural network, based on Rescorla-Wagner's equations, can replicate speakers' exquisite sensitivity to widespread effects of word frequency, paradigm entropy and morphological (ir)regularity in lexical processing. The evidence suggests an explanatory hypothesis linking Word and paradigm morphology with principles of information theory and human perception of morphological structure. According to this hypothesis, the ways more or less regularly inflected words are structured in the mental lexicon are more related to a reduction in processing uncertainty and maximisation of predictive efficiency than to economy of storage}, KEYWORDS = {Morphological inflection, Morphological regularity, Prediction-driven processing, Discriminative learning, Lexical self-organisation, Gradient structure, Information theory, Non-linear modelling}, PAGES = {1-51}, URL = {https://doi.org/10.1007/s11525-023-09415-6}, DOI = {10.1007/s11525-023-09415-6}, ISSN = {1871-5621}, JOURNAL = {MORPHOLOGY (DORDRECHT)}, } @ARTICLE{NADALINI_2023_ARTICLE_NMFTLCP_501822, AUTHOR = {Nadalini, A. and Marzi, C. and Ferro, M. and Taxitari, L. and Lento, A. and Crepaldi, D. 
and Pirrelli, V.}, TITLE = {Eye-voice and finger-voice spans in adults’ oral reading of connected texts}, YEAR = {2023}, ABSTRACT = {The present paper investigates the interaction between eye movements, voice articulation and the movements of the index finger dynamically pointing to a text line in oral finger-point reading of Italian. During finger-point reading, the finger appears to be ahead of the voice most of the times, by a margin that is significantly modulated by the distribution of phrasal and prosodic units in the reading text. Eye movements replicate the same effects on a different time scale. The eye is ahead of both voice and finger by a wide margin (confirming evidence observed for English and German sentence reading), while showing a tendency to re-synchronise with voice articulation at the right edge of strong prosodic units (sentence boundaries). Our evidence suggests a multicomponent view of the time span between the eye/finger and the voice. The span is shown to be the dynamic outcome of an optimally adaptive reading strategy, resulting from the interaction between individual decoding skills, the reader's phonological buffer capacity, and the structural complexity of a reading text. Proficient readers modulate their span to compensate for the different timing between word fixation and word articulation, read faster, and dynamically adjust their processing window to the meaningful, prosodic units of a text}, KEYWORDS = {finger-point reading, eye-tracking, finger-tracking, eye-voice span, finger-voice span, eye-finger coordination, parallel processing, working memory, phonological buffer, adaptive reading}, PAGES = {366-400}, URL = {https://benjamins.com/catalog/ml.00025.nad}, VOLUME = {18 (3)}, DOI = {10.1075/ml.00025.nad}, ISSN = {1871-1340}, JOURNAL = {THE MENTAL LEXICON}, } @EDITORIAL{MARZI_2023_EDITORIAL_MP_452788, AUTHOR = {Marzi, C. 
and Pirrelli, V.}, TITLE = {Integrative Views on Representations and Processes in Morphology}, YEAR = {2023}, ABSTRACT = {One of the most enduring conceptualisations of the language architecture rests ona modular subdivision of work between lexical representations of stored items onthe one hand, and dynamic processes, modelled as procedural rules working on suchitems, on the other hand. In morphology, network-based approaches have suggested an alternative "integrative" view of word representations and processes, where lexical representations consist of partially overlapping activation patterns spreading over several processing units. From this integrative perspective, the resulting network is both a lexicon and a word processor. We argue that the network-based view provides a stimulating research framework for several complementary levels of language inquiry (including theoretical, computational and neuro-psychological approaches) to be fruitfully integrated into a novel, comprehensive understanding of morphology. We discuss some implications of this view and delineate prospects of progress in this area}, KEYWORDS = {morphology, mental lexicon, Connectionism, Network science, Discriminative Learning}, PAGES = {397-556}, URL = {https://link.springer.com/journal/11525/volumes-and-issues/33-4}, DOI = {10.1007/s11525-023-09416-5}, PUBLISHER = {Springer (Dordrecht, NLD)}, ISSN = {1871-5656}, CONFERENCE_PLACE = {Dordrecht}, } @EDITORIAL{MARZI_2023_EDITORIAL_MP_450063, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {Integrative views of representations and processes in morphology: an introduction}, YEAR = {2023}, ABSTRACT = {One of the most enduring conceptualisations of the language architecture rests on a modular subdivision of work between lexical representations of stored items on the one hand, and dynamic processes, modelled as procedural rules working on such items, on the other hand. 
In morphology, network-based approaches have suggested an alternative “integrative” view of word representations and processes, where lexical representations consist of partially overlapping activation patterns spreading over several processing units. From this integrative perspective, the resulting network is both a lexicon and a word processor. We argue that the network-based view provides a stimulating research framework for several complementary levels of language inquiry (including theoretical, computational and neuro-psychological approaches) to be fruitfully integrated into a novel, comprehensive understanding of morphology}, KEYWORDS = {Morphology, Mental Lexicon, Connectionism, Network science, Discriminative learning}, PAGES = {397-408}, URL = {https://link.springer.com/article/10.1007/s11525-023-09416-5}, VOLUME = {33}, DOI = {10.1007/s11525-023-09416-5}, ISSN = {1871-5621}, } @ARTICLE{MARZI_2022_ARTICLE_MNMMP_419693, AUTHOR = {Marzi, C. and Narzisi, A. and Milone, A. and Masi, G. and Pirrelli, V.}, TITLE = {Reading behaviors through patterns of finger-tracking in Italian children with autism spectrum disorder}, YEAR = {2022}, ABSTRACT = {The paper proposes an ecological and portable protocol for the large-scale collection of reading data in high-functioning autism spectrum disorder (ASD) children based on recording the finger movements of a subject reading a text displayed on a tablet touchscreen. By capitalizing on recent evidence that movements of a finger that points to a scene or text during visual exploration or reading may approximate eye fixations, we focus on recognition of written content and function words, pace of reading, and accuracy in reading comprehension. 
The analysis showed significant differences between typically developing and ASD children, with the latter group exhibiting greater variation in levels of reading ability, slower developmental pace in reading speed, less accurate comprehension, greater dependency on word length and word frequency, less significant prediction-based processing, as well as a monotonous, steady reading pace with reduced attention to weak punctuation. Finger-tracking patterns provides evidence that ASD readers may fail to integrate single word processing into major syntactic structures and lends support to the hypothesis of an impaired use of contextual information to predict upcoming stimuli, suggesting that difficulties in perception may arise as difficulties in prediction}, KEYWORDS = {reading, autism, finger-tracking, deleloping readers, prediction-driven processing}, PAGES = {1-17}, URL = {https://www.mdpi.com/2076-3425/12/10/1316}, VOLUME = {12 (1316)}, DOI = {10.3390/brainsci12101316}, ISSN = {2076-3425}, JOURNAL = {BRAIN SCIENCES}, } @INCOLLECTION{CREPALDI_2022_INCOLLECTION_CFMNPT_415388, AUTHOR = {Crepaldi, D. and Ferro, M. and Marzi, C. and Nadalini, A. and Pirrelli, V. and Taxitari, L.}, TITLE = {Finger movements and eye movements during adults' silent and oral reading}, YEAR = {2022}, ABSTRACT = {Using a common tablet and a web application, we can record the finger movements of a reader that is concurrently reading and finger-pointing a text displayed on the tablet touchscreen. In a preliminary analysis of "finger-tracking" data of early-graders we showed that finger movements can replicate established reading effects observed in more controlled settings. Here, we analyse and discuss reading evidence collected by (i) tracking the finger movements of adults reading a short essay displayed on a tablet touchscreen, and (ii) tracking the eye movements of adultsreading a comparable text displayed on the screen of a computer. 
Texts in the two conditions were controlled for linguistic complexity and page layout. In addition, we tested adults' comprehension in both silent and oral reading, by asking them multiple-choice questions after reading each text. We show and discuss the reading evidence that the two (optical and tactile) protocols provide, and to what extent they show comparable effects. We conclude with some remarks on the importance of ecology and portability of protocols for large-scale collection of naturalistic reading data}, KEYWORDS = {Reading, finger-tracking, digital technology}, PAGES = {443-471}, URL = {https://link.springer.com/book/9783030998905}, PUBLISHER = {Springer (Dordrecht, NLD)}, ISBN = {978-3-030-99890-5}, CONFERENCE_PLACE = {Dordrecht}, BOOKTITLE = {Developing language and literacy-Studies in Honor of Dorit Diskin Ravid}, EDITOR = {Levie, R. and Bar On, A. and Ashkenazi, O. and Dattner, E. and Brandes, G.}, } @INPROCEEDINGS{MARZI_2022_INPROCEEDINGS_MNFMMVPTP_413387, AUTHOR = {Marzi, C. and Narzisi, A. and Ferro, M. and Masi, G. and Milone, A. and Viglione, V. and Pelagatti, S. and Tomassini, I. and Pirrelli, V.}, TITLE = {Patterns of finger-tracking in Italian early readers with Autism Spectrum Disorder}, YEAR = {2022}, ABSTRACT = {Background: Of late, the synergistic interaction of eye and hand movements in the exploration of a visual scene displayed on a computer touchscreen was shown to provide a congruent signature of the "attention maps" of subjects with autism spectrum disorders (ASD). A familiar context where this visual and tactile interaction is exploited is when children use the finger of their dominant hand to point the letters of written words as they are reading, particularly at early stages of their literacy development. 
In the present work, a dedicated app running on a common tablet is used to capture and analyse the finger-tracking behaviour of children with ASD while they are reading few episodes of a connected text on the tablet touchscreen. The reader's voice is also recorded through the tablet built-in microphone. The sliding movements of the finger across the tablet touchscreen are discretized into a series of densely distributed "touch events", which are then mapped onto the text lines in much the same way eye fixations are projected onto a sequence of words using an eye-tracker. Reading texts are linguistically annotated, to control for levels of reading difficulty, and finger-tracking times are associated with linguistic glosses. Objectives: Investigate patterns of finger-tracking as a potential non biological marker for identification of children with ASD. Methods: A preliminary analysis is offered of evidence of the finger-tracking behaviour of 20 Italian children with high functioning ASD, aged 7-11 years, while they are engaged in reading. A grade-matched control group of children with typical development was included. Patterns of finger-tracking are assessed in connection with three complementary aspects of reading behaviour: (1) word recognition, (2) pace of reading of multi-word intonation units, and (3) text comprehension, controlled by asking children a few multiple-choice questions on text content after each reading session. Results: Considerable variation in levels of reading ability was observed in the ASD sample, with a few children showing clear evidence of impaired reading comprehension. However, fluent readers with ASD exhibit the same correlation between accurate decoding (assessed by measuring per-word reading speed) and high levels of reading comprehension found in controls. Likewise, decoding rates were found to significantly increase with increasing grade levels, following the typical developmental pattern observed in controls. 
On a less local level of linguistic analysis, the reading pace of ASD readers fails to be modulated according to major syntactic structures, punctuation marks and direct speech turns, an effect concomitant with a flat prosodic intonation of oral reading. Conclusions: Preliminary findings confirm the heterogeneous nature of reading skills in children with ASD, showing that the use of a tablet screen as a tactile interface for visual perception analysis can offer a robust experimental protocol for large-scale, multimodal collection of naturalistic data for extensive assessment of readers with ASD}, KEYWORDS = {reading, autism, finger-tracking, developing readers, prediction-driven processing}, PAGES = {192-192}, URL = {https://cdn.ymaws.com/www.autism-insar.org/resource/resmgr/files/insar_2022/2022_Abstract_Book.pdf}, VOLUME = {2022}, CONFERENCE_NAME = {INSAR}, BOOKTITLE = {2022 annual meeting abstract book}, } @INPROCEEDINGS{MARZI_2022_INPROCEEDINGS_MP_415389, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {An information-theoretic analysis of the inflectional regular-irregular gradient for optimal processing units}, YEAR = {2022}, ABSTRACT = {Prediction-driven word processing defines the human ability to anticipate upcoming input words in recognition. From this perspective, input word forms need to be processed as quickly and efficiently as possible. Under the reasonable assumption that spoken words are memorized and processed as word trees (e. g. Marslen-Wilson's "cohorts"), the larger the size of the cohort of an input word at a certain point in time (and the later its uniqueness point), the harder and slower to process the word is. Regularly and irregularly inflected verb forms have different stem family sizes and different uniqueness points. 
Using a Recurrent Neural Network (RNN) as a computational model of the human lexical processor, we explore here how their distributional and structural properties may affect (optimal) processing strategies}, KEYWORDS = {Morphological inflection, prediction-driven processing, discriminability, non-linearity, learnability}, PAGES = {50-51}, URL = {https://archive.nytud.hu/imm20/abstracts/main.pdf}, CONFERENCE_NAME = {20th International Morphology Meeting-(Dedicated to the memory of Ferenc Kiefer)}, BOOKTITLE = {Book of Abstracts of the 20th International Morphology Meeting-Dedicated to the memory of Ferenc Kiefer}, } @INPROCEEDINGS{TAXITARI_2021_INPROCEEDINGS_TCFMNP_423945, AUTHOR = {Taxitari, L. and Cappa, C. and Ferro, M. and Marzi, C. and Nadalini, A. and Pirrelli, V.}, TITLE = {Using mobile technology for reading assessment}, YEAR = {2021}, ABSTRACT = {The enormous potential of Information and Communication Technologies (ICT) for addressing critical educational issues is generally acknowledged, but its use in the assessment of the complex skills of reading and understanding a text has been very limited to date. The paper contrasts traditional reading assessment protocols with ReadLet, an ICT platform with a tablet front-end, designed to support online monitoring of silent and oral reading abilities in early graders. ReadLet makes use of cloud computing and mobile technology for large-scale data collection and allows the time alignment of the child's reading behaviour with texts tagged using Natural Language Processing (NLP) tools. Initial findings replicate established benchmarks from the psycholinguistic literature on reading in both typically and atypically developing children, making the application a new ground-breaking approach in the evaluation of reading skills. 
Index Terms-reading assessment, reading research, mobile technology, NLP, cloud computing, special education needs}, KEYWORDS = {reading assessment, reading research, mobile technology, NLP, cloud computing, special education needs}, PAGES = {1-6}, URL = {http://www.ieee.ma/cist20/component/content/?id=26\&Itemid=185}, ISBN = {9781728166469}, CONFERENCE_NAME = {6th IEEE Congress on Information Science \& Technology (IEEE CIST'20)}, BOOKTITLE = {Proceedings of the 6th IEEE Congress on Information Science and Technology (CiSt)}, } @INPROCEEDINGS{MARZI_2021_INPROCEEDINGS_MTFNP_426392, AUTHOR = {Marzi, C. and Taxitari, L. and Ferro, M. and Nadalini, A. and Pirrelli, V.}, TITLE = {Valutare la lettura "in tempo reale": un esempio di integrazione tra linguistica computazionale e linguistica applicata}, YEAR = {2021}, ABSTRACT = {In anni recenti, linguistica computazionale e linguistica applicata hanno ampliato i loro rispettivi ambiti d'indagine, utilizzando l'ontologia formale della linguistica teorica e i modelli cognitivi della psicolinguistica per studiare le difficoltà che i parlanti incontrano nello svolgimento di "compiti" linguistici specifici. Nell'ambito della lettura, le tecnologie per il Trattamento Automatico del Linguaggio (TAL) si sono dimostrate capaci di classificare il livello di leggibilità di un testo, basandosi sulla distribuzione di alcuni parametri linguistici in testi pre-classificati per età dei lettori destinatari, o per grado di scolarità, o per livello di sviluppo cognitivo. Ad esempio, parole o frasi più lunghe, o parole più rare tendono a distribuirsi in testi di più difficile comprensione, o destinati a lettori più maturi. E' possibile così assegnare a un testo, o a ogni singola frase, un punteggio di leggibilità in funzione (inversa) della complessità lessicale, morfologica, sintattica o pragmatica dell'unità testuale analizzata. In Linguistica Applicata (LA) la valutazione della difficoltà di lettura ha seguito un approccio funzionale. 
Nel modello semplice di lettura, ad esempio, la capacità di leggere un testo è analizzata come il prodotto dell'interazione tra decodifica e comprensione. Attraverso l'osservazione di un campione di bambini impegnati nella lettura, è possibile valutare la loro fluenza in decodifica, gli errori di decodifica e comprensione, e l'efficacia di percorsi educativi personalizzati. La piattaforma ReadLet è stata sviluppata con l'obiettivo di integrare l'approccio classificatorio del TAL con quello funzionale della LA. Il bambino legge un breve testo visualizzato sullo schermo di un tablet, ad alta voce o in modalità silente. In entrambi i casi, al bambino viene chiesto di "tenere il segno" con il dito sullo schermo nel corso della lettura. La traccia tattile è registrata e allineata con il testo visualizzato sullo schermo mediante un algoritmo di convoluzione. Al contempo, il testo è annotato automaticamente per tratti linguistici. Alla fine della sessione di lettura silente, il bambino risponde ad alcune semplici domande sul contenuto del testo. I dati raccolti consentono di valutare le difficoltà (rallentamenti o errori) che il bambino incontra nella lettura, e di mettere in relazione "in tempo reale" queste difficoltà con aspetti linguistici specifici del testo. Un'analisi preliminare dei dati raccolti da ReadLet su oltre 400 allievi di alcune scuole elementari toscane e della Svizzera italiana, ha evidenziato il differente "passo" di lettura tra lettori con sviluppo tipico e atipico, e il peso che variabili come lunghezza, frequenza e lessicalità hanno su profili di lettura individuali e aggregati. 
La possibilità di "controllare" automaticamente la distribuzione di queste variabili nel testo e di correlarle con le difficoltà del singolo bambino consente, infine, di somministrare testi con livelli di difficoltà gradualmente crescenti, rendendo possibili percorsi personalizzati di potenziamento}, KEYWORDS = {reading assessment, reading strategies, NLP, ICT mobile technologies}, PAGES = {5-5}, URL = {https://iris.cnr.it/handle/20.500.14243/426392}, VOLUME = {2021}, CONFERENCE_NAME = {XXI Congresso Internazionale di AItLA}, BOOKTITLE = {FARE LINGUISTICA APPLICATA CON LE DIGITAL HUMANITIES}, } @ARTICLE{MASINI_2020_ARTICLE_MP_424255, AUTHOR = {Masini, F. and Pirrelli, V.}, TITLE = {L'evidenza morfologica nell'era digitale: per un'integrazione di teoria e computazione}, YEAR = {2020}, ABSTRACT = {This article proposes a research perspective on morphological and lexical data based on an integrated approach that merges linguistic theory and computational analyses of a large quantity of textual data. Starting from a description of the units and processes of morphology, and of the issues they raise, we discuss to what extent these theoretical notions can be translated into the algorithmic procedures of Natural Language Processing (NLP) and what resources and methods are nowadays available to make morphological and lexical knowledge explicit within texts. At the same time, we explore the repercussions that the application of computational (but also psycho-/neuro-linguistic) techniques may have on our theoretical representations and on their plausibility}, KEYWORDS = {morphology - lexicon - categories - Natural Language Processing - Italian}, PAGES = {77-126}, URL = {https://iris.cnr.it/handle/20.500.14243/424255}, VOLUME = {VI}, ISSN = {0393-1226}, JOURNAL = {QUADERNI DI SEMANTICA}, } @INCOLLECTION{MARZI_2020_INCOLLECTION_MBBP_408279, AUTHOR = {Marzi, C. and Blevins James, P. and Booij, G. 
and Pirrelli, V.}, TITLE = {Inflection at the morphology-syntax interface}, YEAR = {2020}, ABSTRACT = {What is inflection? Is it part of language morphology, syntax or both? What are the basic units of inflection and how do speakers acquire and process them? How do they vary across languages? Are some inflection systems somewhat more complex than others, and does inflectional complexity affect the way speakers process words? This chapter addresses these and other related issues from an interdisciplinary perspective. Our main goal is to map out the place of inflection in our current understanding of the grammar architecture. In doing that, we will embark on an interdisciplinary tour, which will touch upon theoretical, psychological, typological, historical and computational issues in morphology, with a view to looking for points of methodological and substantial convergence from a rather heterogeneous array of scientific approaches and theoretical perspectives. The main upshot is that we can learn more from this than just an additive medley of domain-specific results. In the end, a cross-domain survey can help us look at traditional issues in a surprisingly novel light}, KEYWORDS = {inflection, paradigmatic relations, word processing, word learning, inflectional complexity, family size, entropy}, PAGES = {228-294}, URL = {https://www.degruyter.com/view/book/9783110440577/10.1515/9783110440577-007.xml}, DOI = {10.1515/9783110440577-007}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {9783110440577}, CONFERENCE_PLACE = {Berlin/Munich}, BOOKTITLE = {Word Knowledge and Word Usage. A cross-interdisciplinary guide to the mental lexicon}, EDITOR = {Vito Pirrelli, I. P. and Dressler, W. U.}, } @INCOLLECTION{PIRRELLI_2020_INCOLLECTION_PMFCBM_408278, AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M. and Cardillo, F. A. and Baayen Harald, R. 
and Milin, P.}, TITLE = {Psycho-computational modelling of the mental lexicon}, YEAR = {2020}, ABSTRACT = {Over the last decades, a growing body of evidence on the mechanisms governing lexical storage, access, acquisition and processing has questioned traditional models of language architecture and word usage based on the hypothesis of a direct correspondence between modular components of grammar competence (lexicon vs. rules), processing correlates (memory vs. computation) and neuro-anatomical localizations (prefrontal vs. temporo-parietal perisylvian areas of the left hemisphere). In the present chapter, we explore the empirical and theoretical consequences of a distributed, integrative model of the mental lexicon, whereby words are seen as emergent properties of the functional interaction between basic, language-independent processing principles and the language-specific nature and organization of the input. From this perspective, language learning appears to be inextricably related to the way language is processed and internalized by the speakers, and key to an interdisciplinary understanding of such a way, in line with Tomaso Poggio's suggestion that the development of a cognitive skill is causally and ontogenetically prior to its execution (and sits "on top of it"). In particular, we discuss conditions, potential and prospects of the epistemological continuity between psycholinguistic and computational modelling of word learning, and illustrate the yet largely untapped potential of their integration. We use David Marr's hierarchy to clarify the complementarity of the two viewpoints. Psycholinguistic models are informative about how speakers learn to use language (interfacing Marr's levels 1 and 2). 
When we move from the psycholinguistic analysis of the functional operations involved in language learning to an algorithmic description of how they are computed, computer simulations can help us explore the relation between speakers' behavior and general learning principles in more detail. In the end, psycho-computational models can be instrumental to bridge Marr's levels 2 and 3, bringing us closer to understanding the nature of word knowledge in the brain}, KEYWORDS = {mental lexicon, word storage and processing, psycholinguistics, computational linguistics, connectionist models, discriminative learning}, PAGES = {23-82}, URL = {https://www.degruyter.com/view/book/9783110440577/10.1515/9783110440577-002.xml}, DOI = {10.1515/9783110440577-002}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {9783110440577}, CONFERENCE_PLACE = {Berlin/Munich}, BOOKTITLE = {Word Knowledge and Word Usage}, EDITOR = {Vito Pirrelli, I. P. and Dressler, W. U.}, } @INCOLLECTION{PIRRELLI_2020_INCOLLECTION_PPD_405086, AUTHOR = {Pirrelli, V. and Plag, I. and Dressler, U. W.}, TITLE = {Word knowledge in a cross-disciplinary world}, YEAR = {2020}, ABSTRACT = {This editorial project stemmed from a 4-year period of intense interdisciplinary research networking funded by the European Science Foundation within the framework of the NetWordS project (09-RNP-089). The project mission was to bring together experts of various research fields (from brain sciences and computing to cognition and linguistics) and of different theoretical inclinations, to advance the current awareness of theoretical, typological, psycholinguistic, computational and neurophysiological evidence on the structure and processing of words, with a view to promoting novel methods of research and assessment for grammar architecture and language usage. 
The unprecedented cross-disciplinary fertilization prompted by a wide range of scientific and educational initiatives (three international workshops, two summer schools, one main conference and over a hundred grants supporting short visits and multilateral exchanges) persuaded us to pursue this effort beyond the project lifespan, spawning the idea of an interdisciplinary handbook, where a wide range of central topics on word knowledge and usage are dealt with by teams of authors with common interests and different backgrounds. Unsurprisingly (with the benefit of the hindsight), the project turned out to be more challenging and time-consuming than initially planned. Cross-boundary talking and mutual understanding are neither short-term, nor immediately rewarding efforts, but part of a long-sighted, strategic vision, where stamina, motivation and planning ahead play a prominent role. We believe that this book, published as an open access volume, significantly sharpens the current understanding of issues of word knowledge and usage, and has a real potential for promoting novel research paradigms, and bringing up a new generation of language scholars}, KEYWORDS = {interdisciplinarity, word knowledge, word usage, language units, statistical and computer modeling, levels of understanding, between-level mapping, linking hypotheses, scale effects}, PAGES = {1-20}, URL = {https://doi.org/10.1515/9783110440577}, DOI = {10.1515/9783110440577}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {9783110440577}, CONFERENCE_PLACE = {Berlin/Munich}, BOOKTITLE = {Word Knowledge and Word Usage. A Cross-Disciplinary Guide to the Mental Lexicon}, } @EDITORIAL{PIRRELLI_2020_EDITORIAL_PPD_408986, AUTHOR = {Pirrelli, V. and Plag, I. 
and Dressler Wolfgang, U.}, TITLE = {Word knowledge and word usage: a cross-disciplinary guide to the mental lexicon}, YEAR = {2020}, ABSTRACT = {This editorial project stemmed from a 4-year period of intense interdisciplinary research networking funded by the European Science Foundation within the framework of the NetWordS project (09-RNP-089)}, KEYWORDS = {interdisciplinarity, word knowledge, word usage, language units, statistical and computer modeling, levels of understanding, between-level mapping, linking hypotheses, scale effects}, PAGES = {1-717}, URL = {https://doi.org/10.1515/9783110440577}, VOLUME = {337}, DOI = {10.1515/9783110440577}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {978-3-11-051748-4}, CONFERENCE_PLACE = {Berlin/Munich}, } @INPROCEEDINGS{MARZI_2020_INPROCEEDINGS_MRNTP_382398, AUTHOR = {Marzi, C. and Rodella, A. and Nadalini, A. and Taxitari, L. and Pirrelli, V.}, TITLE = {Does finger-tracking point to child reading strategies?}, YEAR = {2020}, ABSTRACT = {The movement of a child's index finger that points to a printed text while (s)he is reading may provide a proxy for the child's eye movements and attention focus. We validated this correlation by showing a quantitative analysis of patterns of "finger-tracking" of Italian early graders engaged in reading a text displayed on a tablet. A web application interfaced with the tablet monitors the reading behaviour by modelling the way the child points to the text while reading. The analysis found significant developmental trends in reading strategies, marking an interesting contrast between typically developing and atypically developing readers}, KEYWORDS = {reading assessment, reading strategies, mobile technology, special education needs}, PAGES = {1-7}, URL = {http://ceur-ws.org/Vol-2769/paper_60.pdf}, VOLUME = {2769}, PUBLISHER = {CEUR-WS. 
org (Aachen, DEU)}, CONFERENCE_NAME = {Italian Conference on Computational Linguistics 2020}, CONFERENCE_PLACE = {Aachen}, BOOKTITLE = {Proceedings of the Seventh Italian Conference on Computational Linguistics}, } @INPROCEEDINGS{TAXITARI_2020_INPROCEEDINGS_TCFMNP_501841, AUTHOR = {Taxitari, L. and Cappa, C. and Ferro, M. and Marzi, C. and Nadalini, A. and Pirrelli, V.}, TITLE = {Using mobile technology for reading assessment}, YEAR = {2020}, ABSTRACT = {The enormous potential of Information and Communication Technologies (ICT) for addressing critical educational issues is generally acknowledged, but its use in the assessment of the complex skills of reading and understanding a text has been very limited to date. The paper contrasts traditional reading assessment protocols with ReadLet, an ICT platform with a tablet front-end, designed to support online monitoring of silent and oral reading abilities in early graders. ReadLet makes use of cloud computing and mobile technology for large-scale data collection and allows the time alignment of the child’s reading behaviour with texts tagged using Natural Language Processing (NLP) tools. Initial findings replicate established benchmarks from the psycholinguistic literature on reading in both typically and atypically developing children, making the application a new ground-breaking approach in the evaluation of reading skills}, KEYWORDS = {reading assessment, reading research, mobile technology, NLP, cloud computing, special education needs}, PAGES = {302-307}, URL = {https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9357173}, VOLUME = {2020-JUNE}, DOI = {10.1109/CiSt49399.2021.9357173}, ISBN = {978-1-7281-6646-9}, CONFERENCE_NAME = {6th IEEE Congress on Information Science and Technology (CiSt)}, BOOKTITLE = {Proceedings of the 6th IEEE Congress on Information Science and Technology (CiSt)}, } @INPROCEEDINGS{PIRRELLI_2020_INPROCEEDINGS_PCCDFGMNT_427657, AUTHOR = {Pirrelli, V. and Cappa, C. and Crepaldi, D. 
and Del Pinto, V. and Ferro, M. and Giulivi, S. and Marzi, C. and Nadalini, A. and Taxitari, L.}, TITLE = {Tracking the pace of reading with finger movements}, YEAR = {2020}, ABSTRACT = {Recent experimental evidence in visual perception analysis shows that eye and finger movements strongly correlate during scene exploration, at both individual and group levels. A familiar context which exploits this synergistic behaviour is when children learn to read, with the practice of finger-pointing to text as a support for their attention focus, directional movement and voice-print match. Using a tablet to display short texts, we collected evidence on the finger-pointing behaviour of 3rd-6th Italian graders engaged in both silent and oral reading. "Finger-tracking" data, sampled by the tablet and aligned with the text, made it possible to time a child's reading pace at word and sentence level. Results are shown to replicate established benchmarks in the reading literature, such as the difference in reading pace between age-matched typical and atypical readers as a function of word frequency and length, and neighbourhood entropy and Old20. Atypical readers show increasing difficulty with longer words, with a steeper time increment for word length \> 6, integrating previous evidence. In addition, neighbourhood density plays a sparse facilitative role in atypical reading, with no significant interaction with neighbourhood entropy, pointing to a non trivial developmental interplay between sublexical reading and the richness of the Italian orthographic-phonological lexicon. 
Despite their different dynamics, optical and tactile strategies for text exploration prove to be highly congruent: this suggests that finger-tracking can be used as an ecological proxy for eye-tracking in reading assessment}, KEYWORDS = {Reading, Finger tracking, Mental Lexicon, Word frequency, Word Length, Neighbourhood entropy}, PAGES = {1}, URL = {https://osf.io/hr62g/}, CONFERENCE_NAME = {Words in the World International Conference}, BOOKTITLE = {Words in the World book of abstracts}, } @ARTICLE{MARZI_2019_ARTICLE_MFP_392957, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {A processing-oriented investigation of inflectional complexity}, YEAR = {2019}, ABSTRACT = {Due to the typological diversity of their inflectional processes, some languages are intuitively more difficult than other languages. Yet, finding a single measure to quantitatively assess the comparative complexity of an inflectional system proves an exceedingly difficult endeavor. In this paper we propose to investigate the issue from a processing-oriented standpoint, using data processed by a type of recurrent neural network to quantitatively model the dynamic of word processing and learning in different input conditions. We evaluate the relative complexity of a set of typologically different inflectional systems (Greek, Italian, Spanish, German, English and Standard Modern Arabic) by training a Temporal Self-Organizing Map (TSOM), a recurrent variant of Kohonen's Self-Organizing Maps, on a fixed set of verb forms from top-frequency verb paradigms, with no information about the morphosemantic and morphosyntactic content conveyed by the forms. After training, the behavior of each language-specific TSOM is assessed on different tasks, looking at self-organizing patterns of temporal connectivity and functional responses. 
Our simulations show that word processing is facilitated by maximally contrastive inflectional systems, where verb forms exhibit the earliest possible point of lexical discrimination. Conversely, word learning is favored by a maximally generalizable system, where forms are inferred from the smallest possible number of their paradigm companions. Based on evidence from the literature and our own data, we conjecture that the resulting balance is the outcome of the interaction between form frequency and morphological regularity. Big families of stem-sharing, regularly inflected forms are the productive core of an inflectional system. Such a core is easier to learn but slower to discriminate. In contrast, less predictable verb forms, based on alternating and possibly suppletive stems, are easier to process but are learned by rote. Inflection systems thus strike a balance between these conflicting processing and communicative requirements, while staying within tight learnability bounds, in line with Ackermann and Malouf's Low Conditional Entropy Conjecture. Our quantitative investigation supports a discriminative view of morphological inflection as a collective, emergent system, whose global self-organization rests on a surprisingly small handful of language-independent principles of word coactivation and competition}, KEYWORDS = {Morphological complexity, Discriminative learning, Recurrent neural networks (RNNs), self-organization, emergence, processing uncertainty, stem-family size}, PAGES = {1-23}, URL = {https://www.frontiersin.org/articles/10.3389/fcomm.2019.00048/full}, VOLUME = {4 (48)}, DOI = {10.3389/fcomm.2019.00048}, ISSN = {2297-900X}, JOURNAL = {FRONTIERS IN COMMUNICATION}, } @ARTICLE{CARDILLO_2018_ARTICLE_CFMP_355603, AUTHOR = {Cardillo, F. and Ferro, M. and Marzi, C. 
and Pirrelli, V.}, TITLE = {Deep Learning of Inflection and the Cell-Filling Problem}, YEAR = {2018}, ABSTRACT = {Machine learning offers two basic strategies for morphology induction: lexical segmentation and surface word relation. The first approach assumes that words can be segmented into morphemes. Inferring a novel inflected form requires identification of morphemic constituents and a strategy for their recombination. The second approach dispenses with segmentation: lexical representations form part of a network of associatively related inflected forms. Production of a novel form consists in filling in one empty node in the network. Here, we present the results of a task of word inflection by a recurrent LSTM network that learns to fill in paradigm cells of incomplete verb paradigms. Although the task does not require morpheme segmentation, we show that accuracy in carrying out the inflection task is a function of the model's sensitivity to paradigm distribution and morphological structure}, KEYWORDS = {Deep Learning, LSTM, Cell-Filling Problem}, PAGES = {57-75}, URL = {https://publications.cnr.it/doc/396348}, VOLUME = {4 (1)}, ISSN = {2499-4553}, JOURNAL = {IJCOL}, } @ARTICLE{FERRO_2018_ARTICLE_FMP_356242, AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {Discriminative word learning is sensitive to inflectional entropy}, YEAR = {2018}, ABSTRACT = {Psycholinguistic evidence based on inflectional and derivational word families has emphasised the combined role of Paradigm Entropy and Inflectional Entropy in human word processing. Although the way frequency distributions affect behavioural evidence is clear in broad outline, we still miss a clear algorithmic model of how such a complex interaction takes place and why. The main challenge is to understand how the local interaction of learning and processing principles in morphology can result in global effects that require knowledge of the overall distribution of stems and affixes in word families. 
We show that principles of discriminative learning can shed light on this issue. We simulate learning of verb inflection with a discriminative recurrent network of specialised processing units, whose level of temporal connectivity reflects the frequency distribution of input symbols in context. We analyse the temporal dynamic with which connection weights are adjusted during discriminative learning, to show that self-organised connections are optimally functional to word processing when the distribution of inflected forms in a paradigm (Paradigm Entropy) and the distribution of their inflectional affixes across paradigms (Inflectional Entropy) diverge minimally}, KEYWORDS = {discriminative learning, word processing, recurrent neural networks, relative entropy}, PAGES = {307-327}, URL = {https://www.rivisteweb.it/doi/10.1418/91871}, VOLUME = {XVII (2)}, DOI = {10.1418/91871}, ISSN = {1720-9331}, JOURNAL = {LINGUE E LINGUAGGIO}, } @INPROCEEDINGS{FERRO_2018_INPROCEEDINGS_FCGMNCP_350556, AUTHOR = {Ferro, M. and Cappa, C. and Giulivi, S. and Marzi, C. and Nahli, O. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {ReadLet: Reading for Understanding}, YEAR = {2018}, ABSTRACT = {This paper focuses on motivation, objectives, design issues and preliminary results of ReadLet, an ICT platform for assessing reading efficiency in primary school children. Test data are discussed on a sample of 200 early graders, reading French, Italian and Standard Modern Arabic (SMA)}, KEYWORDS = {Reading, text comprehension, Specific Learning Disorders, multimodal signal processing, cloud computing, portable assistive technology}, PAGES = {404-409}, URL = {https://publications.cnr.it/doc/390504}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-5386-4385-3}, CONFERENCE_NAME = {IEEE-CIST2018 LED-ICT}, CONFERENCE_PLACE = {New York}, BOOKTITLE = {Proceedings of the IEEE Congress on Information Science and Technology (CiSt)}, } @INPROCEEDINGS{MARZI_2018_INPROCEEDINGS_MFNBBP_349950, AUTHOR = {Marzi, C. 
and Ferro, M. and Nahli, O. and Belik, P. and Bompolas, S. and Pirrelli, V.}, TITLE = {Evaluating Inflectional Complexity Crosslinguistically: a Processing Perspective}, YEAR = {2018}, ABSTRACT = {The paper provides a cognitively motivated method for evaluating the inflectional complexity of a language, based on a sample of "raw" inflected word forms processed and learned by a recurrent self-organising neural network with fixed parameter setting. Training items contain no information about either morphological content or structure. This makes the proposed method independent of both meta-linguistic issues (e. g. format and expressive power of descriptive rules, manual or automated segmentation of input forms, number of inflectional classes etc.) and language-specific typological aspects (e. g. word-based, stem-based or template-based morphology). Results are illustrated by contrasting Arabic, English, German, Greek, Italian and Spanish}, KEYWORDS = {paradigm-based morphology, inflectional complexity, prediction-based processing, recurrent self-organising networks, Statistical And Machine Learning Methods, Language Modelling}, PAGES = {3860-3866}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/summaries/745.html}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{CAPPA_2018_INPROCEEDINGS_CFGMNCP_351301, AUTHOR = {Cappa, C. and Ferro, M. and Giulivi, S. and Marzi, C. and Nahli, O. and Cardillo, F. 
and Pirrelli, V.}, TITLE = {ReadLet: piattaforma ICT per valutare l'efficienza di lettura}, YEAR = {2018}, ABSTRACT = {ReadLet è una piattaforma ICT pensata per valutare accuratamente l'efficienza di lettura nei bambini della scuola primaria. Combina tecnologia ICT portatile e cloud-computing con una serie di moduli software, specifici per modalità di somministrazione. Questi, implementati come servizi web, includono: i) valutazione dell'elaborazione del testo e della leggibilità; ii) valutazione della velocità di lettura (ad alta voce e silente) e delle sue fluttuazioni; iii) valutazione della correttezza della decodifica ad alta voce; iv) valutazione della comprensione del testo (in lettura silente e da ascolto). Un prototipo della tecnologia ReadLet è stato sperimentato su circa 200 alunni (8-11 anni), che variano per stato socio-economico, lingua (italiana, francese, araba) e area geografica (Italia, Svizzera, Marocco). L'utilizzo del tablet per la lettura è stato percepito dai bambini come un'esperienza coinvolgente e piacevole. Gli insegnanti hanno trovato lo strumento facile da utilizzare e in grado di fornire maggiori informazioni rispetto agli strumenti tradizionali}, KEYWORDS = {leggere per capire, disturbi del linguaggio, screening}, URL = {https://www.airipa.it/congresso/pluginfile.php/2781/mod_resource/content/1/ProgrammaCongressoAIRIPA_Arezzo_dettagliato-3.pdf}, CONFERENCE_NAME = {XXVII Congresso Nazionale AIRIPA}, BOOKTITLE = {I disturbi dell'Apprendimento-Abstract book XXVII Congresso Nazionale AIRIPA}, } @INPROCEEDINGS{FERRO_2018_INPROCEEDINGS_FCGMCP_351299, AUTHOR = {Ferro, M. and Cappa, C. and Giulivi, S. and Marzi, C. and Cardillo, F. and Pirrelli, V.}, TITLE = {ReadLet: an ICT platform for the assessment of reading efficiency in early graders}, YEAR = {2018}, ABSTRACT = {Reading is not just word decoding, but the joint product of decoding and deep linguistic comprehension [ 1, 2 ]. 
Effective linguistic comprehension relies on language skills such as semantic and syntactic awareness. Both decoding and linguistic comprehension are necessary for reading comprehension, and neither is by itself sufficient [ 2 ]. However, current protocols for reading assessment measure decoding (reading accuracy and speed) and reading comprehension separately [ 3, 4, 5 ]. This does not allow evaluation of reading efficiency [ 6 ], defined as the ability to fully understand connected texts by minimising reading time, a cognitive ability that lies at the roots of students' academic achievement [ 8, 7 ]. ReadLet is an ICT platform specifically designed to provide accurate, evidence-based assessment of reading efficiency in early grade children, by offering an ecological, non-invasive protocol for extensive data elicitation, storage and analysis. With ReadLet, early graders at school can read a one or two page text displayed on a tablet touchscreen, either silently or aloud. Children are asked to slide their finger across the words as they read, to guide directional tracking. After reading, the child is prompted with a few multiple-answer questions on text content presented one at a time, while the text remains displayed on the screen for the child to be able to retrieve relevant information. In the process, the tablet keeps track of time-aligned multimodal data: voice recording, finger sliding time, time of reading, time of question answering, and number of correct answers. Data are recorded, stored locally, sent to the ReadLet server through an internet connection, and processed remotely by a battery of cloud-based services, analysing data automatically to produce a detailed quantitative signature of each reading session. A server-based database aggregates anonymised data to make them available for specialists. Also individual's longitudinal profiles are stored, for them be queried and inspected upon authorised access. 
The platform combines portable ICT technology and cloud computing with a number of modality-specific software modules, implemented as web services including: i) a text processing and readability assessment service, consisting in a battery of tools for automated linguistic annotation of written texts and a machine-learning component assigning a readability score to annotated texts [ 9 ]; ii) a finger touch processing service aligning the child's finger sliding with the written text and measuring speed fluctuations; iii) a speech processing and decoding assessment service, aligning the acoustic record of child's reading with the written text and assessing correctness of recoding [ 10 ]. At the time of writing, the platform includes the first two modules only. Preliminary testing of a prototype version of ReadLet technology with a population of about 200 pupils aged 8 to 11, both male and female, varying for socio-economic status, language (Italian, French and Arabic) and geographical area (Italy and Morocco), showed that children are extremely responsive to using a tablet for reading, and very easy to engage in what they perceive as an enjoyable experience. We expect online databases of automatically classified cross-sectional and longitudinal data, accurate statistical modelling and developmental trends of reading literacy to help education professionals and clinical specialists assess the level of reading skills reached by the child, and decide which intervention programmes and measures are most appropriate.
While information technology cannot and should not supplant the role and professional judgement of teachers and therapists, the project intends to provide portable tools, models and data for timely screening and daily management of reading difficulties and disorders}, KEYWORDS = {reading efficiency, decoding, comprehension, language specific disorders}, PAGES = {61-61}, URL = {https://mentallexicon2018.ca/}, CONFERENCE_NAME = {11th International Conference on the Mental Lexicon}, BOOKTITLE = {Book of Abstract of the 11th International Conference on the Mental Lexicon}, } @INPROCEEDINGS{MARZI_2018_INPROCEEDINGS_MFP_355611, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Is inflectional irregularity dysfunctional to human processing?}, YEAR = {2018}, ABSTRACT = {Regularly inflected verb forms are classically associated with the formal transparency and predictability of their internal constituents [ 1, 2, 3 ]. Transparency ensures that full forms can be segmented uniquely into their internal constituents: as in walk-s/walk-ed. Predictability allows for a speaker to fill in an empty paradigm cell, using information from other known forms of the same lexical paradigm and its inflection macro-class. From this perspective, irregulars appear to be dysfunctional to the human processing system, as they make it hard to infer - say - bought from buy, or segment bought appropriately into its constituent parts. Likewise, an influential psycholinguistic tradition relegates irregulars to the lexical store, whereas regulars are segmented by rules into their simpler constituents [ 4, 5 ]. Here, we offer a few reasons for questioning this view. First, transparency and predictability are not dichotomous notions. Secondly, their influence on processing is not unidirectional.
Unpredictable stems in irregularly inflected forms of complex inflectional systems provide a lot of processing information, by dynamically constraining the number of possible alternative endings during serial processing. Thirdly, acquisition of word inflection does not consist in associating co-occurring cues and outcomes, but in discriminating between multiple cues that are constantly in competition for their predictive value for a given outcome. We present the results of a few computer simulations with Self-organising Recurrent Neural Networks (TSOMs, [ 8, 9 ]) that learn how to inflect high-frequency verb paradigms in 6 languages: English, German, Italian, Modern Greek, Modern Standard Arabic and Spanish. After training, each TSOM was tested on a word recognition (serial recoding) and a word production (serial recall) task, and results were analysed with generalised regression models. Processing uncertainty is differently apportioned on regulars and irregulars, depending on the nature of the processing task. While irregulars are harder to produce when they are unknown because they typically have fewer neighbours than regulars have, they are readily accessed once they are acquired, for exactly the same reason. Our data are in line with psycholinguistic evidence [ 10, 11 ] that lexical processing is paced by two types of uniqueness point: Marslen-Wilson's Uniqueness Point (UP), distinguishing unrelated onset-overlapping words [ 12 ], and the Complex Uniqueness Point (CUP), distinguishing paradigmatically-related words [ 11 ]. Late UPs are inhibitory and elicit prolonged reaction times in acoustic word recognition, explaining an early delay in word recognition of irregular stems. Similarly, late CUPs are inhibitory, and this accounts for a slowdown in the processing advantage of regulars, compared to irregulars, after UP.
These structural factors interact in a variety of ways and concurrently affect human processing, to show that irregularly-inflected forms may in fact reflect communicative and processing constraints of the word processor. They provide strong evidence against a processing architecture that assumes compartmentalized, independent processing routes for some specific combinations of these factors (e. g. a rule-based route for a combination of transparency and predictability, and a memory-based route for all other combinations). In addition, they seem incompatible with Bayesian approaches to auditory word comprehension ignoring a word's internal structure [ 13 ]. We suggest that a different design of the human language processor, based on a computational architecture integrating memory and processing as two different dynamics of the same underlying mechanism, can shed light on the complexity of inflection, and vindicate the role of irregular inflection in the system}, KEYWORDS = {inflectional processing, temporal self organizing maps, letter prediction, morpheme boundary}, PAGES = {60-60}, URL = {https://mentallexicon2018.ca/}, CONFERENCE_NAME = {11th International Conference on the Mental Lexicon}, BOOKTITLE = {Book of Abstract of the 11th International Conference on the Mental Lexicon}, } @INPROCEEDINGS{PIRRELLI_2018_INPROCEEDINGS_PFMGSM_355608, AUTHOR = {Pirrelli, V. and Ferro, M. and Marzi, C. and Gagné, C. and Spalding, T. and Marelli, M.}, TITLE = {Processing compounds: what frequency (alone) cannot explain}, YEAR = {2018}, ABSTRACT = {Observed elevation in typing latency for the initial letter of the second constituent of an English compound, compared with the typing time of the final letter of the first constituent (Gagné \& Spalding 2016), suggests that both compounds (snowball) and pseudo-compounds (carpet) are decomposed but also that full form representations are available in the lexical store.
To gain further insight into the lexical representations underlying typing, we used computational modelling. In particular, we used superpositional models of word memory, based on Self-Organising Recurrent Maps (TSOMs) (Ferro et al. 2016; Marzi et al. 2016), where both simple and compound words are processed (and stored) using the same pool of processing (and memory) resources, to model the elevation in typing time at the constituent boundary and the rate of typing. In addition, we also considered models based in the Compositional Distributional Semantics framework (CAOSS, Marelli et al. 2017), to simulate independent effects of semantic transparency on compound typing (Gagné \& Spalding 2016). Due to co-activation and competition between compounds and their constituent words in TSOMs, levels of activation of processing nodes per letter positions appear to reflect degrees of context-sensitive predictability: the higher the level, the more expected the letter in that position. In English compounds, activation levels appeared to exhibit a characteristically U-shaped pattern, with min values centred on the constituent boundary. A similar pattern was found for pseudo-compounds, which nonetheless present a less pronounced U-shaped pattern and a higher activation value at the morpheme boundary than compounds do. The difference is in line with the higher speed-up rate in typing pseudo-compounds than compounds reported in Gagné and Spalding (2016). TSOMs were trained on letter-based representations, so computer experiments could simulate peripheral effects of serial processing of compound structure before lexical access. To investigate post-lexical issues, we also tested computational models of generation of the meanings of novel compounds based on CAOSS, which proved to be able to account for well-established relational effects in compound processing (Gagné 2001; Gagné \& Shoben 1997) with an unsupervised data-driven framework (Marelli et al. 2017).
We ran a mixed-effects regression analysis of the data in Gagné and Spalding (2016) using vector-semantics estimates and TSOM activation levels to predict typing time for the initial letter of the second constituent. There was a negative effect of TSOM letter activation levels: i. e. the more active a letter node is, the faster a subject is at typing the letter (t = -2.7, p = .007). Also, there was a positive effect of CAOSS-based compositionality estimates: i. e. the more easily a compound's lexicalized meaning can be obtained through compositional operations on single constituent vectors, the slower participants were at typing the first letter of the second constituent (t = 2.4, p = .017). These results have interesting implications for an integrative computational architecture accounting for the whole range of experimental evidence reported by Gagné and Spalding (2016). In particular we will focus on evidence of a stronger competition (and longer typing time) in Transparent-Transparent and Transparent-Opaque compounds, vs. Opaque-Transparent compounds, which gives an indication of a non-trivial interaction between semantic compositionality and serial processing effects}, KEYWORDS = {compound processing, Temporal Self-organizing Map, letter production latency, constituent boundary}, PAGES = {60-60}, URL = {https://mentallexicon2018.ca/}, CONFERENCE_NAME = {11th International Conference on the Mental Lexicon}, BOOKTITLE = {Book of Abstract of the 11th International Conference on the Mental Lexicon}, } @ARTICLE{BOMPOLAS_2017_ARTICLE_BFMCP_336890, AUTHOR = {Bompolas, S. and Ferro, M. and Marzi, C. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {For a performance-oriented notion of regularity in inflection: the case of Modern Greek conjugation}, YEAR = {2017}, ABSTRACT = {Paradigm-based approaches to word processing/learning assume that word forms are not acquired in isolation, but through associative relations linking members of the same word family (e. g.
a paradigm, or a set of forms filling the same paradigm cell). Principles of correlative learning offer a set of equations that are key to modelling this complex dynamic at a considerable level of detail. We use these equations to simulate acquisition of Modern Greek conjugation, and we compare the results with evidence from German and Italian. Simulations show that different Greek verb classes are processed and acquired differentially, as a function of their degrees of formal transparency and predictability. We relate these results to psycholinguistic evidence of Modern Greek word processing, and interpret our findings as supporting a view of the mental lexicon as an emergent integrative system}, KEYWORDS = {paradigm-based morphology, gradient (ir)regularity, recurrent self-organising networks}, PAGES = {77-92}, URL = {http://www.ai-lc.it/IJCoL/v3n1/IJCOL_3_1_5_bompolas_et_al.pdf?v=2a47ad90f2ae}, VOLUME = {3 (1)}, ISSN = {2499-4553}, JOURNAL = {IJCOL}, } @EDITORIAL{PIRRELLI_2017_EDITORIAL_PZ_327066, AUTHOR = {Pirrelli, V. and Zarghili, A.}, TITLE = {Arabic Natural Language Processing: Models, systems and applications}, YEAR = {2017}, KEYWORDS = {Natural Language Processing, Standard Modern Arabic}, PAGES = {A1-A3}, URL = {https://www.sciencedirect.com/science/article/pii/S1319157817301155}, VOLUME = {29}, DOI = {10.1016/j.jksuci.2017.04.004}, ISSN = {2213-1248}, } @INPROCEEDINGS{CARDILLO_2017_INPROCEEDINGS_CFMP_326587, AUTHOR = {Cardillo, F. A. and Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {How "deep" is learning word inflection?}, YEAR = {2017}, ABSTRACT = {Machine learning offers two basic strategies for morphology induction: lexical segmentation and surface word relation. The first one assumes that words can be segmented into morphemes. Inducing a novel inflected form requires identification of morphemic constituents and a strategy for their recombination.
The second approach dispenses with segmentation: lexical representations form part of a network of associatively related inflected forms. Production of a novel form consists in filling in one empty node in the network. Here, we present the results of a recurrent LSTM network that learns to fill in paradigm cells of incomplete verb paradigms. Although the process is not based on morpheme segmentation, the model shows sensitivity to stem selection and stem-ending boundaries}, KEYWORDS = {LSTM, Morphology induction, Cognitive modelling}, PAGES = {77-82}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85037368972\&origin=inward}, VOLUME = {2006}, DOI = {10.4000/books.aaccademia.2314}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {978-88-99982-76-8}, CONFERENCE_NAME = {Fourth Italian Conference on Computational Linguistics}, CONFERENCE_PLACE = {Torino}, BOOKTITLE = {Proceedings of the Fourth Italian Conference on Computational Linguistics (CLiC-it 2017)}, EDITOR = {Basili, R. and Nissim, M. and Satta, G.}, } @INPROCEEDINGS{PIRRELLI_2017_INPROCEEDINGS_P_358748, AUTHOR = {Pirrelli, V.}, TITLE = {Co-activation and competition effects in lexical storage and processing}, YEAR = {2017}, ABSTRACT = {According to traditional wisdom in Linguistics, morphologically simple words reside in the mental lexicon, a kind of brain dictionary that contains unpredictable mappings between lexical features. Here I illustrate some of the defining features of an alternative view of the language architecture, where computation and storage are just the short-term and long-term dynamics of the same underlying process.
Empirical results of a computational model of this view are reported and general implications for a theory of the lexicon are discussed}, KEYWORDS = {Mental Lexicon, Morphology, Human Language Processing, artificial neural networks, lexical self-organization}, PAGES = {1-21}, URL = {https://picgl4.files.wordpress.com/2015/11/4-paper_1_pirrelli.pdf}, CONFERENCE_NAME = {4th Patras International Conference of Graduate Students in Linguistics}, } @INPROCEEDINGS{BOMPOLAS_2017_INPROCEEDINGS_BMFCPR_327030, AUTHOR = {Bompolas, S. and Marzi, C. and Ferro, M. and Cardillo, F. A. and Pirrelli, V. and Ralli, A.}, TITLE = {Transparency and predictability in Modern Greek conjugation: Implications for models of word processing}, YEAR = {2017}, ABSTRACT = {We argue that the Greek evidence calls for a substantial revision of the clear-cut interaction between transparency/predictability and regularity, to make room for a more process-oriented notion of regularity. According to this view, regularity is no longer an epiphenomenon of the design of the human language faculty and the purported dualism between rule-based and memory-based routes, but the graded result of the varying interaction of several structural factors concurrently affecting the human word processor}, KEYWORDS = {Inflectional regularity, Word Processing, Modern Greek Conjugation}, PAGES = {17-19}, URL = {http://www.lilec.it/mmm/wp/wp-content/uploads/2017/02/Book-of-abstracts_MMM11_Final.pdf}, CONFERENCE_NAME = {MMM 11: 11th Mediterranean Morphology Meeting}, BOOKTITLE = {Proceedings of the 11th Mediterranean Morphology Meeting}, } @INPROCEEDINGS{PIRRELLI_2017_INPROCEEDINGS_PMFC_327022, AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M. and Cardillo, F. 
A.}, TITLE = {Paradigm Relative Entropy and Discriminative Learning}, YEAR = {2017}, ABSTRACT = {In the present contribution, we show that principles of discriminative learning of symbolic time series go a long way in accounting for these effects, thus making an important contribution to our understanding of the human lexical processor and its sensitivity to word distributions both within and across paradigms}, KEYWORDS = {Paradigm Entropy, Discriminative Learning, Mental Lexicon, Verb Inflection}, PAGES = {5}, URL = {http://w3.erss.univ-tlse2.fr/ParadigMo2017/program.html}, CONFERENCE_NAME = {ParadigMo 2017: First Workshop on Paradigmatic Word Formation Modeling}, BOOKTITLE = {Book of Abstract of the ParadigMo Workshop}, } @ARTICLE{MARZI_2016_ARTICLE_MFCP_319237, AUTHOR = {Marzi, C. and Ferro, M. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {Effects of frequency and regularity in an integrative model of word storage and processing}, YEAR = {2016}, ABSTRACT = {In spite of converging evidence in this direction, little efforts have been put so far into providing detailed, algorithmic models of the interaction between lexical token frequency, paradigm frequency, and paradigm regularity in word processing and acquisition. Here we propose a neuro-computational account of the frequency/regularity interaction, and discuss some of its theoretical implications by analysing experimental results in the computational framework of Temporal Self-Organising Maps. Detailed quantitative analysis shows that the model provides a unitary explanatory framework bringing together insights from neighbour family effects on word recognition and production, evidence from family size effects in serial lexical access and paradigm-based dynamics in lexical acquisition. Considerable evidence has accrued on the role of paradigms as both theoretical and cognitive structures regimenting the way words are processed and acquired. 
The evidence supports a view of the lexicon as an emergent integrative system, where word forms are concurrently and competitively stored as repeatedly successful processing patterns, and on-line processing crucially depends on the internal organisation of stored patterns}, KEYWORDS = {Lexical access, word recall, serial processing, parallel activation, inflectional paradigms, mental lexicon}, PAGES = {79-114}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84986550295\&origin=inward}, VOLUME = {28 (1)}, ISSN = {2499-8117}, JOURNAL = {ITALIAN JOURNAL OF LINGUISTICS}, } @EDITORIAL{MARZI_2016_EDITORIAL_MP_319239, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {Word knowledge and word usage: A foreword}, YEAR = {2016}, ABSTRACT = {This special issue, together with its companion issue to appear in Italian Journal of Linguistics, stems from the NetWordS Final Conference "Word knowledge and word usage: representations and processes in the mental lexicon". The conference, held on the 30th and 31st of March, and the 1st of April 2015 in Pisa, concluded the 4-year NetWordS project, the European Network of Word Structure funded by the European Science Foundation within the Research Networking Programme.
In line with the highly multidisciplinary profile of NetWordS agenda, the conference offered a comprehensive and inclusive forum focussing on two main lines of lexical inquiry: (i) usage-based approaches to bootstrapping word form and structure (morpho-phonological and morpho-syntactic issues), including: acquisition of lexical categories, emergence of morphological structure, lexical memories, anticipatory prediction-based mechanisms of word recognition, word production, frequency-based models of lexical productivity, word encoding, models of lexical architecture, family-based effects in word processing, word reading and writing; (ii) usage-based approaches to word meanings (lexical semantics and pragmatics in morphologically simple and complex words), including: distributional semantics, compound interpretation, concept composition and coercion, conceptualization of perception and action, time and space in the lexicon, metonymy and metaphor, lexico-semantic relations, perceptual grounding and embodied cognition, context-based and encyclopedic knowledge, semantic association and categorization. The multidisciplinary focus on word knowledge and word usage promoted by the Conference led participants to openly discuss an impressive range of approaches and empirical data: priming and lexical decision in a number of contexts, distributional semantics and models of semantic composition, neural networks, machine learning and mathematical modelling of empirical evidence, as well as their neuro-biological and neuro-functional correlates. It is widely acknowledged that looking at the same problem from different angles has an additive effect on the impact of current language research. Certainly more can be achieved, however, if, rather than simply adding more perspectives on the same subject, with individual research efforts staying within the boundaries of single knowledge domains, scholars manage to integrate them into a boundary-shifting methodological perspective.
When psycholinguistic evidence from humans is successfully replicated algorithmically through a computational model implementing a few well-understood principles of time-series processing, we are in a position to empirically assess what input conditions favour memorisation and acquisition of symbolic strings by the model, and test these algorithmic predictions back on human subjects, thus going full circle. This may have a multiplicative effect on current research, providing not only mathematical modelling of present behavioural evidence, but amounting to fully explanatory mechanisms. Our current understanding of WHERE and WHEN some cognitive processes are implemented in the brain will be complemented by knowledge of WHAT information they rely on and HOW they integrate it. Other compelling examples of the full potential of cross-disciplinary integration can be found in the present volume and in the twin issue of Italian Journal of Linguistics. As a general point, we contend that only by putting single-domain acquisitions into the wider context of human communication, and developing an interdisciplinary framework whereby each specialist will take advantage of insights from other disciplines, we can make substantial progress in our understanding of the lexical roots of human verbal communication in real contexts.
The edited selection of papers presented here provides a representative sample of the range of approaches debated at the NetWordS Pisa Conference, by way of illustration of how aspects of knowledge integration and methodological innovation can be put at the service of a better understanding of broad lexical issues}, KEYWORDS = {word knowledge, word usage, interdisciplinary approach, mental lexicon, NetWordS}, PAGES = {3-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84978285090\&origin=inward}, DOI = {10.1418/83651}, PUBLISHER = {Il Mulino (Bologna, ITA)}, ISSN = {1720-9331}, ISBN = {978-88-15-26226-4}, CONFERENCE_PLACE = {Bologna}, } @EDITORIAL{MARZI_2016_EDITORIAL_MP_319238, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {Word knowledge and word usage: A Foreword}, YEAR = {2016}, ABSTRACT = {This special issue, together with its companion issue to appear in Lingue e Linguaggio, stems from the NetWordS Final Conference Word knowledge and word usage: representations and processes in the mental lexicon. The conference, held on the 30th and 31st of March, and the 1st of April 2015 in Pisa, concluded the 4-year NetWordS project, the European Network of Word Structure funded by the European Science Foundation within the Research Networking Programme.
In line with the highly multidisciplinary profile of NetWordS agenda, the conference offered a comprehensive and inclusive forum focussing on two main lines of lexical inquiry: (i) usage-based approaches to bootstrapping word form and structure (morpho-phonological and morpho-syntactic issues), including: acquisition of lexical categories, emergence of morphological structure, lexical memories, anticipatory prediction-based mechanisms of word recognition, word production, frequency-based models of lexical productivity, word encoding, models of lexical architecture, family-based effects in word processing, word reading and writing; (ii) usage-based approaches to word meanings (lexical semantics and pragmatics in morphologically simple and complex words), including: distributional semantics, compound interpretation, concept composition and coercion, conceptualization of perception and action, time and space in the lexicon, metonymy and metaphor, lexico-semantic relations, perceptual grounding and embodied cognition, context-based and encyclopedic knowledge, semantic association and categorization. The multidisciplinary focus on word knowledge and word usage promoted by the Conference led participants to openly discuss an impressive range of approaches and empirical data: priming and lexical decision in a number of contexts, distributional semantics and models of semantic composition, neural networks, machine learning and mathematical modelling of empirical evidence, as well as their neuro-biological and neuro-functional correlates}, KEYWORDS = {word knowledge, word usage, mental lexicon, interdisciplinary approach, NetWordS}, PAGES = {3-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84986558643\&origin=inward}, PUBLISHER = {Pacini (Pisa, ITA)}, ISSN = {2499-8117}, CONFERENCE_PLACE = {Pisa}, } @INPROCEEDINGS{BOMPOLAS_2016_INPROCEEDINGS_BMFCP_318149, AUTHOR = {Bompolas, S. and Marzi, C. and Ferro, M. and Cardillo, F. A. 
and Pirrelli, V.}, TITLE = {Reassessing inflectional regularity in Modern Greek conjugation}, YEAR = {2016}, ABSTRACT = {Paradigm-based approaches to word processing/learning assume that word forms are not acquired in isolation, but through associative relations linking members of the same word family (e. g. a paradigm, or a set of forms filling the same paradigm cell). Principles of correlative learning offer a set of dynamic equations that are key to modelling this complex dynamic at a considerable level of detail. We use these dynamic equations to simulate acquisition of Modern Greek conjugation, and we compare the results with evidence from German and Italian. Simulations show that different Greek verb classes are processed and acquired differentially, depending on their degrees of formal transparency and predictability. We relate these results to psycholinguistic evidence on Modern Greek word processing, and interpret our findings as supporting a view of the mental lexicon as an emergent integrative system}, KEYWORDS = {word processing, paradigm-based learning, morphological processing, Greek stem allomorphy, Temporal Self-Organising Map}, PAGES = {72-77}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85009242702\&origin=inward}, VOLUME = {1749}, DOI = {10.4000/books.aaccademia.1721}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {978-88-99982-08-9}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics (CLiC-it 2016) \& Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop (EVALITA 2016)}, CONFERENCE_PLACE = {Torino}, BOOKTITLE = {CLiC-it \& EVALITA 2016-Proceedings of Third Italian Conference on Computational Linguistics (CLiC-it 2016) \& Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop (EVALITA 2016)}, EDITOR = {Basile, P. and Corazza, A. and Montemagni, S. and Nissim, M. and Patti, V. and Semeraro, G.
and Sprugnoli, R.}, } @INPROCEEDINGS{FERRO_2016_INPROCEEDINGS_FCPGS_325153, AUTHOR = {Ferro, M. and Cardillo, F. A. and Pirrelli, V. and Gagné, C. L. and Spalding, T. L.}, TITLE = {Written word production and lexical self-organisation: evidence from English (pseudo)compounds}, YEAR = {2016}, ABSTRACT = {Elevation in typing latency for the initial letter of the second constituent of an English compound, relative to the latency for the final letter of the first constituent of the same compound, provides evidence that implementation of a motor plan for written compound production involves smaller constituents, in both semantically transparent and semantically opaque compounds. We investigate here the implications of this evidence for algorithmic models of lexical organisation, to show that effects of differential perception of the internal structure of compounds and pseudo-compounds can also be simulated as peripheral stages of lexical access by a self-organising connectionist architecture, even in the absence of morphosemantic information. This complementary evidence supports a maximization-of-opportunity approach to lexical modelling, accounting for the integration of effects of pre-lexical and lexical access}, KEYWORDS = {compound, pseudo-compound, written word production, lexical self-organisation, temporal self organising map}, PAGES = {146-151}, URL = {http://ceur-ws.org/Vol-1749/}, VOLUME = {1749}, DOI = {10.4000/books.aaccademia.1775}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {9788899982546}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics (CLiC-it 2016)}, CONFERENCE_PLACE = {Torino}, BOOKTITLE = {Proceedings CLiC-it 2016}, EDITOR = {Basile, P. and Corazza, A. and Cutugno, F. and Montemagni, S. and Nissim, M. and Patti, V. and Semeraro, G. and Sprugnoli, R.}, } @ARTICLE{MARZI_2015_ARTICLE_MP_342523, AUTHOR = {Marzi, C.
and Pirrelli, V.}, TITLE = {A Neuro-Computational Approach to Understanding the Mental Lexicon}, YEAR = {2015}, ABSTRACT = {Human lexical knowledge does not appear to be organised to minimise storage, but rather to maximise processing efficiency. The way lexical information is stored reflects the way it is dynamically processed, accessed and retrieved. A detailed analysis of the way words are memorised, of the dynamic interaction between lexical representations and distribution and degrees of regularity in input data, can shed some light on the emergence of structures and relations within fully-stored words. We believe that a bottom-up investigation of low-level memory and processing functions can help understand the cognitive mechanisms that govern word processing in the mental lexicon. Neuro-computational models can play an important role in this inquiry, as they help understand the dynamic nature of lexical representations by establishing an explanatory connection between lexical structures and processing models dictated by the micro-functions of human brain. Starting from some linguistic, psycholinguistic and neuro-physiological evidence supporting a dynamic view of the mental lexicon as an integrative system, we illustrate Temporal Self Organising-Maps (TSOMs), artificial neural networks that can model such a view by memorising time series of symbolic units (words) as routinized patterns of short-term node activation. On the basis of a simple pool of principles of adaptive Hebbian synchronisation, TSOMs can perceive possible surface relations between word forms and store them by partially overlapping activation patterns, reflecting gradient levels of lexical specificity, from holistic to decompositional lexical representations. 
We believe that TSOMs offer an algorithmic model of the emergence of high-level, global and language-specific morphological structure through the working of low-level, language-aspecific processing functions, thus promising to bridge the persisting gap between high-level principles of grammar architecture (lexicon vs. rules), computational correlates (storage vs. processing) and low-level principles and localisations of brain functions. Extensions of the current TSOM architecture are envisaged and their theoretical implications are discussed}, KEYWORDS = {Mental lexicon, dynamic storage, parallel distributed processing, hebbian learning, temporal self-organising maps.}, PAGES = {493-535}, URL = {http://jcs.snu.ac.kr/jcs/issue/vol16/no4/05+Marzi+and+Pirrelli.pdf}, VOLUME = {16 (4)}, ISSN = {1976-6939}, JOURNAL = {JOURNAL OF COGNITIVE SCIENCE}, } @INCOLLECTION{PIRRELLI_2015_INCOLLECTION_PFM_290722, AUTHOR = {Pirrelli, V. and Ferro, M. and Marzi, C.}, TITLE = {Computational complexity of abstractive morphology}, YEAR = {2015}, ABSTRACT = {Abstractive and constructive approaches to word structure make radically different assumptions concerning nature and role of the building blocks that make up a speaker's morphological competence. In this contribution, we show that the two views are also computationally different. In particular, we contend that a number of problems arising in connection with a subsymbolic implementation of the constructive view (as epitomised by classical multi-layered perceptrons) are tackled effectively, or disappear altogether, in a neurally-inspired implementation of associative networks, resting on key-notions such as self-organization and emergence. A particular variant of Kohonen's Self-Organizing Map is introduced as a model to explore and assess the implications of an abstractive approach in terms of its computational complexity. 
Details of the model (Temporal Self-Organizing Map, TSOM) and experimental data are shown to illustrate the interplay between processing and storage in language acquisition}, KEYWORDS = {Word processing, computational complexity, mental lexicon, dynamic memories, self-organisation, word structure, morphology}, PAGES = {141-166}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84938781714\&origin=inward}, DOI = {10.1093/acprof:oso/9780198723769.003.0008}, PUBLISHER = {Oxford University Press (Oxford, GBR)}, ISBN = {978-0-19-872376-9}, CONFERENCE_PLACE = {Oxford}, BOOKTITLE = {Understanding and Measuring Morphological Complexity}, EDITOR = {Baerman, M. and Brown, D. and Corbett, G. G.}, } @EDITORIAL{PIRRELLI_2015_EDITORIAL_PMF_290958, AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M.}, TITLE = {Proceedings of the NetWordS Final Conference on Word Knowledge and Word Usage: Representations and Processes in the Mental Lexicon}, YEAR = {2015}, ABSTRACT = {The international conference "Word Knowledge and Word Usage: Representations and processes in the mental lexicon" is the final outcome of 4 years of intense multi-disciplinary research networking and cooperation funded by the European Science Foundation within the framework of the NetWordS programme (May 2011-April 2015). NetWordS' mission was to bring together experts of various research fields (from brain sciences and computing to cognition and linguistics) and of different theoretical inclinations, to advance the current awareness of theoretical, typological, psycholinguistic, computational and neurophysiological evidence on the structure and processing of words, with a view to developing novel research paradigms and bringing up a new generation of language scholars. The conference was intended to provide a first forum for assessing current progress of crossdisciplinary research on language architecture and usage, and discussing prospects of future synergy. 
People are known to memorise, parse and access words in a context-sensitive and opportunistic way, by caching their most habitual and productive processing patterns into routinized behavioural schemes. Speakers not only take advantage of token-based information such as frequency of individual, holistically stored words, but they are also able to organise stored words through paradigmatic structures (or word families) whose overall size and frequency is an important determinant of ease of lexical access and interpretation. Accordingly, lexical organisation is not necessarily functional to descriptive economy and minimisation of storage, but to more performance-oriented factors such as efficiency of memorisation, access and recall. Usage-based approaches to word processing lend support to this view, to promote explanatory frameworks that aim to investigate the stable correlation patterns linking distributional entrenchment of lexical units with productivity, internal structure and ease of interpretation. Ultimately, this is intended to establish a deep interconnection between performance-oriented, low-level lexical functions such as memorisation, rehearsal, access and recall, and their neuroanatomical correlates}, KEYWORDS = {mental lexicon, linguistics, brain sciences, psycholinguistics, computing, cognition}, PAGES = {1-189}, URL = {http://ceur-ws.org/Vol-1347/}, VOLUME = {1347}, PUBLISHER = {CEUR-WS.org (Aachen, DEU)}, CONFERENCE_PLACE = {Aachen}, } @INPROCEEDINGS{FERRO_2015_INPROCEEDINGS_FMP_299099, AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {Lexical parsability and morphological structure}, YEAR = {2015}, ABSTRACT = {A classical tenet in the psycholinguistic literature on the mental lexicon is that a parsed affix presents high activation levels (and thus contributes to activation spreading to other words with the same affix), and that such levels are tightly correlated with the affix productivity. 
In a number of influential papers, it has been suggested that parsability criteria interact with frequency to define morphological productivity in the lexicon. For example, the frequency of a derivative (e. g. government) relative to its base (govern) is shown to be a good predictor for parsability/productivity. The higher the frequency ratio, the more likely the morphological structure to be perceived, and the associated affix to be used productively. The present contribution intends to offer a computational explanatory basis for this correlational evidence, and assess its applicability to the acquisition of complex inflectional paradigms. In those languages, like Italian and German, whose inflection is stem-based rather than word-based, there is often no single paradigmatic form which can act as a base by being properly contained in all other inflected variants. Yet, it seems intuitive to suggest that verbs that are inflected for one paradigm cell only (e. g. neighbouring), are learned earlier and more easily but exhibit lower levels of perceived inflectional structure than verbs with richer paradigms. This appears to be in good accord with experimental evidence of time latencies in lexical decision, which are shown to correlate negatively with token frequency, paradigm size and paradigm entropy. Our simulations, based on Temporal Self-Organizing Maps (TSOMs) allow us to establish an interesting connection between inflectional parsability, frequency-based paradigm structure, and acquisitional constraints on the interaction between the human processor and working memory. Self-organising topological models of the mental lexicon can mimic the spatial and temporal organization of memory structures supporting the processing of symbolic sequences, and can provide an interesting framework for testing integrative accounts of lexical processing/acquisition as the complex result of general-purpose operations on word stimuli (e. g. 
working memory, long-term storage, sensory-motor mapping, rehearsal, unit integration, unit analysis, executive control, time-series processing), in line with recent acquisitions on the neuro-functional architecture of the perisylvian language network in the left hemisphere of human brain. Simulations of the incremental acquisition of "mini-paradigms" (small islands of morphological contrast encompassing up to three different forms for the same verb) support the hypothesis that perception of structure (parsability) and morphological productivity strongly correlate in the inflectional lexica of German and Italian. In particular, by monitoring longitudinal progress in storage and generalisation of differently distributed inflectional paradigms in the two languages, we show that: i) high-frequency forms are stored and accessed significantly earlier than low-frequency forms; ii) deeply entrenched but paradigmatically isolated forms tend to block usage of other forms in the same paradigm; iii) low-frequency evenly distributed (highly entropic) intra-paradigmatic forms are acquired later but are easily extended. Our investigation credits the proposed computational framework with psycholinguistic plausibility, and grounds parsability-based models of morphological productivity on a specific, explicit proposal of lexical architecture. This provides an explanatory basis for both psycholinguistic and linguistic accounts of morphological structure, and offers an intermediate framework for scientific inquiry bridging the gap between linguistic units and functional units in neurosciences. 
Finally, it makes the interesting suggestion that principles of morpheme-based organisation of the mental lexicon are compatible with a learning strategy requiring memorisation of full forms}, KEYWORDS = {morphological structure, word processing, token/type frequency}, PAGES = {22-37}, URL = {http://mmm.lis.upatras.gr/index.php/mmm/issue/view/293/showToc}, ISSN = {1826-7491}, CONFERENCE_NAME = {Morphology and Semantics-Ninth Mediterranean Morphology Meeting}, BOOKTITLE = {ONLINE PROCEEDINGS OF THE MEDITERRANEAN MORPHOLOGY MEETINGS}, EDITOR = {Audring, J. and Koutsoukos, N. and Masini, F. and Raffaelli, I.}, } @INPROCEEDINGS{MARZI_2015_INPROCEEDINGS_MFP_290953, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Lexical emergentism and the "frequency-by-regularity" interaction}, YEAR = {2015}, ABSTRACT = {In spite of considerable converging evidence of the role of inflectional paradigms in word acquisition and processing, little efforts have been put so far into providing detailed, algorithmic models of the interaction between lexical token frequency, paradigm frequency, paradigm regularity. We propose a neurocomputational account of this interaction, and discuss some theoretical implications of preliminary experimental results}, KEYWORDS = {morphological structure, frequency distribution, temporal self-organising maps}, PAGES = {37-41}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84927156830\&origin=inward}, VOLUME = {1347}, CONFERENCE_NAME = {NetWordS Final Conference on Word Knowledge and Word Usage: Representations and Processes in the Mental Lexicon}, BOOKTITLE = {Word Knowledge and Word Usage 2015}, EDITOR = {Pirrelli, V. and Marzi, C. and Ferro, M.}, } @INPROCEEDINGS{PIRRELLI_2015_INPROCEEDINGS_PNBDM_293690, AUTHOR = {Pirrelli, V. and Nahli, O. and Boschetti, F. and Del Gratta, R. 
and Marzi, C.}, TITLE = {Computational Linguistics and Language Physiology: Insights from Arabic NLP and Cooperative Editing}, YEAR = {2015}, ABSTRACT = {Computer processing of written Arabic raises a number of challenges to traditional parsing architectures on many levels of linguistic analysis. In this contribution, we review some of these core issues and the demands they make, to suggest different strategies to successfully tackle them. In the end, we assess these issues in connection with the behaviour of neuro-biologically inspired lexical architectures known as Temporal Self-Organising Maps. We show that, far from being language-specific problems, issues in Arabic processing can shed light on some fundamental characteristics of the human language processor, such as structure-based lexical recoding, concurrent, competitive activation of output candidates and dynamic selection of optimal solutions}, KEYWORDS = {Non-concatenative morphology, Optical Character Recognition, WordNet, Temporal Self-organising Maps, Mental Lexicon, Language neuro-physiology}, PAGES = {1-8}, URL = {http://dl.acm.org/citation.cfm?id=2802612}, DOI = {10.1145/2802612.2802637}, ISBN = {978-1-4503-3295-8}, CONFERENCE_NAME = {Third AIUCD Annual Conference-Humanities and Their Methods in the Digital Ecosystem}, BOOKTITLE = {Third AIUCD Annual Conference-Humanities and Their Methods in the Digital Ecosystem}, EDITOR = {Tomasi, F. and Del Turco, R. R. and Tammaro, A. M.}, } @ARTICLE{CHERSI_2014_ARTICLE_CFPP_256840, AUTHOR = {Chersi, F. and Ferro, M. and Pezzulo, G. and Pirrelli, V.}, TITLE = {Topological Self-Organization and Prediction Learning Support Both Action and Lexical Chains in the Brain}, YEAR = {2014}, ABSTRACT = {A growing body of evidence in cognitive psychology and neuroscience suggests a deep interconnection between sensory-motor and language systems in the brain. 
Based on recent neurophysiological findings on the anatomo-functional organization of the fronto-parietal network, we present a computational model showing that language processing may have reused or co-developed organizing principles, functionality, and learning mechanisms typical of premotor circuit. The proposed model combines principles of Hebbian topological self-organization and prediction learning. Trained on sequences of either motor or linguistic units, the network develops independent neuronal chains, formed by dedicated nodes encoding only context-specific stimuli. Moreover, neurons responding to the same stimulus or class of stimuli tend to cluster together to form topologically connected areas similar to those observed in the brain cortex. Simulations support a unitary explanatory framework reconciling neurophysiological motor data with established behavioral evidence on lexical acquisition, access, and recall}, KEYWORDS = {Motor chains, Lexical chains, Serial working memory, Computational modeling, Self-organizing maps, Somatotopic organization, Prediction}, PAGES = {476-491}, URL = {http://onlinelibrary.wiley.com/doi/10.1111/tops.12094/abstract?deniedAccessCustomisedMessage=\&userIsAuthenticated=false}, VOLUME = {6 (3)}, DOI = {10.1111/tops.12094}, ISSN = {1756-8757}, JOURNAL = {TOPICS IN COGNITIVE SCIENCE}, } @ARTICLE{MARZI_2014_ARTICLE_MFP_226384, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Morphological structure through lexical parsability}, YEAR = {2014}, ABSTRACT = {The emergence of morphological structure in lexical acquisition is analysed in the computational framework of Temporal Self-Organising Maps (TSOMs), to provide an explanatory basis for both psycholinguistic and linguistic accounts of lexical parsability. The investigation we propose is grounded on the hypothesis that perception of morphological structure (parsability) and frequency strongly correlate in the acquisition of inflectional paradigms. 
Analysis of experimental results of word acquisition obtained by artificially varying training conditions, allows us to understand developmental competition between fully-inflected word forms, and to investigate a hierarchy of frequency effects. The computational and theoretical implications of such a memory-based view of the relationship between frequency and perception, and its potential to account}, KEYWORDS = {inflectional paradigms, morphological structure, token/type frequency, word processing.}, PAGES = {263-290}, URL = {http://www.rivisteweb.it/doi/10.1418/78410}, VOLUME = {XIII (2)}, DOI = {10.1418/78410}, ISSN = {1720-9331}, JOURNAL = {LINGUE E LINGUAGGIO}, } @EDITORIAL{ELMOHAJIR_2014_EDITORIAL_EACAEPZE_290901, AUTHOR = {El Mohajir, M. and Al Achhab, M. and Chahhou, M. and Arioua, M. and El Mohajir, B. and Pirrelli, V. and Zarghili, A. and Elfar, M.}, TITLE = {Proceedings of IEEE-CiST14-Third IEEE International Colloquium in Information Science and Technology (CIST)}, YEAR = {2014}, ABSTRACT = {The 3rd international IEEE Colloquium on Information Science and Technology (CIST'14) is part of the IEEE CONFERENCE SERIES that are held in Morocco, and is sponsored by the IEEE Morocco Section and the IEEE Morocco Computer \& Communication Joint Chapter, and the UAE IEEE Student Branch. The 2014 edition was organized in collaboration with the Faculty of Sciences of Tetuan, the national school of applied sciences of Tetuan and the University of Abdelmalek Essaadi. IEEE CIST is emerging as a key annual event that aims to serve as a forum to promote the exchange of the latest advances achieved by IT researchers, IT decision makers, IT managers, application designers and software engineers in the domain of information science and related technology. Computing challenges, models, applications and IT solutions will be discussed from the perspectives of academia, industry and government. 
In addition to the main conference topics, IEEE CIST will also provide a platform for supporting innovative and original contributions in three complementary disciplines that are: Arabic natural language processing, Information and multimedia processing and Internet of Things. We would like to extend our most sincere thanks and gratitude to the keynote speakers of IEEE CIST'14 for their important added value to this edition and to the Scientific Committee Members who helped us in the review process. We would like also to express our thanks to the IEEE Computer Society for their support through their Distinguished Lecturers Programs. We are also very glad to express our most sincere gratitude for the organizing committee members for their full dedication and professional organization of this edition. The success of this colloquium will be mainly attributed to the authors who contributed with their posters and talks. We hope that CIST will continue to offer a privileged context for participants to develop new ways and methods to achieve our objectives in advancing our research and projects. We can together achieve more and face more efficiently the challenges of the current millennium}, PAGES = {440}, URL = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6996097}, DOI = {10.1109/CIST.2014.7016582}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-4799-5979-2}, CONFERENCE_PLACE = {New York}, } @EDITORIAL{PIRRELLI_2014_EDITORIAL_PR_274740, AUTHOR = {Pirrelli, V. and Raffaelli, I.}, TITLE = {Special Issue of Suvremena Lingvistika}, YEAR = {2014}, PAGES = {127-235}, URL = {https://iris.cnr.it/handle/20.500.14243/274740}, PUBLISHER = {Croatian Philological Society (Zagreb, HRV)}, CONFERENCE_PLACE = {Zagreb}, } @INPROCEEDINGS{LYDING_2014_INPROCEEDINGS_LSBBCDDLP_261825, AUTHOR = {Lyding, V. and Stemle, E. and Borghetti, C. and Brunello, M. and Castagnoli, S. and Dell'Orletta, F. and Dittmann, H. and Lenci, A. 
and Pirrelli, V.}, TITLE = {The PAISÀ Corpus of Italian Web Texts}, YEAR = {2014}, ABSTRACT = {PAISÀ is a Creative Commons licensed, large web corpus of contemporary Italian. We describe the design, harvesting, and processing steps involved in its creation}, PAGES = {36-43}, URL = {http://aclweb.org/anthology/W14-04}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, CONFERENCE_NAME = {Corpus annotation, Tree-bank, Corpus design, Corpus harvesting}, CONFERENCE_PLACE = {Stroudsburg}, BOOKTITLE = {Proceedings of the 9th Web as Corpus Workshop (WaC-9)}, EDITOR = {Bildhauer, F. and Schäfer, R.}, } @INPROCEEDINGS{PIRRELLI_2014_INPROCEEDINGS_PMF_231380, AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M.}, TITLE = {Two-dimensional Wordlikeness Effects in Lexical Organisation}, YEAR = {2014}, ABSTRACT = {The main focus of research on wordlikeness has been on how serial processing strategies affect perception of similarity and, ultimately, the global network of associative relations among words in the mental lexicon. Comparatively little effort has been put so far, however, into an analysis of the reverse relationship: namely, how global organisation effects influence the speakers' perception of word similarity and of words' internal structure. In this paper, we explore the relationship between the two dimensions of wordlikeness (the "syntagmatic" and the "paradigmatic" one), to suggest that the same set of principles of memory organisation can account for both dimensions}, KEYWORDS = {wordlikeness, lexical access, word processing, frequency, memory}, PAGES = {301-305}, URL = {http://clic.humnet.unipi.it/it/atti.html}, VOLUME = {1}, DOI = {10.12871/CLICIT2014158}, ISBN = {978-8-86741-472-7}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics CLiC-it 2014 \& Fourth International Workshop EVALITA 2014}, BOOKTITLE = {The First Italian Conference on Computational Linguistics-Proceedings}, EDITOR = {Basili, R. and Lenci, A. 
and Magnini, B.}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BDMNP_230690, AUTHOR = {Boschetti, F. and Del Gratta, R. and Marzi, C. and Nahli, O. and Pirrelli, V.}, TITLE = {Modelli, metodi e strumenti per il trattamento automatico della lingua araba e per l'editing in ambienti collaborativi}, YEAR = {2014}, ABSTRACT = {La linguistica computazionale ha portato negli ultimi vent'anni a un profondo mutamento nello studio delle lingue e delle loro testimonianze scritte, spostando l'accento della ricerca da aspetti linguistico-formali all'uso linguistico in contesti comunicativi reali. Il presente contributo illustra l'impatto di questo cambio di prospettiva sullo studio della lingua araba, attraverso una rassegna di alcune attività di ricerca in corso presso l'Istituto di Linguistica Computazionale del CNR di Pisa: (i) acquisizione dei testi arabi tramite Optical Character Recognition (OCR) e sviluppo di strumenti per la correzione manuale del testo in ambienti collaborativi; (ii) sviluppo di algoritmi e strumenti per l'analisi morfologica della lingua araba; (iii) analisi delle dinamiche di acquisizione del lessico arabo mediante architetture bio-computazionali; (iv) sviluppo della WordNet dell'Arabo collegata a Princeton WordNet, ItalWordNet, LatinWordNet e alla nascente AncientGreek WordNet. Queste attività sono rivolte sia all'analisi delle caratteristiche linguistiche dell'arabo che allo studio della produzione letteraria araba e dei suoi rapporti storico-culturali con altre lingue. 
In particolare, il contributo intende illustrare la fertilità di un approccio metodologico che metta in relazione le dinamiche di acquisizione del lessico arabo, con la messa a punto di procedure di analisi ed edizione critica del testo e con i principi di organizzazione ontologica di una lingua ad alta produttività derivazionale}, KEYWORDS = {linguistica computazionale, uso linguistico, lessico arabo}, URL = {http://aiucd2014.unibo.it/book-of-abstracts.pdf}, CONFERENCE_NAME = {AIUCD 3rd annual conference}, BOOKTITLE = {La metodologia della ricerca umanistica nell'ecosistema digitale-AIUCD 2014 Terzo convegno annuale}, EDITOR = {Rossi, F. and Tomasi, F.}, } @INPROCEEDINGS{MARZI_2013_INPROCEEDINGS_MFP_227175, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Lexical parsability and morphological structure}, YEAR = {2013}, ABSTRACT = {A classical tenet in the psycholinguistic literature on the mental lexicon is that a parsed affix presents high activation levels (and thus contributes to activation spreading to other words with the same affix), and that such levels are tightly correlated with the affix productivity. In a number of influential papers, it has been suggested that parsability criteria interact with frequency to define morphological productivity in the lexicon. For example, the frequency of a derivative (e. g. government) relative to its base (govern) is shown to be a good predictor for parsability/productivity. The higher the frequency ratio, the more likely the morphological structure to be perceived, and the associated affix to be used productively. The present contribution intends to offer a computational explanatory basis for this correlational evidence, and assess its applicability to the acquisition of complex inflectional paradigms. 
In those languages, like Italian and German, whose inflection is stem-based rather than word-based, there is often no single paradigmatic form which can act as a base by being properly contained in all other inflected variants. Yet, it seems intuitive to suggest that verbs that are inflected for one paradigm cell only (e. g. neighbouring), are learned earlier and more easily but exhibit lower levels of perceived inflectional structure than verbs with richer paradigms. This appears to be in good accord with experimental evidence of time latencies in lexical decision, which are shown to correlate negatively with token frequency, paradigm size and paradigm entropy. Our simulations, based on Temporal Self-Organizing Maps (TSOMs) allow us to establish an interesting connection between inflectional parsability, frequency-based paradigm structure, and acquisitional constraints on the interaction between the human processor and working memory. Self-organising topological models of the mental lexicon can mimic the spatial and temporal organization of memory structures supporting the processing of symbolic sequences [8-10], and can provide an interesting framework for testing integrative accounts of lexical processing/acquisition as the complex result of general-purpose operations on word stimuli (e. g. working memory, long-term storage, sensory-motor mapping, rehearsal, unit integration, unit analysis, executive control, time-series processing), in line with recent acquisitions on the neuro-functional architecture of the perisylvian language network in the left hemisphere of human brain. Simulations of the incremental acquisition of "mini-paradigms" (small islands of morphological contrast encompassing up to three different forms for the same verb) support the hypothesis that perception of structure (parsability) and morphological productivity strongly correlate in the inflectional lexica of German and Italian. 
In particular, by monitoring longitudinal progress in storage and generalisation of differently distributed inflectional paradigms in the two languages, we show that: i) high-frequency forms are stored and accessed significantly earlier than low-frequency forms; ii) deeply entrenched but paradigmatically isolated forms tend to block usage of other forms in the same paradigm; iii) low-frequency evenly distributed (highly entropic) intra-paradigmatic forms are acquired later but are easily extended. Our investigation credits the proposed computational framework with psycholinguistic plausibility, and grounds parsability-based models of morphological productivity on a specific, explicit proposal of lexical architecture. This provides an explanatory basis for both psycholinguistic and linguistic accounts of morphological structure, and offers an intermediate framework for scientific inquiry bridging the gap between linguistic units and functional units in neurosciences. Finally, it makes the interesting suggestion that principles of morpheme-based organisation of the mental lexicon are compatible with a learning strategy requiring memorisation of full forms}, KEYWORDS = {morphological structure, word paradigms, frequency, human processor}, PAGES = {33-34}, URL = {http://mmm9.ffzg.unizg.hr/wp-content/uploads/2012/10/MMM_PROGRAM4.pdf}, CONFERENCE_NAME = {9th Mediterranean Morphology Meeting on "Morphology and Semantics" (9th MMM)}, BOOKTITLE = {Morphology and Semantics-Books of Abstracts}, } @ARTICLE{MARZI_2012_ARTICLE_MFP_6224, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Word alignment and paradigm induction}, YEAR = {2012}, ABSTRACT = {The variety of morphological processes attested in inflectional system of average complexity calls for adaptive strategies of word alignment. Prefixation, suffixation, stem alternation and combinations thereof pose severe problems to unsupervised algorithms of morphology induction. 
The paper analyses morphological generalisation as a by-product of flexible memory self-organisation strategies for word recoding. Our model endorses the hypothesis that lexical forms are memorised as full units. At the same time, lexical units are paradigmatically organised. We show that the overall amount of redundant morphological structure emerging from paradigm-based self-organisation has a clear impact on generalisation. This supports the view that issues of word representation and issues of word processing are mutually implied in lexical acquisition}, KEYWORDS = {Morphological Generalisation, Morphological Paradigm, Self-Organising Memory, Word coding and Processing}, PAGES = {251-274}, URL = {http://www.rivisteweb.it/doi/10.1418/38789}, VOLUME = {XI (2)}, DOI = {10.1418/38789}, ISSN = {1720-9331}, JOURNAL = {LINGUE E LINGUAGGIO}, } @ARTICLE{MARZI_2012_ARTICLE_MP_6217, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {Understanding the Architecture of the Mental Lexicon}, YEAR = {2012}, ABSTRACT = {The present collection stems from the 1st NetWordS Workshop "Understanding the architecture of the mental lexicon: Integration of existing approaches", held in the Pisa Research Area of the Italian National Research Council, in November 2011. "NetWordS: the European network on Word Structure in the languages of Europe" is the Research Networking Programme of the European Science Foundation launched in May 2011 with the ambitious goal of paving the way to the European interdisciplinary research agenda on the Mental Lexicon, with particular emphasis on the following three main challenges:-lexicon and rules in the grammar,-word knowledge and word use,-words and meanings}, KEYWORDS = {Mental Lexicon, interdisciplinary approach}, PAGES = {101-105}, URL = {https://iris.cnr.it/handle/20.500.14243/6217}, VOLUME = {XI (2)}, DOI = {10.1418/38780}, ISSN = {1720-9331}, JOURNAL = {LINGUE E LINGUAGGIO}, } @INCOLLECTION{PIRRELLI_2012_INCOLLECTION_PFC_134821, AUTHOR = {Pirrelli, V. 
and Ferro, M. and Calderone, B.}, TITLE = {Learning Paradigms in Time and Space: Computational Evidence from Romance Languages}, YEAR = {2012}, ABSTRACT = {In the linguistic literature, paradigms have enjoyed a hybrid status, half-way between entrenched patterns of lexical organization and processing structures enforcing global constraints on the output of traditional inflection rules. We describe here an original computational model of the mental lexicon where paradigmatic structures emerge through learning as the by-product of the endogenous dynamics of lexical memorization as competitive self-organization, based on the complementary principles of formal contrast (in space) and association biuniqueness (in time)}, KEYWORDS = {Computational model, Lexical memorization, Mental lexicon, Processing structures, Self-organizing maps}, PAGES = {135-157}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84921732430\&origin=inward}, DOI = {10.1093/acprof:oso/9780199589982.003.0008}, PUBLISHER = {Oxford University Press (Oxford, GBR)}, ISBN = {978-0-19-958998-2}, CONFERENCE_PLACE = {Oxford}, BOOKTITLE = {Morphological Autonomy: Perspectives for Romance Inflectional Morphology}, EDITOR = {Maiden, M. and Smith, J. C. and Goldbach, M.}, } @EDITORIAL{MARZI_2012_EDITORIAL_MP_226484, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {Understanding the Architecture of the Mental Lexicon}, YEAR = {2012}, ABSTRACT = {The present collection stems from the 1st NetWordS Workshop "Understanding the architecture of the mental lexicon: Integration of existing approaches", held in the Pisa Research Area of the Italian National Research Council, in November 2011. 
"NetWordS: the European network on Word Structure in the languages of Europe" is the Research Networking Programme of the European Science Foundation launched in May 2011 with the ambitious goal of paving the way to the European interdisciplinary research agenda on the Mental Lexicon, with particular emphasis on the following three main challenges:-lexicon and rules in the grammar,-word knowledge and word use,-words and meanings}, KEYWORDS = {Mental Lexicon, interdisciplinary approach}, PAGES = {101-274}, URL = {https://iris.cnr.it/handle/20.500.14243/226484}, PUBLISHER = {Il Mulino (Bologna, ITA)}, ISSN = {1720-9331}, ISBN = {978-88-15-23601-2}, CONFERENCE_PLACE = {Bologna}, } @INPROCEEDINGS{CALDERONE_2012_INPROCEEDINGS_CP_228503, AUTHOR = {Calderone, B. and Pirrelli, V.}, TITLE = {Apprendimento morfologico, relazioni base-derivato e topologie paradigmatiche. Evidenze psico-computazionali a confronto}, YEAR = {2012}, ABSTRACT = {Il presente lavoro è volto a esplorare alcune dinamiche acquisizionali relative ai processi di maturazione della competenza morfologica in apprendenti bambini. In quest'ottica, sono riportate due differenti simulazioni computazionali dei processi di apprendimento della morfologia flessiva in Italiano e in Inglese. La prima simulazione, propria di un quadro connessionista classico, dà conto in modo inadeguato delle differenti scale temporali nell'apprendimento di alcune forme flesse verbali in inglese e italiano. La letteratura sull'argomento (Pizzuto \& Caselli 1992, Noccetti 2003) documenta in modo convergente una maggiore rapidità nell'apprendimento delle forme del presente indicativo da parte dei bambini italiani rispetto al ritmo di acquisizione delle forme verbali corrispondenti (la forma di base e la terza persona singolare in -s) da parte di bambini di madre lingua inglese. 
La seconda simulazione, basata su un modello di memorie associative "a cascata" addestrate tramite protocollo non-supervisionato, rende conto in maniera non banale del paradosso acquisizionale, confermato su base inter-linguistica da un recente studio di Dressler e colleghi (Bittner et al., 2003), secondo cui sistemi flessivi più complessi e completi sono appresi con maggiore facilità di sistemi flessivi più semplici ed estesamente sincretici}, KEYWORDS = {Lessico Mentale, apprendimento morfologico, paradigmi flessionali}, PAGES = {17}, URL = {https://iris.cnr.it/handle/20.500.14243/228503}, PUBLISHER = {Bulzoni Editore (Roma, ITA)}, CONFERENCE_NAME = {XLII Convegno della Società di Linguistica Italiana}, CONFERENCE_PLACE = {Roma}, BOOKTITLE = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, EDITOR = {Bambini, V. and Ricci, I. and Bertinetto, P. M.}, } @INPROCEEDINGS{GIRAUDO_2012_INPROCEEDINGS_GMP_65173, AUTHOR = {Giraudo, H. and Montermini, F. and Pirrelli, V.}, TITLE = {Processi cognitivi nell'analisi delle classi verbali dell'italiano: un approccio sperimentale}, YEAR = {2012}, ABSTRACT = {L'analisi della flessione, soprattutto verbale, nelle lingue romanze ha ricevuto un notevole impulso negli ultimi anni, in particolare dall'apporto alla ricerca in linguistica teorica di discipline come la psicolinguistica o le scienze cognitive. In questo articolo intendiamo riesaminare la ripartizione dei verbi italiani in classi, e osservare come la teoria morfologica e l'analisi sperimentale possano dare risultati convergenti e contribuire a mettere in luce i processi mentali che costituiscono la base della competenza morfologica dei parlanti (cf. Pirrelli 2007a; 2007b e, per un'illustrazione Bonami et al. 
2008)}, KEYWORDS = {Morphology, Word Processing, Word Learning, Mental Lexicon, L1}, URL = {https://iris.cnr.it/handle/20.500.14243/65173}, PUBLISHER = {Bulzoni Editore (Roma, ITA)}, ISBN = {978-88-7870-652-1}, CONFERENCE_NAME = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, CONFERENCE_PLACE = {Roma}, BOOKTITLE = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, EDITOR = {Bambini, V. and Ricci, I. and Bertinetto, P. M.}, } @INPROCEEDINGS{MARZI_2012_INPROCEEDINGS_MFCP_225692, AUTHOR = {Marzi, C. and Ferro, M. and Caudai, C. and Pirrelli, V.}, TITLE = {Evaluating Hebbian self-organizing memories for lexical representation and access}, YEAR = {2012}, ABSTRACT = {The lexicon is the store of words in long-term memory. Any attempt at modelling lexical competence must take issues of string storage seriously. In the present contribution, we discuss a few desiderata that any biologically-inspired computational model of the mental lexicon has to meet, and detail a multi-task evaluation protocol for their assessment. The proposed protocol is applied to a novel computational architecture for lexical storage and acquisition, the "Topological Temporal Hebbian SOMs" (T2HSOMs), which are grids of topologically organised memory nodes with dedicated sensitivity to time-bound sequences of letters.
These maps can provide a rigorous and testable conceptual framework within which to provide a comprehensive, multi-task protocol for testing the performance of Hebbian self-organising memories, and a comprehensive picture of the complex dynamics between lexical processing and the acquisition of morphological structure}, KEYWORDS = {Mental Lexicon, Morphology Acquisition, Self-Organizing Maps}, PAGES = {886-893}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/index.html}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {LREC'12-8th International Conference on Language Resources and Evaluation}, BOOKTITLE = {Language Resources and Evaluation}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{MARZI_2012_INPROCEEDINGS_MFP_5201, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Prediction and Generalisation in Word Processing and Storage}, YEAR = {2012}, ABSTRACT = {Word storage and processing have traditionally been modelled according to different computational paradigms, in line with the classical corner-stone of "dual-route" models of word structure assuming a sharp dissociation between memory and computation (Clahsen 1999, Di Sciullo \& Williams 1987, Pinker \& Prince 1988, Prasada \& Pinker 1993). Even the most radical alternative to dual-route thinking, connectionist one-route models, challenged the lexicon-grammar dualism only by providing a neurally-inspired mirror image of classical base-to-inflection rules, while largely neglecting issues of lexical storage (Rumelhart \& McClelland 1986, McClelland \& Patterson 2002, Seidenberg \& McClelland 1989). Recent psycho- and neuro-linguistic evidence, however, supports a less deterministic and modular view of the interaction between stored word knowledge and on-line processing (Baayen et al. 1997, Hay 2001, Maratsos 2000, Stemberger \& Middleton 2003, Tabak et al. 2005, Ford et al.
2003, Post et al. 2008). The view entails simultaneous activation of distributed patterns of cortical connectivity encoding redundant distributional regularities in language data. Furthermore, recent developments in morphological theorising question the primacy of grammar rules over lexical storage, arguing that word regularities emerge from independent principles of lexical organisation, whereby lexical units and constructions are redundantly stored and mutually related through entailment relations (Matthews 1991, Corbett \& Fraser 1993, Pirrelli 2000, Burzio 2004, Booij 2010). We endorse here such a non modular view on Morphology to investigate two basic behavioural aspects of human word processing: morphological prediction and generalisation. The investigation is based on a computer model of morphology acquisition supporting the hypothesis that they both derive from a common pool of principles of lexical organisation}, KEYWORDS = {Morphological generalisation, Word processing, Self-organising memory}, PAGES = {114-131}, URL = {http://mmm.lingue.unibo.it/}, CONFERENCE_NAME = {Eighth Mediterranean Morphology Meeting on "Morphology and the architecture of the grammar" (MMM8)}, BOOKTITLE = {Proceedings of the 8th Mediterranean Morphology Meeting on "Morphology and the architecture of the grammar"}, EDITOR = {Ralli, A. and Booij, G. and Scalise, S.}, } @INPROCEEDINGS{PIRRELLI_2012_INPROCEEDINGS_PG_65152, AUTHOR = {Pirrelli, V. 
and Guevara, E.}, TITLE = {Understanding NN Compounds}, YEAR = {2012}, ABSTRACT = {In this paper we intend to pursue two basic objectives: i) point out a substantial convergence between classification criteria for compounding that have developed independently from largely complementary perspectives and methodological stances, and ii) assess the important empirical consequences of this convergence and their potential impact on recent linguistic analyses of lexical compounds as either lexical (and specifically morphological) or syntactic phenomena. These two points are brought home by focusing on a particular class of Italian compounds, namely endocentric NN compounds such as ufficio reclami ('complaint office') or pesce palla ('ball fish') that prove to be increasingly productive in contemporary Italian (cf. Dardano 1978, Bisetto 2004)}, KEYWORDS = {Morphological composition, Word Processing, Word Learning, Mental Lexicon}, PAGES = {17}, URL = {https://iris.cnr.it/handle/20.500.14243/65152}, PUBLISHER = {Bulzoni Editore (Roma, ITA)}, ISBN = {978-88-7870-652-1}, CONFERENCE_NAME = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, CONFERENCE_PLACE = {Roma}, BOOKTITLE = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, EDITOR = {Bambini, V. and Ricci, I. and Bertinetto, P.
M.}, } @INPROCEEDINGS{PIRRELLI_2012_INPROCEEDINGS_P_228577, AUTHOR = {Pirrelli, V.}, TITLE = {Computational Complexity in Neurally-inspired Morphology processing}, YEAR = {2012}, ABSTRACT = {Pre-compilation of memory "chunks" in the mental lexicon is beneficial for on-line processing because it enhances the capacity to predict the completion of an upcoming input word and to maintain longer and more complex word sequences in the STM store: the more we memorize, the less material we need to integrate on-line. Morpheme-based representations are like memory chunks that come into the picture when memory of whole words fails, due to either novel, degenerate and noisy input, or to limitations in perception/memory spans. Due to the combinatorial nature of language and our memory limitations, less "chunking" and more on-line integration is expected at processing levels beyond morphology}, URL = {https://iris.cnr.it/handle/20.500.14243/228577}, } @ARTICLE{CHERSI_2011_ARTICLE_CFPP_179864, AUTHOR = {Chersi, F. and Ferro, M. and Pezzulo, G. and Pirrelli, V.}, TITLE = {Time, Language and Action-A Unified Long-Term Memory Model for Sensory-Motor Chains and Word Schemata}, YEAR = {2011}, ABSTRACT = {Action and language are known to be organized as closely-related brain subsystems. An Italian CNR project implemented a computational neural model where the ability to form chains of goal-directed actions and chains of linguistic units relies on a unified memory architecture obeying the same organizing principles}, PAGES = {27-28}, URL = {http://ercim-news.ercim.eu/images/stories/EN84/EN84-web.pdf}, VOLUME = {84}, ISSN = {0926-4981}, JOURNAL = {ERCIM NEWS}, } @ARTICLE{FERRO_2011_ARTICLE_FMP_179921, AUTHOR = {Ferro, M. and Marzi, C. 
and Pirrelli, V.}, TITLE = {A Self-Organizing Model of Word Storage and Processing: Implications for Morphology Learning}, YEAR = {2011}, ABSTRACT = {In line with the classical cornerstone of "dual-route" models of word structure, assuming a sharp dissociation between memory and computation, word storage and processing have traditionally been modelled according to different computational paradigms. Even the most popular alternative to dual-route thinking -- connectionist one-route models -- challenged the lexicon-grammar dualism only by providing a neurally-inspired mirror image of classical base-to-inflection rules, while largely neglecting issues of lexical storage. Recent psycho- and neuro-linguistic evidence, however, supports a less deterministic and modular view of the interaction between stored word knowledge and on-line processing. We endorse here such a non modular view on morphology to offer a computer model supporting the hypothesis that they are both derivative of a common pool of principles for memory self-organization}, KEYWORDS = {Lexical Processing, Self Organizing Maps, Morphological Structure, Serial Memory}, PAGES = {209-226}, URL = {http://www.rivisteweb.it/doi/10.1418/35840}, VOLUME = {2}, DOI = {10.1418/35840}, ISSN = {1720-9331}, JOURNAL = {LINGUE E LINGUAGGIO}, } @INPROCEEDINGS{FERRO_2011_INPROCEEDINGS_FMP_214910, AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {T2HSOM: Understanding the Lexicon by Simulating Memory Processes for Serial Order}, YEAR = {2011}, ABSTRACT = {Over the last several years, both theoretical and empirical approaches to lexical knowledge and encoding have prompted a radical reappraisal of the traditional dichotomy between lexicon and grammar. The lexicon is not simply a large waste basket of exceptions and sub-regularities, but a dynamic, possibly redundant repository of linguistic knowledge whose principles of relational organization are the driving force of productive generalizations.
In this paper, we overview a few models of dynamic lexical organization based on neural network architectures that are purported to meet this challenging view. In particular, we illustrate a novel family of Kohonen self-organizing maps (T2HSOMs) that have the potential of simulating competitive storage of symbolic time series while exhibiting interesting properties of morphological organization and generalization. The model, tested on training samples of as morphologically diverse languages as Italian, German and Arabic, shows sensitivity to manifold types of morphological structure and can be used to bootstrap morphological knowledge in an unsupervised way}, KEYWORDS = {Mental Lexicon, Self-organizing Maps, Morphology}, PAGES = {32-41}, URL = {http://alpage.inria.fr/~sagot/woler2011/WoLeR2011/Program_\&_Proceedings.html}, CONFERENCE_NAME = {First International Workshop on Lexical Resources}, BOOKTITLE = {First International Workshop on Lexical Resources}, EDITOR = {Sagot, B.}, } @ARTICLE{FERRO_2010_ARTICLE_FOPP_37718, AUTHOR = {Ferro, M. and Ognibene, D. and Pezzulo, G. and Pirrelli, V.}, TITLE = {Reading as active sensing: a computational model of gaze planning in word recognition}, YEAR = {2010}, ABSTRACT = {We offer a computational model of gaze planning during reading that consists of two main components: a lexical representation network, acquiring lexical representations from input texts (a subset of the Italian CHILDES database), and a gaze planner, designed to recognize written words by mapping strings of characters onto lexical representations. The model implements an active sensing strategy that selects which characters of the input string are to be fixated, depending on the predictions dynamically made by the lexical representation network. We analyze the developmental trajectory of the system in performing the word recognition task as a function of both increasing lexical competence, and correspondingly increasing lexical prediction ability. 
We conclude by discussing how our approach can be scaled up in the context of an active sensing strategy applied to a robotic setting}, KEYWORDS = {Reading, Language Learning, Mental Lexicon}, PAGES = {1-16}, URL = {https://iris.cnr.it/handle/20.500.14243/37718}, VOLUME = {4}, ISSN = {1662-5218}, JOURNAL = {FRONTIERS IN NEUROROBOTICS}, } @ARTICLE{FERRO_2010_ARTICLE_FPP_37719, AUTHOR = {Ferro, M. and Pezzulo, G. and Pirrelli, V.}, TITLE = {Morphology, Memory and the Mental Lexicon}, YEAR = {2010}, ABSTRACT = {Recent experimental evidence on morphological learning and processing has prompted a less deterministic and modular view of the interaction between stored word knowledge and on-line processing. Storing a word in the mental lexicon does not simply entail keeping a faithful memory image of that word in the most compact way. It also requires encoding and manipulating such image through topological structures that are optimally adapted to word production and comprehension. Temporal Self-Organizing Maps (THSOMs) are a novel model of artificial neural network that keeps time serial information through predictive activation chains of receptors encoding both spatial and temporal information of input stimuli. The impact of this model on issues of lexical organization and morphological processing is investigated in detail through a series of simulations shedding light on the dynamics between short-term memory (activation), long-term memory (learning) and morphological organization of stored word forms (topology)}, KEYWORDS = {Morphology, Word Processing, Word Learning, Mental Lexicon}, PAGES = {203-242}, URL = {https://iris.cnr.it/handle/20.500.14243/37719}, VOLUME = {2}, ISSN = {1720-9331}, JOURNAL = {LINGUE E LINGUAGGIO}, } @INCOLLECTION{PIRRELLI_2010_INCOLLECTION_PGB_134819, AUTHOR = {Pirrelli, V. and Guevara, E. 
and Baroni, M.}, TITLE = {Computational issues in compound processing}, YEAR = {2010}, ABSTRACT = {Understanding compounds is a challenging computational task, cutting across multiple levels of linguistic analysis and touching upon intricate issues of representation, grammar architecture and algorithmic processing. At the same time, compounds raise all these problems in the most direct and exemplar way. From this perspective, they are an ideal probe into core issues of language architecture, making us pause about the need for advanced processing models and multi-disciplinary approaches to long-lasting linguistic cruces. The paper reviews some of the lessons that can be learned from reading twenty years of computational literature on the topic and assesses them against the background of germane theoretical and cognitive issues}, KEYWORDS = {Morphology, Compounding, Natural Language Processing, Mental Lexicon}, PAGES = {271-285}, URL = {https://iris.cnr.it/handle/20.500.14243/134819}, PUBLISHER = {John Benjamins (Amsterdam, NLD)}, ISBN = {9789027248275}, CONFERENCE_PLACE = {Amsterdam}, BOOKTITLE = {Cross-disciplinary issues in compounding}, EDITOR = {Scalise, S. and Vogel, I.}, } @EDITORIAL{PIRRELLI_2010_EDITORIAL_P_250731, AUTHOR = {Pirrelli, V.}, TITLE = {Interdisciplinary Approaches to Understanding Word Processing and Storage}, YEAR = {2010}, ABSTRACT = {The present collection of papers originates from a successful application to the European Science Foundation Exploratory Workshop Programme for the "Words in Action" workshop.
The workshop, convened in Pisa on the 12th and 13th of October 2009, brought together experts of various scientific domains and theoretical inclinations to advance the current awareness of theoretical, typological, psycholinguistic, computational and neuro-physiological issues in word processing and storage, with a view to promoting novel methods of research and assessment for grammar architecture and language physiology}, PAGES = {91-240}, URL = {https://iris.cnr.it/handle/20.500.14243/250731}, PUBLISHER = {Societa Editrice il Mulino (Bologna, ITA)}, CONFERENCE_PLACE = {Bologna}, } @INCOLLECTION{DELLORLETTA_2009_INCOLLECTION_DLMMP_233257, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-2-Knowledge: una piattaforma linguistico-computazionale per l'estrazione di conoscenza da testi}, YEAR = {2009}, ABSTRACT = {The paper describes the automatic extraction of domain knowledge from Italian document collections and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge}, KEYWORDS = {Term extraction, Ontology Learning}, PAGES = {285-300}, URL = {https://iris.cnr.it/handle/20.500.14243/233257}, PUBLISHER = {Bulzoni (Roma, ITA)}, ISBN = {978-88-7870-469-5}, CONFERENCE_PLACE = {Roma}, EDITOR = {Ferrari, G. and Benatti, R. and Mosca, M.}, } @INCOLLECTION{LENCI_2009_INCOLLECTION_LMP_224573, AUTHOR = {Lenci, A. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {Annotazione sintattica di corpora: aspetti metodologici}, YEAR = {2009}, ABSTRACT = {Un assunto sempre più condiviso nell'ambito degli studi sull'acquisizione sia di L1 che di L2 è che l'evidenza empirica privilegiata debba essere rappresentata da corpora di produzioni scritte o orali degli apprendenti, estensivamente annotate a molteplici livelli di rappresentazione linguistica. Più in generale, corpora lemmatizzati e annotati a livello morfosintattico fanno ormai parte dello strumentario comune del linguista. Accanto ad essi, si fa però strada l'esigenza di disporre di risorse testuali più sofisticate dal punto di vista delle modalità di esplorazione linguistica, come ad esempio corpora annotati a livello sintattico (le cosiddette treebank). Questi consentono infatti di osservare i processi di convergenza degli apprendenti verso la lingua "obiettivo" anche a livello di specifici tratti grammaticali astratti o di macro-strutture linguistiche. L'articolo propone uno schema di annotazione sintattica caratterizzato da un doppio livello di codifica. Si tratta di un approccio originale che differisce dalla maggior parte degli schemi di annotazione sintattica esistenti per due aspetti: 1. la separazione della dimensione relazionale da quella a costituenti, che sono trattati a livelli di annotazione indipendenti, ma al tempo stesso correlati, in modo tale che lo stesso testo è simultaneamente interrogabile ai due livelli; 2. la rappresentazione a costituenti fornisce una rappresentazione del testo come sequenza di proto-costituenti sintagmatici non ricorsivi. Questa strategia di annotazione permette una fattorizzazione di diversi aspetti e dimensioni della struttura sintattica che risulta promettente da un lato per l'annotazione di corpora di lingua "non-standard" come quelli contenenti produzioni di apprendenti di L1 o L2, sia come punto di partenza per successivi processi di estrazione di informazione linguistica dal testo.
Dopo aver illustrato le motivazioni sottostanti allo schema proposto, ciascun livello di rappresentazione (chunking e dipendenze funzionali) viene illustrato in dettaglio, mostrandone anche la possibilità di combinazione sullo stesso testo. L'articolo si chiude con la discussione di prospettive di uso di corpora annotati secondo lo schema di annotazione proposto}, KEYWORDS = {Corpora annotati, annotazione sintattica}, PAGES = {25-46}, URL = {https://iris.cnr.it/handle/20.500.14243/224573}, PUBLISHER = {Guerra Edizioni (Perugia, ITA)}, ISBN = {978-88-557-0168-6}, CONFERENCE_PLACE = {Perugia}, BOOKTITLE = {CORPORA DI ITALIANO L2: TECNOLOGIE, METODI, SPUNTI TEORICI}, EDITOR = {Andorno, C. and Rastelli, S.}, } @INCOLLECTION{LENCI_2009_INCOLLECTION_LMPV_134815, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Ontology learning from Italian legal texts}, YEAR = {2009}, ABSTRACT = {The paper reports on the methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. Evaluated results, however preliminary, show the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies}, KEYWORDS = {Ontology Learning, document management, legal knowledge extraction}, PAGES = {75-94}, URL = {https://iris.cnr.it/handle/20.500.14243/134815}, DOI = {10.3233/978-1-58603-942-4-75}, ISBN = {978-1-58603-942-4}, BOOKTITLE = {Law, Ontologies and the Semantic Web-Channelling the Legal Information Flood}, EDITOR = {Breuker, J. and Casanovas, P. and Klein, M. C. A. 
and Francesconi, E.}, } @ARTICLE{DELLORLETTA_2008_ARTICLE_DLMMPV_37713, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio}, YEAR = {2008}, ABSTRACT = {The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge}, KEYWORDS = {Natural Language Processing, Machine Learning, Knowledge extraction from texts, Ontology learning, Legal ontologies}, PAGES = {197-218}, URL = {https://iris.cnr.it/handle/20.500.14243/37713}, VOLUME = {26 (1-2)}, ISSN = {1594-2201}, JOURNAL = {AIDA INFORMAZIONI (ONLINE)}, } @INPROCEEDINGS{DELLORLETTA_2008_INPROCEEDINGS_DLMMPV_65083, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio}, YEAR = {2008}, ABSTRACT = {The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. 
Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge}, KEYWORDS = {Natural Language Processing, Machine Learning, Knowledge extraction from texts, Ontology learning, Legal ontologies}, PAGES = {197-218}, URL = {http://www.assiterm91.it/wp-content/uploads/2010/11/Convegno-2008.pdf}, VOLUME = {ANNO 26, NUMERO 1-2}, ISSN = {1121-0095}, CONFERENCE_NAME = {Atti del Convegno Nazionale Ass. I. Term}, BOOKTITLE = {AIDA INFORMAZIONI}, } @INPROCEEDINGS{DELLORLETTA_2008_INPROCEEDINGS_DLMMPV_65074, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Marchi, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Acquiring Legal Ontologies from Domain-specific Texts}, YEAR = {2008}, ABSTRACT = {The paper reports on methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts in the environmental domain. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. Evaluated results, however preliminary, are very encouraging, showing the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies}, KEYWORDS = {Ontology learning, Document management, knowledge extraction from texts, Natural Language Processing}, PAGES = {98-101}, URL = {https://iris.cnr.it/handle/20.500.14243/65074}, CONFERENCE_NAME = {LangTech 2008}, } @INPROCEEDINGS{LENCI_2008_INPROCEEDINGS_LMPM_65104, AUTHOR = {Lenci, A. and McGillivray, B. and Pirrelli, V.
and Montemagni, S.}, TITLE = {Unsupervised Acquisition of Verb Subcategorization Frames from Shallow-Parsed Corpora}, YEAR = {2008}, KEYWORDS = {Acquisition, Machine Learning, Corpus (creation, annotation, etc.), Lexicon, Lexical database}, URL = {https://iris.cnr.it/handle/20.500.14243/65104}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, } @ARTICLE{BARONI_2007_ARTICLE_BGP_433727, AUTHOR = {Baroni, M. and Guevara, E. and Pirrelli, V.}, TITLE = {NN Compounds in Italian: Modelling Category Induction and Analogical Extension}, YEAR = {2007}, ABSTRACT = {Dopo un inquadramento teorico del problema della composizione nelle scienze cognitive e in linguistica, presentiamo una serie di esperimenti sui composti nominali in italiano che mettono alla prova l'ipotesi che ci sia una distinzione fondamentale tra composti relazionali (legittimati da proprietà della testa) e composti attributivi (legittimati dal modificatore). Un'analisi computazionale basata su un corpus conferma che tale distinzione può in linea di principio venire indotta da dati di tipo distribuzionale. Inoltre, dati sperimentali mostrano che il modello è in grado di predire almeno in parte l'accettabilità di nuovi composti formati cambiando testa e modificatore di composti esistenti}, KEYWORDS = {Morphology, Compounding, Mental Lexicon, Lexical Semantics}, PAGES = {263-290}, URL = {https://iris.cnr.it/handle/20.500.14243/433727}, VOLUME = {2}, ISSN = {1720-9331}, JOURNAL = {LINGUE E LINGUAGGIO}, } @ARTICLE{CALDERONE_2007_ARTICLE_CHP_37709, AUTHOR = {Calderone, B. and Herreros, I. and Pirrelli, V.}, TITLE = {Learning Inflection: The Importance of Starting Big}, YEAR = {2007}, ABSTRACT = {Perchè i sistemi verbali morfologicamente più "ricchi" vengono appresi da un bambino con maggiore facilità di sistemi più "poveri", caratterizzati da maggiore suppletivismo e da un minor numero di marcatori flessionali? 
Studi recenti condotti nel quadro della Morfologia Naturale (Bittner et al. 2003) hanno evidenziato il ruolo centrale svolto in questo apparente paradosso dal "contrasto morfologico" e dalla relazione biunivoca tra forma e contenuto all'interno del paradigma flessionale. Il presente lavoro illustra da questo punto di vista il comportamento di un modello originale di reti neurali artificiali auto-organizzanti con architettura "a cascata" e apprendimento asincrono, addestrato su forme verbali codificate fonologicamente. Il modello addestrato è in grado di memorizzare sia configurazioni morfologiche astratte, corrispondenti alle terminazioni flessionali di forme verbali regolari e irregolari, sia forme flesse piene, in funzione della loro frequenza per tipo e per unità nel corpus di addestramento. Il comportamento del modello è valutato su due differenti corpora di addestramento, italiano e inglese, entrambi campionati dal database CHILDES. L'analisi della topologia delle informazioni memorizzate dal modello addestrato consente di trarre alcune conclusioni generali sull'interazione tra processi di acquisizione di sequenze fonotattiche e principi di acquisizione paradigmatica. Le implicazioni teoriche dei risultati vengono inoltre discusse alla luce del tradizionale dibattito tra modelli "a meccanismo singolo" e "a meccanismo doppio" di acquisizione morfologica}, PAGES = {175-200}, URL = {https://iris.cnr.it/handle/20.500.14243/37709}, VOLUME = {2}, ISSN = {1720-9331}, JOURNAL = {LINGUE E LINGUAGGIO}, } @ARTICLE{DELLORLETTA_2007_ARTICLE_DFLMP_37710, AUTHOR = {Dell'Orletta, F. and Federico, M. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Maximum Entropy for Italian PoS Tagging}, YEAR = {2007}, ABSTRACT = {L'articolo illustra le prestazioni del ILC-UniPi MaxEnt PoS Tagger in Evalita 2007. 
The report contains a description of the ILC-UniPi MaxEnt PoS Tagger performance in Evalita 2007}, PAGES = {10-11}, URL = {https://iris.cnr.it/handle/20.500.14243/37710}, VOLUME = {IV(2)}, } @ARTICLE{PIRRELLI_2007_ARTICLE_P_37708, AUTHOR = {Pirrelli, V.}, TITLE = {Psycho-Computational Issues in Morphology Learning and Processing: An Overture}, YEAR = {2007}, PAGES = {131-138}, URL = {https://iris.cnr.it/handle/20.500.14243/37708}, VOLUME = {2}, } @INCOLLECTION{DELLORLETTA_2007_INCOLLECTION_DLMP_134810, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Corpus-based Modelling of Grammar Variation}, YEAR = {2007}, KEYWORDS = {Grammar variation, stochastic parsing, linguistic typology}, PAGES = {38-55}, URL = {https://iris.cnr.it/handle/20.500.14243/134810}, PUBLISHER = {Angeli (Milano, ITA)}, ISBN = {9788846489449}, CONFERENCE_PLACE = {Milano}, BOOKTITLE = {Language resources and linguistic theory}, EDITOR = {Sansò, A.}, } @INPROCEEDINGS{BARONI_2007_INPROCEEDINGS_BGP_65050, AUTHOR = {Baroni, M. and Guevara, E. and Pirrelli, V.}, TITLE = {Sulla tipologia dei composti N N in italiano: principi categoriali ed evidenza distribuzionale a confronto}, YEAR = {2007}, KEYWORDS = {Morphology, Compounding, Mental Lexicon, Lexical Semantics}, URL = {https://iris.cnr.it/handle/20.500.14243/65050}, ISBN = {978-88-7870-469-5}, CONFERENCE_NAME = {XL Congresso Internazionale di Studi della Società di Linguistica Italiana (SLI 2006)}, BOOKTITLE = {Linguistica e modelli tecnologici della ricerca}, EDITOR = {Ferrari, G. and Benatti, R. and Mosca, M.}, } @INPROCEEDINGS{DELLORLETTA_2007_INPROCEEDINGS_DFLMP_65073, AUTHOR = {Dell'Orletta, F. and Federico, M. and Lenci, A. and Montemagni, S.
and Pirrelli, V.}, TITLE = {Maximum Entropy for Italian PoS Tagging}, YEAR = {2007}, URL = {https://iris.cnr.it/handle/20.500.14243/65073}, CONFERENCE_NAME = {Evaluation of NLP Tools for Italian-EVALITA 2007}, } @INPROCEEDINGS{LENCI_2007_INPROCEEDINGS_LMPV_65070, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V. and Venturi, G.}, TITLE = {NLP-based ontology learning from legal texts. A case study}, YEAR = {2007}, ABSTRACT = {The paper reports on the methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts in the environmental domain. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. Evaluated results, however preliminary, are very encouraging, showing the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies}, PAGES = {113-129}, URL = {https://iris.cnr.it/handle/20.500.14243/65070}, CONFERENCE_NAME = {II Workshop on Legal Ontologies and Artificial Intelligence Techniques (LOAIT'07)}, } @INPROCEEDINGS{PIRRELLI_2007_INPROCEEDINGS_P_65066, AUTHOR = {Pirrelli, V.}, TITLE = {On the cognitive autonomy of morphological processing}, YEAR = {2007}, ABSTRACT = {La connaissance morphologique définit-elle un domaine linguistique autonome dans la grammaire ou est-ce plutôt le sous-produit de principes et représentations basés sur la syntaxe ? Nous traitons la question en prenant en considération un grand ensemble de preuves linguistiques et cognitives, en nous fondant sur la manière dont les locuteurs apprennent, structurent, ont accès et utilisent leur lexique mental pour analyser et produire des mots. 
Conformément à l'idée que l'observation empirique de productions linguistiques concrètes peut donner des indications sur des domaines spécifiques à l'intérieur de la grammaire, nous concluons qu'il est difficile de concilier les preuves linguistiques fondées sur l'usage avec une approche qui consisterait à voir la morphologie comme la syntaxe des morphèmes. Cependant, il serait également erroné et inutile du point de vue logique de caractériser l'autonomie fonctionnelle de la morphologie par rapport à la syntaxe en termes de modularité de procès}, KEYWORDS = {Theoretical Morphology, Mental Lexicon, Language Learning, Self-Organizing Maps}, PAGES = {245-269}, URL = {https://iris.cnr.it/handle/20.500.14243/65066}, PUBLISHER = {LINCOM academic publishers (LINCOM GmbH) (München, DEU)}, ISBN = {9783895865046}, CONFERENCE_NAME = {Actes du colloque international de Morphologie 4èmes Décembrettes}, CONFERENCE_PLACE = {München}, BOOKTITLE = {Morphologie à Toulouse}, EDITOR = {Hathout, N. and Montermini, F.}, } @INPROCEEDINGS{PIRRELLI_2007_INPROCEEDINGS_PH_65067, AUTHOR = {Pirrelli, V. and Herreros, I.}, TITLE = {Learning Inflection by Itself}, YEAR = {2007}, ABSTRACT = {The paper reports on a few experimental results of a computer simulation of learning the verb morphology of Italian, English and Arabic with the same type of neural architecture based on Kohonen's self-organizing maps. Issues of the mental organization of the resulting morphological lexica are explored in some detail and discussed in the light of the differential distribution of regular and irregular inflections in the three languages. It is shown that typologically diverse, non trivial aspects of the underlying paradigmatic structure of the three verb systems effectively emerge through sheer exposure to realistic distributions of verb forms devoid of morpho-syntactic content. 
We argue that these results go a long way towards explaining how global organization effects in the mental morphological lexicon may eventually result from local word processing steps}, KEYWORDS = {Theoretical Morphology, Mental Lexicon, Language Learning, Self-Organizing Maps}, PAGES = {269-290}, URL = {http://mmm.lingue.unibo.it/}, ISSN = {1826-7491}, CONFERENCE_NAME = {V Mediterranean Morphology Meeting}, BOOKTITLE = {ONLINE PROCEEDINGS OF THE MEDITERRANEAN MORPHOLOGY MEETINGS}, EDITOR = {Booij, G. and Ducceschi, L. and Fradin, B. and Guevara, E. and Ralli, A. and Scalise, S.}, } @INPROCEEDINGS{SORIA_2007_INPROCEEDINGS_SBLMP_65060, AUTHOR = {Soria, C. and Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Automatic Extraction of Semantics in Law Documents}, YEAR = {2007}, URL = {https://iris.cnr.it/handle/20.500.14243/65060}, CONFERENCE_NAME = {V Legislative XML Workshop}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BCGLMPRS_61476, AUTHOR = {Bartolini, R. and Caracciolo, C. and Giovannetti, E. and Lenci, A. and Marchi, S. and Pirrelli, V. and Renso, C. and Spinsanti, L.}, TITLE = {Creation and use of lexicons and ontologies for natural language interface to databases}, YEAR = {2006}, ABSTRACT = {In this paper we present an original approach to natural language query interpretation which has been implemented within the FuLL (Fuzzy Logic and Language) Italian project of BC S. r. l. In particular, we discuss here the creation of linguistic and ontological resources, together with the exploitation of existing ones, for natural language-driven database access and retrieval. 
Both the database and the queries we experiment with are Italian, but the methodology we broach naturally extends to other languages}, KEYWORDS = {Natural language processing, ontologies, gis, databases}, PAGES = {6}, URL = {https://iris.cnr.it/handle/20.500.14243/61476}, CONFERENCE_NAME = {LREC Conference}, BOOKTITLE = {LREC 2006}, } @INPROCEEDINGS{DELLORLETTA_2006_INPROCEEDINGS_DLMP_65043, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Probing the space of grammatical variation: induction of cross-lingual grammatical constraints from treebanks}, YEAR = {2006}, ABSTRACT = {The paper reports on a detailed quantitative analysis of distributional language data of both Italian and Czech, highlighting the relative contribution of a number of distributed grammatical factors to sentence-based identification of subjects and direct objects. The work uses a Maximum Entropy model of stochastic resolution of conflicting grammatical constraints and is demonstrably capable of putting explanatory theoretical accounts to the test of usage-based empirical verification}, PAGES = {21-28}, URL = {https://iris.cnr.it/handle/20.500.14243/65043}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {1-932432-78-7}, CONFERENCE_NAME = {Coling/ACL 2006}, CONFERENCE_PLACE = {Stroudsburg}, BOOKTITLE = {Proceedings of the Workshop on Frontiers in Linguistically Annotated Corpora 2006 (LAC 06)}, } @INPROCEEDINGS{DELLORLETTA_2006_INPROCEEDINGS_DLMP_65015, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Searching treebanks for functional constraints: cross-lingual experiments in grammatical relation assignment}, YEAR = {2006}, URL = {https://iris.cnr.it/handle/20.500.14243/65015}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, } @ARTICLE{LENCI_2005_ARTICLE_LMP_433719, AUTHOR = {Lenci, A. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {Acquiring and Representing Meaning: Theoretical and Computational Perspectives}, YEAR = {2005}, PAGES = {19-66}, URL = {https://iris.cnr.it/handle/20.500.14243/433719}, VOLUME = {22-23}, } @BOOK{LENCI_2005_BOOK_LMP_134788, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Testo e computer-Elementi di linguistica computazionale}, YEAR = {2005}, ABSTRACT = {In che modo il computer può aiutarci a comprendere come funziona la nostra lingua? Cosa significa analizzare un testo con l'aiuto di un calcolatore? In che misura possiamo estendere le potenzialità del computer rendendolo capace di interagire con gli utenti umani nella loro lingua? Queste e altre domande sono l'oggetto di indagine della linguistica computazionale, una disciplina che ha al suo centro proprio il rapporto tra lingua e computer. Il libro fornisce gli elementi di base della linguistica computazionale partendo da un interesse primario per il testo, la sua struttura e il suo contenuto. Il volume propone una sintesi equilibrata e accessibile tra sapere e fare, nozioni di base e loro applicazione, ed è destinato in primo luogo agli studenti delle facoltà umanistiche e scientifiche interessati all'interazione tra scienze umane e informatica, ma anche agli studiosi che vogliano imparare a usare il computer come strumento di ricerca sul linguaggio}, KEYWORDS = {Linguistica Computazionale}, PAGES = {255}, URL = {https://iris.cnr.it/handle/20.500.14243/134788}, PUBLISHER = {Carocci (Roma, ITA)}, ISBN = {8843034251}, CONFERENCE_PLACE = {Roma}, } @BOOK{LENCI_2005_BOOK_LMP_134787, AUTHOR = {Lenci, A. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {Acquiring and Representing Word Meaning: Computational perspectives}, YEAR = {2005}, URL = {https://iris.cnr.it/handle/20.500.14243/134787}, PUBLISHER = {Istituti Editoriali e Poligrafici Internazionali (Pisa-Roma, ITA)}, ISBN = {88-8147-413-1}, CONFERENCE_PLACE = {Pisa-Roma}, } @INPROCEEDINGS{BARTOLINI_2005_INPROCEEDINGS_BGLMP_431279, AUTHOR = {Bartolini, R. and Giorgetti, D. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Automatic Incremental Term Acquisition from Domain Corpora}, YEAR = {2005}, ABSTRACT = {We describe a technique for the acquisition of terms from Italian domain text corpora, which relies both on sophisticated linguistic analysis and on statistical measures applied to linguistically processed text rather than to raw text as it is usually the case. The main advantage of this technique is that minimal a priori knowledge of term structure is required, thus allowing to explore and discover terms in a given domain without imposing a strict pattern matching structure on them, and also to easily extend it to different domains. The approach we present in this paper is incremental as it may be iterated to discover terms of increasing complexity built on top of terms discovered in the previous iteration. The reason why it is convenient to adopt such an incremental approach is that it allows to "clean" data from noise in the first step, elicitating the constituent terms, and then to refine term acquisition on "skimmed" term data}, PAGES = {293-300}, URL = {https://iris.cnr.it/handle/20.500.14243/431279}, CONFERENCE_NAME = {7th International conference on Terminology and Knowledge Engineering (TKE2005)}, BOOKTITLE = {Proceedings of TKE 2005-7th International Conference on Terminology and Knowledge Engineering}, } @INPROCEEDINGS{DELLORLETTA_2005_INPROCEEDINGS_DLMP_77226, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {Climbing the path to grammar: a maximum entropy model of subject/object learning}, YEAR = {2005}, URL = {https://iris.cnr.it/handle/20.500.14243/77226}, CONFERENCE_NAME = {Psychocomputational Models of Human Language Acquisition (PsychoCompLA-2005)}, } @ARTICLE{FURFARI_2004_ARTICLE_FSPSB_170613, AUTHOR = {Furfari, F. and Soria, C. and Pirrelli, V. and Signore, O. and Bianchi Bandinelli, R.}, TITLE = {NICHE: Natural Interaction in Computerised Home Environments}, YEAR = {2004}, ABSTRACT = {Future technologies will provide users with increasing control over surrounding devices embedded in a common home environment. Somewhat paradoxically, this could result in an increase rather than a reduction in complexity if support for high-level interfacing is not introduced. This concern prompted the launching of a medium-term project aimed at promoting natural user-home interaction along the lines of the Ambient Intelligence vision}, KEYWORDS = {HCI, Home Automation, Smart Home}, PAGES = {55-56}, URL = {http://www.ercim.org/publication/Ercim_News/enw58/furfari.html}, VOLUME = {58}, ISSN = {0926-4981}, JOURNAL = {ERCIM NEWS}, } @INCOLLECTION{BARTOLINI_2004_INCOLLECTION_BLMPS_436876, AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V. and Soria, C.}, TITLE = {Automatic Classification and Analysis of Provisions in Italian Legal Texts: A Case Study}, YEAR = {2004}, ABSTRACT = {In this paper we address the problem of automatically enriching legal texts with semantic annotation, an essential pre–requisite to effective indexing and retrieval of legal documents. This is done through illustration of SALEM (Semantic Annotation for LEgal Management), a computational system developed for automated semantic annotation of (Italian) law texts. 
SALEM is an incremental system using Natural Language Processing techniques to perform two tasks: i) classify law paragraphs according to their regulatory content, and ii) extract relevant text fragments corresponding to specific semantic roles that are relevant for the different types of regulatory content. The paper sketches the overall architecture of SALEM and reports results of a preliminary case study on a sample of Italian law texts}, KEYWORDS = {Annotazione semantica, Classificazione automatica}, PAGES = {593-604}, URL = {https://rdcu.be/dftjm}, DOI = {10.1007/978-3-540-30470-8_72}, PUBLISHER = {Springer (Berlin, DEU)}, ISBN = {978-3-540-23664-1}, CONFERENCE_PLACE = {Berlin}, BOOKTITLE = {On the Move to Meaningful Internet Systems 2004: OTM 2004 Workshops. OTM 2004}, EDITOR = {Meersman, R. and Tari, Z. and Corsaro, A.}, } @INPROCEEDINGS{BARTOLINI_2004_INPROCEEDINGS_BLMP_77220, AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Hybrid Constraints for Robust Parsing: First Experiments and Evaluation}, YEAR = {2004}, URL = {https://iris.cnr.it/handle/20.500.14243/77220}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, } @INPROCEEDINGS{BARTOLINI_2004_INPROCEEDINGS_BLMPS_431278, AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V. and Soria, C.}, TITLE = {Semantic Mark-up of Italian Legal Texts Through NLP-based Techniques}, YEAR = {2004}, URL = {https://iris.cnr.it/handle/20.500.14243/431278}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, } @INPROCEEDINGS{PIRRELLI_2004_INPROCEEDINGS_PCHV_13247, AUTHOR = {Pirrelli, V. and Calderone, B. and Herreros, I. 
and Virgilio, M.}, TITLE = {Non-locality all the way through: Emergent Global Constraints in the Italian Morphological Lexicon}, YEAR = {2004}, ABSTRACT = {The paper reports on the behaviour of a Kohonen map of the mental lexicon, monitored through different phases of acquisition of the Italian verb system. Reported experiments appear to consistently reproduce emergent global ordering constraints on memory traces of inflected verb forms, developed through principles of local interactions between parallel processing neurons}, URL = {https://iris.cnr.it/handle/20.500.14243/13247}, } @ARTICLE{ALLEGRINI_2003_ARTICLE_AMP_37654, AUTHOR = {Allegrini, P. and Montemagni, S. and Pirrelli, V.}, TITLE = {Example-based automatic induction of semantic classes through entropic scores}, YEAR = {2003}, ABSTRACT = {The paper deals in some detail with the application of example-based machine learning techniques to the task of automatically acquiring semantic information from functionally annotated texts. Special emphasis is placed on the use of “analogical proportions” as a means of structuring the knowledge embodied in attested examples, and weighing up their contribution to a variety of lexico-semantic classification tasks. Careful quantitative analysis of automatically acquired information proves to shed considerable light on the semantic inter-connectivity of input data, their structure and organising principles}, PAGES = {1-45}, URL = {https://iris.cnr.it/handle/20.500.14243/37654}, VOLUME = {16-17}, } @ARTICLE{LENCI_2003_ARTICLE_LMP_37664, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Chunk-it. An Italian shallow parser for robust syntactic annotation}, YEAR = {2003}, PAGES = {353-386}, URL = {https://iris.cnr.it/handle/20.500.14243/37664}, VOLUME = {16-17}, } @ARTICLE{MONTEMAGNI_2003_ARTICLE_MBBCCLPZFMRBPSZMPD_37665, AUTHOR = {Montemagni, S. and Barsotti, F. and Battista, M. and Calzolari, N. and Corazzari, O. and Lenci, A. and Pirrelli, V. 
and Zampolli, A. and Fanciulli, F. and Massetani, M. and Raffaelli, R. and Basili, R. and Pazienza, M. T. and Saracino, D. and Zanzotto, F. and Mana, N. and Pianesi, F. and Delmonte, R.}, TITLE = {The syntactic-semantic Treebank of Italian. An Overview}, YEAR = {2003}, PAGES = {461-492}, URL = {https://iris.cnr.it/handle/20.500.14243/37665}, VOLUME = {16-17}, } @ARTICLE{PIRRELLI_2003_ARTICLE_PB_37650, AUTHOR = {Pirrelli, V. and Battista, M.}, TITLE = {Syntagmatic and paradigmatic issues in computational morphology}, YEAR = {2003}, ABSTRACT = {In this paper some germane theoretical issues in inflectional morphology will be addressed from a computational point of view. In particular we shall focus on the proper treatment of verb stem allomorphy in Italian conjugation and discuss several different formal solutions in some detail. To put our discussion on a more computational footing, all our examples are illustrated by using the DATR formalism as our metalanguage. This allows us to combine the advantages of the advanced expressive power and flexibility of DATR with the further bonus of offering a running piece of program code that actually works on the discussed examples. The upshot of the paper is that a computational treatment of Italian conjugation can considerably benefit from recent theoretical advances in word and paradigm morphology, as this level of description allows the rule writer to capture generalizations which would otherwise completely elude a purely syntagmatic approach to allomorphy}, PAGES = {679-701}, URL = {https://iris.cnr.it/handle/20.500.14243/37650}, VOLUME = {18-19}, } @ARTICLE{SORIA_2003_ARTICLE_SP_37658, AUTHOR = {Soria, C. and Pirrelli, V.}, TITLE = {A multi-level annotation meta-scheme for dialogue acts}, YEAR = {2003}, ABSTRACT = {This article describes a new principled framework for comparison, design and standardization of annotation schemes for dialogue acts. 
Previous attempts at comparing existing schemes in order to identify a common core of generally agreed-upon dialogue acts share the assumption that tags belonging to different schemes and describing the same general phenomena can always be related through hypo- or hyperonymy relationships. Consequently, general-purpose schemes have often been the result of a merger of different tag sets. In this article, we show the extent to which comparability of different annotation schemes is prevented by the very limited tag inter-translatability. We thus describe an alternative approach to the comparison of dialogue act taxonomies based on a compositional analysis of tags according to independent classificatory dimensions. The framework takes a recognition-based approach to dialogue tagging and defines four independent taxonomies of tags, one for each orthogonal dimension of linguistic and contextual analysis assumed to have a bearing on identification of dialogue acts. We also show how the same framework can be used to design a general-purpose annotation scheme which combines the features of generality and expressivity by exploiting a modular structure. The advantages and limitations of this proposal over other previous attempts are discussed and concretely exemplified}, KEYWORDS = {dialogue acts, annotation scheme, pragmatics}, PAGES = {925-952}, URL = {https://iris.cnr.it/handle/20.500.14243/37658}, VOLUME = {18-19}, ISSN = {0392-6907}, JOURNAL = {LINGUISTICA COMPUTAZIONALE}, } @INCOLLECTION{ALLEGRINI_2003_INCOLLECTION_ALMP_134779, AUTHOR = {Allegrini, P. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Le forme del significato. 
Acquisizione e rappresentazione dell'informazione semantica}, YEAR = {2003}, KEYWORDS = {Acquisizione, Semantica Lessicale, Ontologia, Machine Learning}, URL = {https://iris.cnr.it/handle/20.500.14243/134779}, } @INCOLLECTION{PIRRELLI_2003_INCOLLECTION_P_134776, AUTHOR = {Pirrelli, V.}, TITLE = {Machine language learning meets information technology}, YEAR = {2003}, KEYWORDS = {Apprendimento, Sistemi integrati, Semantic web, Machine Learning}, URL = {https://iris.cnr.it/handle/20.500.14243/134776}, PUBLISHER = {Angeli (Milano, ITA)}, CONFERENCE_PLACE = {Milano}, } @ARTICLE{PIRRELLI_2000_ARTICLE_PB_264442, AUTHOR = {Pirrelli, V. and Battista, M.}, TITLE = {The Paradigmatic Dimension of Stem Allomorphy in Italian Verb Inflection}, YEAR = {2000}, ABSTRACT = {This paper is concerned with a detailed analysis of stem allomorphy in Italian Conjugation, carried out from a phonological and paradigmatic perspective. In theory, one would expect these two complementary viewpoints to take care of neatly separable classes of phenomena. In fact, the two dimensions turn out to be interlocked in a complex way, to define a grammatical continuum ranging from minor phonological processes to full suppletion. A formal descriptive framework is proposed here, whereby several insights into the structure of inflectional paradigms (Matthews 1974, Carstairs 1987, Wurzel 1989, Stump 1991, Aronoff 1994) are dealt with from a unifying, purely morphological perspective. In this framework, the structure of a verb paradigm is characterised in terms of a distribution of slots into a number of equivalence classes, or set partition, where each equivalence class is associated with a morphologically distinct stem root. It is shown that, in Italian, a few set partitions account for the structure of all Italian verb paradigms, whether regular or less regular. Moreover, all these partitions are mutually related homomorphically. 
This well-behaved family of distributions tightly constrains stem allomorphy at an appropriate level of abstraction, independently of whether the origin of allomorphy is morpho-phonological or purely morphological, showing the superiority of the obtained generalisations over more traditional syntagmatic accounts}, KEYWORDS = {Morfologia, allomorfia, paradigmi flessionali}, PAGES = {307-379}, URL = {https://iris.cnr.it/handle/20.500.14243/264442}, VOLUME = {12}, ISSN = {1120-2726}, JOURNAL = {RIVISTA DI LINGUISTICA}, } @BOOK{PIRRELLI_2000_BOOK_P_263719, AUTHOR = {Pirrelli, V.}, TITLE = {Paradigmi in morfologia. Un approccio interdisciplinare alla flessione verbale dell'italiano}, YEAR = {2000}, ABSTRACT = {Cosa vuol dire apprendere le forme flesse di un verbo in una lingua come l'italiano? Attraverso quali processi mentali e quali strutture cognitive un parlante è in grado di memorizzare, strutturare ed estendere a parole mai ascoltate prima i dati morfologici cui è esposto nel corso dell'apprendimento? Quali risposte sono disponibili allo stato attuale delle ricerche, e quanto sono soddisfacenti? E infine, che cosa ci insegnano queste ricerche riguardo al modo in cui la mente umana associa forma e contenuto nel linguaggio? Questo libro cerca di affrontare buona parte di questi problemi attraverso l'analisi di tre fonti primarie di dati. La prima riguarda l'evidenza del sistema verbale dell'italiano, la sua variabilità dal punto di vista formale, e l'apparentemente caotico pullulare di eccezioni e sacche di sotto-regolarità. Considereremo con attenzione come questi dati sono analizzati in morfologia teorica e con quali strumenti formali. 
In secondo luogo guarderemo ai dati psicolinguistici relativi all'apprendimento delle parole da parte del bambino, ai suoi errori, alle sue generalizzazioni e alle sue difficoltà, per poi confrontare questi dati sia con i modelli della linguistica teorica che con i modelli computazionali di elaborazione e di apprendimento del linguaggio ad oggi disponibili. Infine, daremo uno sguardo al cambiamento linguistico dal punto di vista diacronico, e in particolare alle direzioni che il sistema verbale dell'italiano ha preso nella sua progressiva deriva dal verbo latino attraverso il tempo}, URL = {https://iris.cnr.it/handle/20.500.14243/263719}, }