[{"id":366733,"last_updated":"2024-01-10 11:03:41","id_people":491082,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Linguistic Profile of a Text and Human Ratings of Writing Quality: a Case Study on Italian L1 Learner Essays","year":2023,"authors_people":"Aldo Cerulli; Dominique Brunato; Felice Dell'Orletta","authors_cnr":["Dell'Orletta, Felice","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","21125"],"authors_cnr_institute":[""],"authors":["Cerulli, A.","Brunato, D.","Dell'Orletta, F."],"abstract":"This paper presents a study based on the linguistic profiling methodology to explore the relationship between the linguistic structure of a text and how it is perceived in terms of writing quality by humans. The approach is tested on a selection of Italian L1 learners essays, which were taken from a larger longitudinal corpus of essays written by Italian L1 students enrolled in the first and second year of lower secondary school. Human ratings of writing quality by Italian native speakers were collected through a crowdsourcing task, in which annotators were asked to read pairs of essays and rated which one they believed to be better written. 
By analyzing these ratings, the study identifies a variety of linguistic phenomena spanning across distinct levels of linguistic description that distinguish the essays considered as 'winners' and evaluates the impact of students' errors on the human perception of writing quality.","keywords":["human ratings","text quality","Natural Language Processing","learner corpus"],"pages":"7-34","url":"https:\/\/www.ai-lc.it\/wp-content\/uploads\/2023\/09\/IJCOL_9_1_1_cerulli_et_al.pdf","volume":"1","doi":"","editors_people":"","editors":[""],"published":"Italian Journal of Computational Linguistics","publisher":"aAccademia University Press, Torino (Italia)","issn":"2499-4553","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":358095,"last_updated":"2023-11-06 19:31:05","id_people":482226,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Linguistic markers of demoralization improvement in schizophrenia: A pilot study","year":2023,"authors_people":"Folesani F.; Belvederi Murri M.; Puggioni C.; Tiberto E.; Marella M.; Toffanin T.; Zerbinati L.; Nanni M.G.; Caruso R.; Brunato D.; Ravelli A.A.; Dell'Orletta F.; Chochinov H.M.; Grassi L.","authors_cnr":["Dell'Orletta, Felice","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","21125"],"authors_cnr_institute":[""],"authors":["Folesani, F.","Belvederi, M. M.","Puggioni, C.","Tiberto, E.","Marella, M.","Toffanin, T.","Zerbinati, L.","Nanni, M. G.","Caruso, R.","Brunato, D.","Ravelli, A. A.","Dell'Orletta, F.","Chochinov, H. M.","Grassi, L."],"abstract":"Background and objectives: Individuals with schizophrenia display language impairments involving pragmatics, semantics and syntax. Language impairments may show diagnostic specificity and could relate to the ability of engaging in psychotherapy. 
This pilot study sought to: (1) identify linguistic features that might differentiate individuals with schizophrenia from distressed controls without psychotic symptoms; and (2) examine the association between linguistic abilities and clinical changes during psychotherapy. Methods: We recruited patients with schizophrenia and a comparison group of individuals with demoralization and distress due to cancer. Participants underwent Dignity Therapy (DT), an existentially-oriented brief psychotherapy focused on legacy and subjective dignity. Verbatim transcripts of the DT sessions were analysed using Natural Language Processing (NLP). In addition, we measured changes in levels of demoralization and dignity-related distress before and after DT, exploring the association with linguistic variables with network analysis. Results: Patients with schizophrenia could be differentiated from those with cancer-related distress using only three out of 141 linguistic variables: total number of words, number of prepositional chains and conversational elements. Across groups, better levels of discourse coherence and higher number of arguments controlled by a predicate (verb \"arity\") were associated with larger improvements in demoralization and, indirectly, dignity-related distress. Conclusions: Reproducible linguistic markers may be able to differentiate individuals with schizophrenia from those with less severe psychopathology, and to predict better uptake of psychotherapy independent from diagnosis. 
Future studies should explore whether linguistic features derived from NLP may be exploited as accessible diagnostic or prognostic markers to tailor psychotherapy and other interventions in schizophrenia.","keywords":["Schizophrenia","Dignity Therapy","Natural Language Processing","Linguistic Profiling","Psychotherapy"],"pages":"","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85153800425&origin=inward","volume":"","doi":"10.1016\/j.ejpsy.2023.03.001","editors_people":"","editors":[""],"published":"The European journal of psychiatry","publisher":"European Journal of Psychiatry (Saragosse, Spagna)","issn":"0213-6163","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":362242,"last_updated":"2024-01-30 16:38:01","id_people":488203,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Testing the Effectiveness of the Diagnostic Probing Paradigm on Italian Treebanks","year":2023,"authors_people":"Alessio Miaschi, Chiara Alzetta, Dominique Brunato, Felice Dell'Orletta, Giulia Venturi","authors_cnr":["Miaschi, Alessio","Alzetta, Chiara","Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Alzetta, C.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"The outstanding performance recently reached by neural language models (NLMs) across many natural language processing (NLP) tasks has steered the debate towards understanding whether NLMs implicitly learn linguistic competence. Probes, i.e., supervised models trained using NLM representations to predict linguistic properties, are frequently adopted to investigate this issue. However, it is still questioned if probing classification tasks really enable such investigation or if they simply hint at surface patterns in the data. 
This work contributes to this debate by presenting an approach to assessing the effectiveness of a suite of probing tasks aimed at testing the linguistic knowledge implicitly encoded by one of the most prominent NLMs, BERT. To this aim, we compared the performance of probes when predicting gold and automatically altered values of a set of linguistic features. Our experiments were performed on Italian and were evaluated across BERT's layers and for sentences with different lengths. As a general result, we observed higher performance in the prediction of gold values, thus suggesting that the probing model is sensitive to the distortion of feature values. However, our experiments also showed that the length of a sentence is a highly influential factor that is able to confound the probing model's predictions.","keywords":["Neural language model","Probing tasks","Treebanks"],"pages":"19","url":"https:\/\/www.mdpi.com\/2078-2489\/14\/3\/144","volume":"14","doi":"10.3390\/info14030144","editors_people":"","editors":[""],"published":"Information (Basel)","publisher":"MDPI (Basel, Svizzera)","issn":"2078-2489","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":366732,"last_updated":"2024-01-10 11:02:52","id_people":491078,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Coherent or Not? Stressing a Neural Language Model for Discourse Coherence in Multiple Languages","year":2023,"authors_people":"Dominique Brunato; Felice Dell'Orletta; Irene Dini; Andrea Amelio Ravelli","authors_cnr":["Ravelli, Andrea Amelio","Dini, Irene","Dell'Orletta, Felice","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","21125"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F.","Dini, I.","Ravelli, A. 
A."],"abstract":"In this study, we investigate the capability of a Neural Language Model (NLM) to distinguish between coherent and incoherent text, where the latter has been artificially created to gradually undermine local coherence within text. While previous research on coherence assessment using NLMs has primarily focused on English, we extend our investigation to multiple languages. We employ a consistent evaluation framework to compare the performance of monolingual and multilingual models in both in-domain and out-domain settings. Additionally, we explore the model's performance in a cross-language scenario.","keywords":["text coherence","neural language models","multilingual corpora"],"pages":"10690-10700","url":"https:\/\/aclanthology.org\/2023.findings-acl.680","volume":"","doi":"10.18653\/v1\/2023.findings-acl.680","editors_people":"","editors":[""],"published":"","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-959429-62-3","conference_name":"61st Annual Meeting of the Association for Computational Linguistics (ACL 2023)","conference_place":"Toronto, Canada","conference_date":"9-14\/07\/2023"},{"id":132500,"last_updated":"2023-11-06 19:31:16","id_people":464954,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Linguistically-Based Comparison of Different Approaches to Building Corpora for Text Simplification: A Case Study on Italian","year":2022,"authors_people":"Dominique Brunato, Felice Dell'Orletta, Giulia Venturi","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper, we present an overview of existing parallel corpora for Automatic Text Simplification (ATS) in different languages focusing on the approach adopted for their construction. 
We make the main distinction between manual and (semi)-automatic approaches in order to investigate in which respect complex and simple texts vary and whether and how the observed modifications may depend on the underlying approach. To this end, we perform a two-level comparison on Italian corpora, since this is the only language, with the exception of English, for which there are large parallel resources derived through the two approaches considered. The first level of comparison accounts for the main types of sentence transformations occurring in the simplification process, the second one examines the results of a linguistic profiling analysis based on Natural Language Processing techniques and carried out on the original and the simple version of the same texts. For both levels of analysis, we chose to focus our discussion mostly on sentence transformations and linguistic characteristics that pertain to the morpho-syntactic and syntactic structure of the sentence.","keywords":["linguistic complexity","Italian language","corpus construction","text simplification","aligned corpora"],"pages":"1-19","url":"https:\/\/www.frontiersin.org\/articles\/10.3389\/fpsyg.2022.707630\/full","volume":"13","doi":"10.3389\/fpsyg.2022.707630","editors_people":"","editors":[""],"published":"Frontiers in Psychology","publisher":"Frontiers Research Foundation (Switzerland)","issn":"1664-1078","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":351804,"last_updated":"2023-11-06 19:31:10","id_people":474123,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Analisi della scrittura giovanile da una prospettiva linguistico-computazionale: il caso di studio della Fanfiction","year":2022,"authors_people":"Dominique Brunato, Andrea Mattei, Felice Dell'Orletta","authors_cnr":["Dell'Orletta, Felice","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","21125"],"authors_cnr_institute":[""],"authors":["Brunato, 
D.","Mattei, A.","Dell'Orletta, F."],"abstract":"This paper presents a study aimed at characterizing the linguistic style of an emerging literary genre of the web, particularly appreciated by teens and young adults: fanfiction. By relying on Natural Language Processing approaches, and in particular on the methodology of linguistic profiling applied to a novel corpus of Italian fanfiction stories inspired by the fantasy saga \"Harry Potter\", we investigate the relationship between linguistic style and 'success', measured in terms of number of reviews obtained by the readers. We show that it is possible to detect a set of features, among a wide set of linguistic ones modeling lexical, morpho-syntactic and syntactic phenomena, which help more in discriminating between 'successful' and 'unsuccessful' fanfics.","keywords":["Trattamento Automatico del Linguaggio","stilometria computazionale","linguistic profiling","corpora","fanfiction"],"pages":"171-189","url":"https:\/\/publications.cnr.it\/doc\/474123","volume":"2021\/3","doi":"","editors_people":"","editors":[""],"published":"Rassegna Italiana di Linguistica Applicata (Testo stamp.)","publisher":"Bulzoni (Roma, Italia)","issn":"0033-9725","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":343200,"last_updated":"2022-10-26 09:47:18","id_people":472409,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Why is this language complex? Cherry-pick the optimal set of features in multilingual treebanks","year":2022,"authors_people":"D. Brunato; G. Venturi","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["17692","21125"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Venturi, G."],"abstract":"This paper investigates linguistic complexity across natural languages from a corpus-based perspective and relies on the assumptions of linguistic profiling as a methodological framework. 
We focus in particular on the domain of syntactic complexity and analyze the distribution of a set of features taken as proxies of complexity phenomena at the sentence level, which were extracted from 63 treebanks annotated according to the Universal Dependencies formalism. This dataset guarantees that the features considered are modeling the same linguistic phenomena in different treebanks, allowing reliable comparison among languages. We show that our approach is able to identify tendencies of structural proximity between languages not necessarily in line with typologically-supported classification, thus shedding light on new corpus-based findings.","keywords":["Linguistic Complexity","Linguistic Profiling","Universal Dependencies","Syntactic Domain"],"pages":"1-14","url":"https:\/\/www.degruyter.com\/document\/doi\/10.1515\/lingvan-2021-0017\/html","volume":"","doi":"10.1515\/lingvan-2021-0017","editors_people":"","editors":[""],"published":"Linguistics vanguard","publisher":"De Gruyter Mouton (Berlin; New York NY, Germania)","issn":"2199-174X","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":352735,"last_updated":"2023-11-09 18:10:01","id_people":475015,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"On Robustness and Sensitivity of a Neural Language Model: A Case Study on Italian L1 Learner Errors","year":2022,"authors_people":"Miaschi, Alessio and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper, we propose a comprehensive linguistic study aimed at assessing the implicit behavior of one of the most prominent Neural Language Models (NLM) based on Transformer architectures, BERT (Devlin et al., 
2019), when dealing with a particular source of noisy data, namely essays written by L1 Italian learners containing a variety of errors targeting grammar, orthography and lexicon. Differently from previous works, we focus on the pre-training stage and we devise two complementary evaluation tasks aimed at assessing the impact of errors on sentence-level inner representations in terms of semantic robustness and linguistic sensitivity. While the first evaluation perspective is meant to probe the model's ability to encode the semantic similarity between sentences also in the presence of errors, the second type of probing task evaluates the influence of errors on BERT's implicit knowledge of a set of raw and morpho-syntactic properties of a sentence. Our experiments show that BERT's ability to compute sentence similarity and to correctly encode multi-leveled linguistic information of a sentence are differently modulated by the category of errors and that the error hierarchies in terms of robustness and sensitivity change across layer-wise representations.","keywords":["nlp","interpretability","transformers","learner errors"],"pages":"426-438","url":"https:\/\/doi.org\/10.1109\/TASLP.2022.3226333","volume":"","doi":"10.1109\/TASLP.2022.3226333","editors_people":"","editors":[""],"published":"IEEE\/ACM transactions on audio, speech, and language processing (Online)","publisher":"[Institute of Electrical and Electronics Engineers] ([Piscataway NJ], Stati Uniti d'America)","issn":"2329-9304","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":341004,"last_updated":"2023-11-06 19:31:21","id_people":469733,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Probing Linguistic Knowledge in Italian Neural Language Models across Language Varieties","year":2022,"authors_people":"Miaschi, Alessio and Sarti, Gabriele and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Miaschi, 
Alessio","Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Sarti, G.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper, we present an in-depth investigation of the linguistic knowledge encoded by the transformer models currently available for the Italian language. In particular, we investigate how the complexity of two different architectures of probing models affects the performance of the Transformers in encoding a wide spectrum of linguistic features. Moreover, we explore how this implicit knowledge varies according to different textual genres and language varieties.","keywords":["nlp","transformer models","interpretability"],"pages":"25-44","url":"http:\/\/www.aaccademia.it\/ita\/scheda-libro?aaref=1518","volume":"","doi":"10.4000\/ijcol.965","editors_people":"","editors":[""],"published":"Italian Journal of Computational Linguistics","publisher":"aAccademia University Press, Torino (Italia)","issn":"2499-4553","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":341608,"last_updated":"2023-11-06 19:31:23","id_people":470081,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"SemEval-2022 Task 3: PreTENS-Evaluating Neural Networks on Presuppositional Semantic Knowledge","year":2022,"authors_people":"Roberto Zamparelli, Shammur A Chowdhury, Dominique Brunato, Cristiano Chesi, Felice Dell'Orletta, Arid Hasan, Giulia Venturi","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Zamparelli, R.","Chowdhury, S. 
A.","Brunato, D.","Chesi, C.","Dell'Orletta, F.","Hasan, A.","Venturi, G."],"abstract":"We report the results of the SemEval 2022 Task 3, PreTENS, on evaluation the acceptability of simple sentences containing constructions whose two arguments are presupposed to be or not to be in an ordered taxonomic relation. The task featured two sub-tasks articulated as: (i) binary prediction task and (ii) regression task, predicting the acceptability in a continuous scale. The sentences were artificially generated in three languages (English, Italian and French). 21 systems, with 8 system papers were submitted for the task, all based on various types of fine-tuned transformer systems, often with ensemble methods and various data augmentation techniques. The best systems reached an F1-macro score of 94.49 (sub-task1) and a Spearman correlation coefficient of 0.80 (sub-task2), with interesting variations in specific constructions and\/or languages.","keywords":["Neural Networks","Presuppositional Knowledge","Evaluation"],"pages":"228-238","url":"https:\/\/aclanthology.org\/2022.semeval-1.29.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"16th International Workshop on Semantic Evaluation (SemEval-2022)","conference_place":"Seattle","conference_date":"14-15\/07\/2022"},{"id":132452,"last_updated":"2023-11-06 19:31:24","id_people":454570,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"A NLP-based stylometric approach for tracking the evolution of L1 written language competence","year":2021,"authors_people":"Miaschi, Alessio and Brunato, Dominique and Dell'Orletta, Felice","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Brunato, D.","Dell'Orletta, F."],"abstract":"In this study we present a Natural Language Processing (NLP)-based 
stylometric approach for tracking the evolution of written language competence in Italian L1 learners. The approach relies on a wide set of linguistically motivated features capturing stylistic aspects of a text, which were extracted from students' essays contained in CItA (Corpus Italiano di Apprendenti L1), the first longitudinal corpus of texts written by Italian L1 learners enrolled in the first and second year of lower secondary school. We address the problem of modeling written language development as a supervised classification task consisting in predicting the chronological order of essays written by the same student at different temporal spans. The promising results obtained in several classification scenarios allow us to conclude that it is possible to automatically model the highly relevant changes affecting written language evolution across time, as well as identifying which features are more predictive of this process. In the last part of the article, we focus the attention on the possible influence of background variables on language learning and we present preliminary results of a pilot study aiming at understanding how the observed developmental patterns are affected by information related to the school environment of the student.","keywords":["stylometry","computational linguistics","language competence"],"pages":"71-105","url":"https:\/\/www.jowr.org\/abstracts\/vol13_1\/Miaschi_et_al_2021_13_1_abstract.html","volume":"vol. 
13","doi":"10.17239\/jowr-2021.13.01.03","editors_people":"","editors":[""],"published":"Journal of Writing Research","publisher":"Universiteit Antwerpen (Antwerpen, Belgio)","issn":"2030-1006","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":341844,"last_updated":"2022-11-29 19:11:45","id_people":465394,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"On the role of textual connectives in sentence comprehension: A new dataset for Italian","year":2021,"authors_people":"Albertin G.; Miaschi A.; Brunato D.","authors_cnr":["Miaschi, Alessio","Brunato, Dominique Pierina"],"authors_cnr_id":["21125"],"authors_cnr_institute":[""],"authors":["Albertin, G.","Miaschi, A.","Brunato, D."],"abstract":"In this paper we present a new evaluation resource for Italian aimed at assessing the role of textual connectives in the comprehension of the meaning of a sentence. The resource is arranged in two sections (acceptability assessment and cloze test), each one corresponding to a distinct challenge task conceived to test how subtle modifications involving connectives in real usage sentences influence the perceived acceptability of the sentence by native speakers and Neural Language Models (NLMs). Although the main focus is the presentation of the dataset, we also provide some preliminary data comparing human judgments and NLMs performance in the two tasks.","keywords":["neural language models","textual connectives","sentence acceptability"],"pages":"","url":"http:\/\/ceur-ws.org\/Vol-3033\/paper16.pdf","volume":"3033","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. 
Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"8th Italian Conference on Computational Linguistics (CLIC-it 2021)","conference_place":"Milano","conference_date":"26-28\/01\/2022"},{"id":343044,"last_updated":"2023-11-06 19:31:25","id_people":472155,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Analyzing the Interaction between the Reader's Voice and the Linguistic Structure of the Text: a Preliminary Study","year":2021,"authors_people":"Iavarone B., Morelli M. S., Brunato D., Ghiasi S., Scilingo E. P., Vanello N., Dell'Orletta F., Greco A.","authors_cnr":["Dell'Orletta, Felice","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","21125"],"authors_cnr_institute":[""],"authors":["Iavarone, B.","Morelli, M. S.","Brunato, D.","Ghiasi, S.","Scilingo, E. P.","Vanello, N.","Dell'Orletta, F.","Greco, A."],"abstract":"In this study, we present a preliminary analysis of the relationship between the linguistic profile of a text and the voice properties of the reader aiming to improve the speech-based emotion recognition systems. To this aim, we recorded the speech signals from a group of 32 healthy volunteers reading aloud neutral and affective texts and used the BioVoice toolbox to compute some of the main speech features. The selected texts were analyzed to quantify their lexical, morpho-syntactic, and syntactic content. Correlation and Support Vector Regressor analyses between linguistic and speech features have shown a significant modulation of some voice acoustic properties performed by the linguistic structure of the text. Particularly, a significant effect was shown on some specific speech features often used for the assessment of human emotional state (e.g., F0). 
This suggests that the lexical, morpho-syntactic, and syntactic properties could play an important role in the emotional dynamics of a person.","keywords":["Natural Language Processing","Speech analysis","linguistic profile"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/472155","volume":"","doi":"10.36253\/978-88-5518-449-6","editors_people":"","editors":[""],"published":"Proceedings of 12th INTERNATIONAL WORKSHOP \"MODELS AND ANALYSIS OF VOCAL EMISSIONS FOR BIOMEDICAL APPLICATIONS\"","publisher":"","issn":"","isbn":"978-88-5518-448-9","conference_name":"12th INTERNATIONAL WORKSHOP \"MODELS AND ANALYSIS OF VOCAL EMISSIONS FOR BIOMEDICAL APPLICATIONS\"","conference_place":"Firenze, Italia","conference_date":"14-16\/12\/2021"},{"id":132487,"last_updated":"2023-11-06 19:31:30","id_people":463833,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Probing tasks under pressure","year":2021,"authors_people":"Miaschi A.; Alzetta C.; Brunato D.; Dell'Orletta F.; Venturi G.","authors_cnr":["Miaschi, Alessio","Alzetta, Chiara","Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Alzetta, C.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"Probing tasks are frequently used to evaluate whether the representations of Neural Language Models (NLMs) encode linguistic information. However, it is still questioned if probing classification tasks really enable such investigation or they simply hint for surface patterns in the data. We present a method to investigate this question by comparing the accuracies of a set of probing tasks on gold and automatically generated control datasets. 
Our results suggest that probing tasks can be used as reliable diagnostic methods to investigate the linguistic information encoded in NLMs representations.","keywords":["Neural Language Models","Linguistic probing","Treebanks"],"pages":"1-7","url":"http:\/\/ceur-ws.org\/Vol-3033\/paper29.pdf","volume":"3033","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"8th Italian Conference on Computational Linguistics (CLIC-it 2021)","conference_place":"Milano","conference_date":"29\/06-01\/07\/2022"},{"id":132451,"last_updated":"2023-11-06 19:31:35","id_people":454441,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"What Makes My Model Perplexed? A Linguistic Investigation on Neural Language Models Perplexity","year":2021,"authors_people":"Miaschi, Alessio and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"This paper presents an investigation aimed at studying how the linguistic structure of a sentence affects the perplexity of two of the most popular Neural Language Models (NLMs), BERT and GPT-2. We first compare the sentence-level likelihood computed with BERT and the GPT-2's perplexity showing that the two metrics are correlated. 
In addition, we exploit linguistic features capturing a wide set of morpho-syntactic and syntactic phenomena showing how they contribute to predict the perplexity of the two NLMs.","keywords":["nlp","interpretability","deep learning"],"pages":"40-47","url":"https:\/\/www.aclweb.org\/anthology\/2021.deelio-1.5","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-954085-30-5","conference_name":"2nd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures","conference_place":"","conference_date":"10\/06\/2021"},{"id":132502,"last_updated":"2023-11-06 19:31:34","id_people":464972,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"That Looks Hard: Characterizing Linguistic Complexity in Humans and Language Models","year":2021,"authors_people":"Sarti G, Brunato D, Dell'Orletta F","authors_cnr":["Dell'Orletta, Felice","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","21125"],"authors_cnr_institute":[""],"authors":["Sarti, G.","Brunato, D.","Dell'Orletta, F."],"abstract":"This paper investigates the relationship between two complementary perspectives in the human assessment of sentence complexity and how they are modeled in a neural language model (NLM). The first perspective takes into account multiple online behavioral metrics obtained from eye-tracking recordings. The second one concerns the offline perception of complexity measured by explicit human judgments. Using a broad spectrum of linguistic features modeling lexical, morpho-syntactic, and syntactic properties of sentences, we perform a comprehensive analysis of linguistic phenomena associated with the two complexity viewpoints and report similarities and differences. 
We then show the effectiveness of linguistic features when explicitly leveraged by a regression model for predicting sentence complexity and compare its results with the ones obtained by a fine-tuned neural language model. We finally probe the NLM's linguistic competence before and after fine-tuning, highlighting how linguistic information encoded in representations changes when the model learns to predict complexity.","keywords":["linguistic complexity","eyetracking","human evaluation"],"pages":"48-60","url":"https:\/\/aclanthology.org\/2021.cmcl-1.5","volume":"","doi":"10.18653\/v1\/2021.cmcl-1.5","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-954085-35-0","conference_name":"Proceedings of Workshop on Cognitive Modeling and Computational Linguistics (CMCL 2021)","conference_place":"","conference_date":"10\/06\/2021"},{"id":132427,"last_updated":"2023-11-06 19:31:38","id_people":444114,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"AcCompl-it @ EVALITA2020: Overview of the acceptability & complexity evaluation task for Italian","year":2020,"authors_people":"Brunato D.; Chesi C.; Dell'Orletta F.; Montemagni S.; Venturi G.; Zamparelli R.","authors_cnr":["Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Chesi, C.","Dell'Orletta, F.","Montemagni, S.","Venturi, G.","Zamparelli, R."],"abstract":"The Acceptability and Complexity evaluation task for Italian (AcCompl-it) was aimed at developing and evaluating methods to classify Italian sentences according to Acceptability and Complexity. It consists of two independent tasks asking participants to predict either the acceptability or the complexity rate (or both) of a given set of sentences previously scored by native speakers on a 1-to-7 points Likert scale. 
In this paper, we introduce the datasets distributed to the participants, we describe the different approaches of the participating systems and provide a first analysis of the obtained results.","keywords":["Shared Task","Linguistic Complexity","Acceptability"],"pages":"1-8","url":"http:\/\/ceur-ws.org\/Vol-2765\/paper163.pdf","volume":"2765","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"EVALITA '20, Evaluation of NLP and Speech Tools for Italian","conference_place":"Online","conference_date":"17\/12\/2020"},{"id":132394,"last_updated":"2023-11-06 19:31:54","id_people":435966,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Profiling-UD: a Tool for Linguistic Profiling of Texts","year":2020,"authors_people":"Dominique Brunato, Andrea Cimino, Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Cimino, Andrea","Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Cimino, A.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In this paper, we introduce Profiling-UD, a new text analysis tool inspired to the principles of linguistic profiling that can support language variation research from different perspectives. It allows the extraction of more than 130 features, spanning across different levels of linguistic description. Beyond the large number of features that can be monitored, a main novelty of Profiling-UD is that it has been specifically devised to be multilingual since it is based on the Universal Dependencies framework. 
In the second part of the paper, we demonstrate the effectiveness of these features in a number of theoretical and applicative studies in which they were successfully used for text and author profiling.","keywords":["Computational Language Variation Analysis","Linguistic Profiling","Universal Dependencies"],"pages":"7145-7151","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2020\/pdf\/2020.lrec-1.883.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"979-10-95546-34-4","conference_name":"Conference on Language Resources and Evaluation (LREC)","conference_place":"","conference_date":"11-16\/05\/2020"},{"id":132416,"last_updated":"2023-11-06 19:31:44","id_people":442040,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Is Neural Language Model Perplexity Related to Readability?","year":2020,"authors_people":"Miaschi, Alessio and Alzetta, Chiara and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Alzetta, Chiara","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Alzetta, C.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"This paper explores the relationship between Neural Language Model (NLM) perplexity and sentence readability. Starting from the evidence that NLMs implicitly acquire sophisticated linguistic knowledge from a huge amount of training data, our goal is to investigate whether perplexity is affected by linguistic features used to automatically assess sentence readability and if there is a correlation between the two metrics. 
Our findings suggest that this correlation is actually quite weak and the two metrics are affected by different linguistic phenomena.","keywords":["nlp","neural language models","readability"],"pages":"","url":"http:\/\/ceur-ws.org\/Vol-2769\/paper_57.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"979-12-80136-28-2","conference_name":"Seventh Italian Conference on Computational Linguistics","conference_place":"","conference_date":"01-03\/03\/2021"},{"id":132391,"last_updated":"2023-11-06 19:31:46","id_people":438491,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Linguistic Profiling of a Neural Language Model","year":2020,"authors_people":"Miaschi A., Brunato D., Dell'Orletta F., Venturi G.","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper we investigate the linguistic knowledge learned by a Neural Language Model (NLM) before and after a fine-tuning process and how this knowledge affects its predictions during several classification problems. We use a wide set of probing tasks, each of which corresponds to a distinct sentence-level feature extracted from different levels of linguistic annotation. We show that BERT is able to encode a wide range of linguistic characteristics, but it tends to lose this information when trained on specific downstream tasks. 
We also find that BERT's capacity to encode different kind of linguistic properties has a positive influence on its predictions: the more it stores readable linguistic information of a sentence, the higher will be its capacity of predicting the expected label assigned to that sentence.","keywords":["Linguistic Profiling","Neural Language Model","Interpretability"],"pages":"745-756","url":"https:\/\/www.aclweb.org\/anthology\/2020.coling-main.65\/","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-952148-27-9","conference_name":"International Conference on Computational Linguistics (COLING)","conference_place":"Online","conference_date":"8-13\/12\/2020"},{"id":132395,"last_updated":"2023-11-06 19:31:58","id_people":435969,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Tracking the Evolution of Written Language Competence in L2 Spanish Learners","year":2020,"authors_people":"Miaschi, Alessio; Davidson, Sam; Brunato, Dominique; Dell'Orletta, Felice; Sagae, Kenji; Sanchez-Gutierrez, Claudia H.; Venturi, Giulia","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Davidson, S.","Brunato, D.","Dell'Orletta, F.","Sagae, K.","Sanchez Gutierrez, C. H.","Venturi, G."],"abstract":"In this paper we present an NLP-based approach for tracking the evolution of written language competence in L2 Spanish learners using a wide range of linguistic features automatically extracted from students' written productions. 
Beyond reporting classification results for different scenarios, we explore the connection between the most predictive features and the teaching curriculum, finding that our set of linguistic features often reflects the explicit instruction that students receive during each course.","keywords":["Evolution of Language Competence","Natural Language Processing","Linguistic Profiling"],"pages":"92-101","url":"https:\/\/www.aclweb.org\/anthology\/2020.bea-1.9.pdf","volume":"","doi":"10.18653\/v1\/W16-05","editors_people":"","editors":[""],"published":"","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-941643-83-9","conference_name":"15th Workshop on Innovative Use of NLP for Building Educational Applications","conference_place":"","conference_date":"10\/07\/2020"},{"id":132417,"last_updated":"2023-11-06 19:31:45","id_people":442038,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Italian Transformers Under the Linguistic Lens","year":2020,"authors_people":"Miaschi, Alessio and Sarti, Gabriele and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Sarti, G.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper we present an in-depth investigation of the linguistic knowledge encoded by the transformer models currently available for the Italian language. In particular, we investigate whether and how using different architectures of probing models affects the performance of Italian transformers in encoding a wide spectrum of linguistic features. 
Moreover, we explore how this implicit knowledge varies according to different textual genres.","keywords":["nlp","neural language models","interpretability"],"pages":"","url":"http:\/\/ceur-ws.org\/Vol-2769\/paper_56.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"979-12-80136-28-2","conference_name":"Seventh Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"","conference_date":"01-03\/03\/2021"},{"id":132359,"last_updated":"2023-11-06 19:32:17","id_people":423885,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"What makes a review helpful? Predicting the helpfulness of Italian tripadvisor reviews","year":2019,"authors_people":"Chiriatti G.; Brunato D.; Dell'Orletta F.; Venturi G.","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Chiriatti, G.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper we introduce a classification system devoted to predict the helpfulness of Italian online reviews. It is based on a wide set of features reflecting the different factors involved and tested on different categories of TripAdvisor reviews. For this purpose, we collected the first Italian corpus of online reviews enriched with metadata related to their helpfulness and we carried out an in-depth analysis of the most predictive features.","keywords":["Natural Language Processing","Document Classification","Linguistic Profiling"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85074834351&origin=inward","volume":"2481","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. 
Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"6th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Bari","conference_date":"13-15\/11\/2019"},{"id":132358,"last_updated":"2023-11-06 19:32:09","id_people":423883,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Italian and English sentence simplification: How many differences?","year":2019,"authors_people":"Fieromonte M.; Brunato D.; Dell'Orletta F.; Venturi G.","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Fieromonte, M.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"The paper proposes a cross-linguistic analysis of two parallel monolingual corpora conceived for automatic text simplification in two languages, Italian and English. The aim is to find similarities and differences in the process of simplification in two typologically different languages. To carry out the comparison, 1,000 sentences were extracted from the two corpora and annotated with a scheme previously used to annotate simplification phenomena.","keywords":["Natural Language Processing","Automatic Text Simplification"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85074816689&origin=inward","volume":"2481","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. 
Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"6th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Bari","conference_date":"13-15\/11\/2019"},{"id":132308,"last_updated":"2023-11-06 19:32:26","id_people":391619,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Is this sentence difficult? Do you agree?","year":2018,"authors_people":"Brunato D., De Mattei L., Dell'Orletta F., Iavarone B., Venturi G.","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Brunato, D.","De Mattei, L.","Dell'Orletta, F.","Iavarone, B.","Venturi, G."],"abstract":"In this paper, we present a crowdsourcing-based approach to model the human perception of sentence complexity. We collect a large corpus of sentences rated with judgments of complexity for two typologically-different languages, Italian and English. 
We test our approach in two experimental scenarios aimed to investigate the contribution of a wide set of lexical, morpho-syntactic and syntactic phenomena in predicting i) the degree of agreement among annotators independently from the assigned judgment and ii) the perception of sentence complexity.","keywords":["Linguistic complexity","Crowdsourcing","Human perception"],"pages":"1-10","url":"https:\/\/www.aclweb.org\/anthology\/D18-1289\/","volume":"","doi":"10.18653\/v1\/D18-1289","editors_people":"","editors":[""],"published":"","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-948087-84-1","conference_name":"Conference on Empirical Methods in Natural Language Processing (EMNLP)","conference_place":"Brussels","conference_date":"31\/10\/2018-04\/11\/2018"},{"id":132350,"last_updated":"2023-11-06 19:32:33","id_people":423870,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Sentences and documents in native language identification","year":2018,"authors_people":"Cimino A.; Dell'Orletta F.; Brunato D.; Venturi G.","authors_cnr":["Cimino, Andrea","Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Cimino, A.","Dell'Orletta, F.","Brunato, D.","Venturi, G."],"abstract":"Starting from a wide set of linguistic features, we present the first in depth feature analysis in two different Native Language Identification (NLI) scenarios. We compare the results obtained in a traditional NLI document classification task and in a newly introduced sentence classification task, investigating the different role played by the considered features. 
Finally, we study the impact of a set of selected features extracted from the sentence classifier in document classification.","keywords":["Natural Language Processing","Native Language Identification"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85057749754&origin=inward","volume":"2253","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"5th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Torino","conference_date":"10-12\/12\/2018"},{"id":132353,"last_updated":"2023-11-06 19:32:25","id_people":423873,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Gender and Genre Linguistic profiling: A case study on female and male journalistic and diary prose","year":2018,"authors_people":"Cocciu E.; Brunato D.; Venturi G.; Dell'Orletta F.","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Cocciu, E.","Brunato, D.","Venturi, G.","Dell'Orletta, F."],"abstract":"This paper intends to investigate the linguistic profile of male- and female-authored texts belonging to two very different textual genres: newspaper articles and diary prose. 
By using a wide set of linguistic features automatically extracted from text and spanning across different levels of linguistic description, from lexicon to syntax, our analysis highlights the peculiarities of the two examined genres and how the genre dimension is influenced by variation depending on author's gender (and vice versa).","keywords":["Natural Language Processing","Genre Classification","Linguistic Profiling"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85057759773&origin=inward","volume":"2253","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"5th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Torino","conference_date":"10-12\/12\/2018"},{"id":132245,"last_updated":"2023-11-06 19:32:45","id_people":382461,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"On the order of words in Italian: a study on genre vs complexity","year":2017,"authors_people":"Dominique Brunato, Felice Dell'Orletta","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F."],"abstract":"In this paper we present a cross-genre study on word order variation in Italian based on automatically dependency-parsed corpora. 
A comparative analysis focused on dependency direction and dependency distance for major constituents in the sentence is carried out in order to assess the influence of both textual genre and linguistic complexity on the distribution of phenomena of syntactic markedness.","keywords":["word order","syntactic analysis","linguistic complexity","natural language processing"],"pages":"25-31","url":"https:\/\/publications.cnr.it\/doc\/382461","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"International Conference on Dependency Linguistics (Depling 2017)","conference_place":"Pisa","conference_date":"18-20\/09\/2017"},{"id":132169,"last_updated":"2023-11-06 19:32:55","id_people":366755,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"ISACCO: a corpus for investigating spoken and written language development in Italian school-age children","year":2016,"authors_people":"Dominique Brunato, Felice Dell'Orletta","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F."],"abstract":"In this paper we present ISACCO (Italian School-Age Children COrpus), a corpus of oral and written retellings of Italian-speaking children attending primary school. All texts were digitalized and automatically enriched with multi-level linguistic annotation. Preliminary explorations of both the form and the content of children's productions were carried out based on a set of features automatically extracted by NLP tools. Written retellings were manually annotated with a typology of errors belonging to three different linguistic levels. 
The resource, which has been made publicly available, is conceived to support research and computational modeling of \"later language acquisition\", with an emphasis on comparative assessment of the evolution of oral and written language competencies in early school grades.","keywords":["Child language acquisition","Oral and Written language","multi-level linguistic analysis"],"pages":"63-76","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2016\/09\/04_brunato_dell-orletta.pdf","volume":"2","doi":"","editors_people":"","editors":[""],"published":"Italian Journal of Computational Linguistics","publisher":"aAccademia University Press, Torino (Italia)","issn":"2499-4553","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132251,"last_updated":"2023-11-06 19:32:57","id_people":385220,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Monitoraggio linguistico di Scritture Brevi: aspetti metodologici e primi risultati","year":2016,"authors_people":"D. BRUNATO, F. DELL'ORLETTA, S. MONTEMAGNI, G. VENTURI","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"Se da un lato le tecnologie del linguaggio svolgono un ruolo ormai indiscusso per l'accesso al contenuto testuale, ci\u00f2 non appare scontato quando si va a considerare il loro ruolo nella valutazione delle strutture linguistiche sottostanti al testo. Questo contributo si focalizza sulla definizione di una metodologia innovativa di monitoraggio linguistico della lingua italiana che a partire dall'output di strumenti di annotazione linguistica automatica permette di ricostruire un profilo linguistico di una collezione di testi rappresentativa di una specifica variet\u00e0 d'uso della lingua. 
Tale metodologia \u00e8 stata applicata a un corpus di tweet allo scopo di far luce su interrogativi aperti quali la possibilit\u00e0 di rintracciare tendenze lessicali, morfo-sintattiche e sintattiche peculiari all'interno di questa tipologia testuale; di studiare come queste tendenze si rapportino ai tratti caratterizzanti della lingua scritta e parlata; di individuare possibili differenze nella forma linguistica in cui si twittano contenuti di natura diversa.","keywords":["Trattamento Automatico del Linguaggio","Monitoraggio Linguistico","Variet\u00e0 d'Uso della Lingua","Lingua del Web"],"pages":"149-176","url":"https:\/\/publications.cnr.it\/doc\/385220","volume":"N. S. 5","doi":"","editors_people":"","editors":[""],"published":"Quaderni Aion","publisher":"Universit\u00e0 degli Studi di Napoli \"L'Orientale\" (Napoli, Italia)","issn":"1825-2796","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132171,"last_updated":"2017-02-28 15:50:00","id_people":366759,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Le tecnologie linguistico-computazionali per la leggibilit\u00e0 della comunicazione istituzionale","year":2016,"authors_people":"Dominique Brunato, Giulia Venturi","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":[""],"authors_cnr_institute":[""],"authors":["Brunato, D.","Venturi, G."],"abstract":"Il contributo illustra il ruolo delle tecnologie linguistico-computazionali per la valutazione automatica della leggibilit\u00e0 dei testi della comunicazione istituzionale e propone alcuni esempi di semplificazione semi-automatica di testi amministrativi e normativi.","keywords":["tecnologie linguistico-computazionali","valutazione automatica della leggibilit\u00e0","comunicazione istituzionale"],"pages":"119-157","url":"https:\/\/publications.cnr.it\/doc\/366759","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"Pisa University 
Press (Pisa, ITA)","issn":"","isbn":"978-88-6741-627-1","conference_name":"","conference_place":"","conference_date":""},{"id":132176,"last_updated":"2023-11-06 19:33:00","id_people":367760,"institutes":["ILC"],"type":"edited_volume","type_order":3,"type_people":"book","title":"Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC 2016)","year":2016,"authors_people":"Dominique Brunato, Felice Dell'Orletta, Giulia Venturi, Thomas Fran\u00e7ois, Philippe Blache","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F.","Venturi, G.","Fran\u00e7ois, T.","Blache, P."],"abstract":"Introduzione agli atti della prima edizione del workshop \"Computational Linguistics for Linguistic Complexity\" che raccoglie lavori che studiano da prospettive diverse il tema della complessit\u00e0 linguistica workshop allo scopo di promuovere una riflessione comune su approcci diversi all'indagine, al trattamento e alla valutazione di aspetti che rendono complessa la lingua.","keywords":["Linguistic Complexity","Computational Linguistics"],"pages":"1-245","url":"https:\/\/aclweb.org\/anthology\/W\/W16\/W16-41.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-4-87974-709-9","conference_name":"","conference_place":"","conference_date":""},{"id":132165,"last_updated":"2023-11-06 19:32:58","id_people":366726,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"PaCCSS-IT: A Parallel Corpus of Complex-Simple Sentences for Automatic Text Simplification","year":2016,"authors_people":"Dominique Brunato, Andrea Cimino, Felice Dell'Orletta, Giulia Venturi","authors_cnr":["Venturi, Giulia","Cimino, Andrea","Brunato, Dominique Pierina","Dell'Orletta, 
Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Cimino, A.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper we present PaCCSS-IT, a Parallel Corpus of Complex-Simple Sentences for ITalian. To build the resource we develop a new method for automatically acquiring a corpus of complex-simple paired sentences able to intercept structural transformations and particularly suitable for text simplification. The method requires a wide amount of texts that can be easily extracted from the web making it suitable also for less-resourced languages. We test it on the Italian language making available the biggest Italian corpus for automatic text simplification.","keywords":["Automatic Text Simplification","Sentence alignment","Italian corpus"],"pages":"351-361","url":"https:\/\/www.aclweb.org\/anthology\/D\/D16\/D16-1034.pdf","volume":"","doi":"10.18653\/v1\/d16-1034","editors_people":"","editors":[""],"published":"","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-945626-25-8","conference_name":"Conference on Empirical Methods in Natural Language Processing (EMNLP 2016)","conference_place":"Austin, Texas","conference_date":"01-05\/11\/2016"},{"id":132164,"last_updated":"2023-11-06 19:33:01","id_people":366724,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Studio sull'ordinamento dei costituenti nel confronto tra generi e complessit\u00e0","year":2016,"authors_people":"Giulia Pieri, Dominique Brunato, Felice Dell'Orletta","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Pieri, G.","Brunato, D.","Dell'Orletta, F."],"abstract":"In questo articolo presentiamo uno studio sull'ordine dei costituenti in italiano basato su corpora annotati in maniera automatica fino all'analisi sintattica a dipendenze. 
L'indagine comparativa ha permesso di valutare l'influenza sia del genere testuale sia della complessit\u00e0 linguistica nella distribuzione dei fenomeni di marcatezza sintattica.","keywords":["Complessit\u00e0 linguistica","Corpora annotati","Generi testuali"],"pages":"5","url":"http:\/\/ceur-ws.org\/Vol-1749\/paper44.pdf","volume":"1749","doi":"","editors_people":"Basile, Pierpaolo; Corazza, Anna; Cutugno, Franco; Montemagni, Simonetta; Nissim, Malvina; Patti, Viviana; Semeraro, Giovanni; Sprugnoli, Rachele","editors":["Basile, P.","Corazza, A.","Cutugno, F.","Montemagni, S.","Nissim, M.","Patti, V.","Semeraro, G.","Sprugnoli, R."],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Third Italian Conference on Computational Linguistics (CLiC-it 2016)","conference_place":"Napoli","conference_date":"5-6\/12\/2016"},{"id":132135,"last_updated":"2023-11-06 19:33:12","id_people":359256,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"ISACCO: a corpus for investigating spoken and written language development in Italian school-age children","year":2015,"authors_people":"D. Brunato, F. Dell'Orletta","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F."],"abstract":"We present ISACCO (Italian school-age children corpus), a new corpus of oral and written retellings of Italian speaking children attending the primary school. All texts were digitalized and automatically enriched with linguistic information allowing preliminary explorations based on NLP features. Written retellings were also manually annotated with a typology of linguistic errors. 
The resource is conceived to support research and computational modeling of \"later language acquisition\", with an emphasis for comparative assessment of oral and written language skills across early school grades.","keywords":["Child language acquisition","Oral and written language","multi-level linguistic analysis"],"pages":"62-66","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2016\/03\/IsaccoCorpus.pdf","volume":"","doi":"","editors_people":"Cristina Bosco, Sara Tonelli, Fabio Massimo Zanzotto","editors":["Bosco, C.","Tonelli, S.","Zanzotto, F. M."],"published":"Proceedings of the Second Italian Conference on Computational Linguistics (CLiC-it 2015)","publisher":"Accademia University Press (Torino, ITA)","issn":"","isbn":"978-88-99200-62-6","conference_name":"Second Italian Conference on Computational Linguistics (CLiC-it 2015)","conference_place":"Trento","conference_date":"03\/12\/2015-04\/12\/2015"},{"id":132076,"last_updated":"2023-11-06 19:33:09","id_people":332693,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Design and Annotation of the First Italian Corpus for Text Simplification","year":2015,"authors_people":"Brunato D., Dell'Orletta F., Venturi G., Montemagni S.","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048","048"],"authors":["Brunato, D.","Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"In this paper, we present design and construction of the first Italian corpus for automatic and semi--automatic text simplification. In line with current approaches, we propose a new annotation scheme specifically conceived to identify the typology of changes an original sentence undergoes when it is manually simplified. 
Such a scheme has been applied to two aligned Italian corpora, containing original texts with corresponding simplified versions, selected as representative of two different manual simplification strategies and addressing different target reader populations. Each corpus was annotated with the operations foreseen in the annotation scheme, covering different levels of linguistic description. Annotation results were analysed with the final aim of capturing peculiarities and differences of the different simplification strategies pursued in the two corpora.","keywords":["Annotation Scheme","Automatic Text Simplification"],"pages":"31-34","url":"https:\/\/aclweb.org\/anthology\/W\/W15\/W15-1604.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-941643-47-1","conference_name":"Proceedings of LAW IX-The 9th Linguistic Annotation Workshop","conference_place":"Denver, Colorado","conference_date":"5 giugno 2015"},{"id":132005,"last_updated":"2015-02-23 16:50:00","id_people":311157,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Le tecnologie linguistico-computazionali nella misura della leggibilit\u00e0 di testi giuridici","year":2014,"authors_people":"Brunato D., Venturi G.","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":[""],"authors_cnr_institute":["048","048"],"authors":["Brunato, D.","Venturi, G."],"abstract":"Il presente contributo illustra una innovativa metodologia per il calcolo della leggibilit\u00e0 di un testo giuridico basata su strumenti di Trattamento Automatico del Linguaggio ed espressamente rivolta alla sua semplificazione. 
Inserendoci nel pi\u00f9 ampio filone di ricerche che affronta il tema dell'accessibilit\u00e0 della lingua del diritto, discutiamo con esempi tratti da testi reali, il caso specifico della prosa burocratico-amministrativa dal momento che l'accessibilit\u00e0 a tali documenti costituisce un elemento chiave della comunicazione istituzioni-cittadini. A nostra conoscenza, tale studio rappresenta il primo tentativo volto a mostrare come tecnologie linguistico-computazionali allo stato dell'arte per la lingua italiana incomincino ad essere mature per costituire non solo un ausilio per definire automaticamente la leggibilit\u00e0 di testi giuridici ma anche una guida per una loro stesura semplificata. Tali funzionalit\u00e0 saranno illustrate grazie a READ-IT, il primo e al momento unico strumento di valutazione della leggibilit\u00e0 oggi esistente per la lingua italiana basato su strumenti di Trattamento Automatico del Linguaggio.","keywords":[""],"pages":"111-142","url":"https:\/\/publications.cnr.it\/doc\/311157","volume":"XXIII","doi":"","editors_people":"","editors":[""],"published":"Informatica e diritto","publisher":"Edizioni Scientifiche Italiane (Firenze, Italia)","issn":"0390-0975","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132012,"last_updated":"2015-02-18 11:00:48","id_people":311792,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Complessit\u00e0 necessaria o stereotipi del \"burocratese\"? 
Un'indagine sulla leggibilit\u00e0 del linguaggio amministrativo da una prospettiva linguistico-computazionale","year":2014,"authors_people":"Dominique Brunato","authors_cnr":["Brunato, Dominique Pierina"],"authors_cnr_id":[""],"authors_cnr_institute":["048"],"authors":["Brunato, D."],"abstract":"Questo contributo intende presentare una metodologia di ricostruzione del profilo linguistico di un corpus di testi amministrativi basata sull'uso delle tecnologie linguistico-computazionali e finalizzata alla specializzazione di un indice di leggibilit\u00e0 \"avanzato\" sulle caratteristiche di questi testi. Tale metodologia, documentata in [3], si propone di indagare la variazione linguistica tramite il monitoraggio di parametri estratti automaticamente dal testo sottoposto ad analisi linguistica multi-livello. La complessit\u00e0 della lingua della pubblica amministrazione, soprattutto nei documenti rivolti al cittadino, \u00e8 un problema ben noto e, malgrado le molteplici iniziative in favore di un linguaggio pi\u00f9 chiaro ed efficace, tratti tipici del \"burocratese\" continuano a persistere, anche quando non imposti da requisiti di legittimit\u00e0 e precisione. Un ausilio alla semplificazione pu\u00f2 venire dai sistemi per la misurazione della leggibilit\u00e0 del testo, come suggerito anche dai manuali di stile ispirati alla letteratura del Plain Language. Tuttavia le formule tradizionali, quali Gulpease [2], si limitano ad approssimare la complessit\u00e0 testuale, in quanto considerano esclusivamente parametri del testo superficiali, come la lunghezza della frase e della parola. Pi\u00f9 recentemente, \u00e8 emersa una nuova generazione di indici di leggibilit\u00e0, fondati su metodologie di Trattamento Automatico del Linguaggio, che riescono a intercettare i luoghi di complessit\u00e0 del testo in maniera pi\u00f9 granulare, computando un ampio spettro di parametri linguistici, che risultano anche maggiormente implicati nei processi di comprensione. 
\u00c8 il caso di READ-IT [1], lo strumento utilizzato in questo studio. L'analisi linguistica ha esplorato la distribuzione di caratteristiche lessicali, morfo-sintattiche e sintattiche, estratte automaticamente da un \"corpus parallelo monolingue\" di testi amministrativi, ovvero internamente suddiviso in due sotto-corpora: uno costituito da testi autentici delle pubbliche amministrazioni e uno dalle relative versioni semplificate, frutto di un lavoro di riscrittura coordinato da linguisti. Queste caratteristiche, gi\u00e0 risultate predittive del livello di leggibilit\u00e0 di testi giornalistici, sono state selezionate allo scopo di verificare l'incidenza delle peculiarit\u00e0 della scrittura amministrativa nella caratterizzazione della leggibilit\u00e0. I risultati hanno permesso infatti di discriminare tra aspetti di complessit\u00e0 \"ineliminabile\" e tratti tipici del burocratese: i primi sono rappresentati da quei parametri che, pur indicativi di maggior complessit\u00e0 nella lingua comune, risultano similmente distribuiti nelle due sottovariet\u00e0; i secondi sono invece quelli che contraddistinguono solo la variet\u00e0 dei testi originali, dunque quelli su cui hanno agito le riscritture. Questi dati potranno supportare tanto lo sviluppo di indici di leggibilit\u00e0 adattati alle peculiarit\u00e0 del linguaggio amministrativo, quanto le ricerche pi\u00f9 attuali sulla semplificazione semiautomatica del testo.","keywords":[""],"pages":"","url":"http:\/\/www.csfls.it\/silfi2014\/wp-content\/uploads\/2014\/08\/Atti-SILFI-2014-Riassunti.pdf","volume":"","doi":"","editors_people":"Centro di studi filologici e linguistici siciliani, Dipartimento di Scienze Umanistiche, Universit\u00e0 degli studi di Palermo","editors":["Di Studi Filologici, C.","Siciliani, L.","Di Scienze Umanistiche, D.","Di Palermo, U. D. S."],"published":"La lingua variabile nei testi letterari, artistici e funzionali contemporanei. 
Analisi, interpretazione, traduzione","publisher":"","issn":"","isbn":"978-88-96312-56-8","conference_name":"XIII Congresso della SILFI (Societ\u00e0 Internazionale di Linguistica e Filologia Italiana)","conference_place":"Palermo","conference_date":"22-24 settembre 2014"},{"id":131989,"last_updated":"2023-11-06 19:33:21","id_people":294073,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Defining an annotation scheme with a view to automatic text simplification","year":2014,"authors_people":"Brunato D., Dell'Orletta F., Venturi G., Montemagni S.","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048","048"],"authors":["Brunato, D.","Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"This paper presents the preliminary steps of ongoing research in the field of automatic text simplification. In line with current approaches, we propose here a new annotation scheme specifically conceived to identify the typologies of changes an original sentence undergoes when it is manually simplified. Such a scheme has been tested on a parallel corpus available for Italian, which we have first aligned at sentence level and then annotated with simplification rules.","keywords":[""],"pages":"87-92","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2014\/12\/Text-simplification.pdf","volume":"","doi":"10.12871\/CLICIT2014118","editors_people":"Roberto Basili, Alessandro Lenci, Bernardo Magnini","editors":["Basili, R.","Lenci, A.","Magnini, B."],"published":"Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)","publisher":"Pisa University Press srl (Pisa, ITA)","issn":"","isbn":"978-8-86741-472-7","conference_name":"First Italian Conference on Computational Linguistics (CLiC-it 2014)","conference_place":"Pisa","conference_date":"9-11 dicembre 2014"}]