[{"id":362241,"last_updated":"2024-01-30 16:37:08","id_people":488202,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Tell me how you write and I'll tell you what you read: a study on the writing style of book reviews","year":2023,"authors_people":"Chiara Alzetta, Felice Dell'Orletta, Alessio Miaschi, Elena Prat, Giulia Venturi","authors_cnr":["Miaschi, Alessio","Alzetta, Chiara","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Miaschi, A.","Prat, E.","Venturi, G."],"abstract":"Purpose: The authors' goal is to investigate variations in the writing style of book reviews published on different social reading platforms and referring to books of different genres, which enables acquiring insights into communication strategies adopted by readers to share their reading experiences. Design\/methodology\/approach: The authors propose a corpus-based study focused on the analysis of A Good Review, a novel corpus of online book reviews written in Italian, posted on Amazon and Goodreads, and covering six literary fiction genres. The authors rely on stylometric analysis to explore the linguistic properties and lexicon of reviews and the authors conducted automatic classification experiments using multiple approaches and feature configurations to predict either the review's platform or the literary genre. Findings: The analysis of user-generated reviews demonstrates that language is a quite variable dimension across reading platforms, but not as much across book genres. The classification experiments revealed that features modelling the syntactic structure of the sentence are reliable proxies for discerning Amazon and Goodreads reviews, whereas lexical information showed a higher predictive role for automatically discriminating the genre. 
Originality\/value: The high availability of cultural products makes information services necessary to help users navigate these resources and acquire information from unstructured data. This study contributes to a better understanding of the linguistic characteristics of user-generated book reviews, which can support the development of linguistically-informed recommendation services. Additionally, the authors release a novel corpus of online book reviews meant to support the reproducibility and advancements of the research.","keywords":["Stylometric analysis","Genre detection","Natural language processing","Book reviews"],"pages":"23","url":"https:\/\/www.emerald.com\/insight\/content\/doi\/10.1108\/JD-04-2023-0073\/full\/html","volume":"79","doi":"10.1108\/JD-04-2023-0073","editors_people":"","editors":[""],"published":"Journal of documentation","publisher":"Emerald (Bingley, Regno Unito)","issn":"0022-0418","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":362240,"last_updated":"2023-11-07 08:25:05","id_people":488201,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"A text style transfer system for reducing the physician-patient expertise gap: An analysis with automatic and human evaluations","year":2023,"authors_people":"Luca Bacco, Felice Dell'Orletta, Huiyuan Lai, Mario Merone, Malvina Nissim","authors_cnr":["Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Bacco, L.","Dell'Orletta, F.","Lai, H.","Merone, M.","Nissim, M."],"abstract":"Physicians and patients often come from different backgrounds and have varying levels of education, which can result in communication difficulties in the healthcare process. To address this expertise gap, we present a \"Text Style Transfer\" system. 
Our system uses Semantic Textual Similarity techniques based on Sentence Transformers models to create pseudo-parallel datasets from a large, non-parallel corpus of lay and expert texts. This approach allowed us to train a denoising autoencoder model (BART), overcoming the limitations of previous systems. Our extensive analysis, which includes both automatic metrics and human evaluations from both lay (patients) and expert (physicians) individuals, shows that our system outperforms state-of-the-art models and is comparable to human-provided gold references in some cases.","keywords":["Healthcare","Natural language processing","Text style transfer","Text simplification"],"pages":"1-18","url":"https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0957417423013763","volume":"233","doi":"10.1016\/j.eswa.2023.120874","editors_people":"","editors":[""],"published":"Expert systems with applications","publisher":"Pergamon (Oxford, Regno Unito)","issn":"0957-4174","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":366733,"last_updated":"2024-01-10 11:03:41","id_people":491082,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Linguistic Profile of a Text and Human Ratings of Writing Quality: a Case Study on Italian L1 Learner Essays","year":2023,"authors_people":"Aldo Cerulli; Dominique Brunato; Felice Dell'Orletta","authors_cnr":["Dell'Orletta, Felice","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","21125"],"authors_cnr_institute":[""],"authors":["Cerulli, A.","Brunato, D.","Dell'Orletta, F."],"abstract":"This paper presents a study based on the linguistic profiling methodology to explore the relationship between the linguistic structure of a text and how it is perceived in terms of writing quality by humans. 
The approach is tested on a selection of Italian L1 learners essays, which were taken from a larger longitudinal corpus of essays written by Italian L1 students enrolled in the first and second year of lower secondary school. Human ratings of writing quality by Italian native speakers were collected through a crowdsourcing task, in which annotators were asked to read pairs of essays and rated which one they believed to be better written. By analyzing these ratings, the study identifies a variety of linguistic phenomena spanning across distinct levels of linguistic description that distinguish the essays considered as 'winners' and evaluates the impact of students' errors on the human perception of writing quality.","keywords":["human ratings","text quality","Natural Language Processing","learner corpus"],"pages":"7-34","url":"https:\/\/www.ai-lc.it\/wp-content\/uploads\/2023\/09\/IJCOL_9_1_1_cerulli_et_al.pdf","volume":"1","doi":"","editors_people":"","editors":[""],"published":"Italian Journal of Computational Linguistics","publisher":"aAccademia University Press, Torino (Italia)","issn":"2499-4553","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":358095,"last_updated":"2023-11-06 19:31:05","id_people":482226,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Linguistic markers of demoralization improvement in schizophrenia: A pilot study","year":2023,"authors_people":"Folesani F.; Belvederi Murri M.; Puggioni C.; Tiberto E.; Marella M.; Toffanin T.; Zerbinati L.; Nanni M.G.; Caruso R.; Brunato D.; Ravelli A.A.; Dell'Orletta F.; Chochinov H.M.; Grassi L.","authors_cnr":["Dell'Orletta, Felice","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","21125"],"authors_cnr_institute":[""],"authors":["Folesani, F.","Belvederi, M. M.","Puggioni, C.","Tiberto, E.","Marella, M.","Toffanin, T.","Zerbinati, L.","Nanni, M. G.","Caruso, R.","Brunato, D.","Ravelli, A. 
A.","Dell'Orletta, F.","Chochinov, H. M.","Grassi, L."],"abstract":"Background and objectives: Individuals with schizophrenia display language impairments involving pragmatics, semantics and syntax. Language impairments may show diagnostic specificity and could relate to the ability of engaging in psychotherapy. This pilot study sought to: (1) identify linguistic features that might differentiate individuals with schizophrenia from distressed controls without psychotic symptoms; and (2) examine the association between linguistic abilities and clinical changes during psychotherapy. Methods: We recruited patients with schizophrenia and a comparison group of individuals with demoralization and distress due to cancer. Participants underwent Dignity Therapy (DT), an existentially-oriented brief psychotherapy focused on legacy and subjective dignity. Verbatim transcripts of the DT sessions were analysed using Natural Language Processing (NLP). In addition, we measured changes in levels of demoralization and dignity-related distress before and after DT, exploring the association with linguistic variables with network analysis. Results: Patients with schizophrenia could be differentiated from those with cancer-related distress using only three out of 141 linguistic variables: total number of words, number of prepositional chains and conversational elements. Across groups, better levels of discourse coherence and higher number of arguments controlled by a predicate (verb \"arity\") were associated with larger improvements in demoralization and, indirectly, dignity-related distress. Conclusions: Reproducible linguistic markers may be able to differentiate individuals with schizophrenia from those with less severe psychopathology, and to predict better uptake of psychotherapy independent from diagnosis. 
Future studies should explore whether linguistic features derived from NLP may be exploited as accessible diagnostic or prognostic markers to tailor psychotherapy and other interventions in schizophrenia.","keywords":["Schizophrenia","Dignity Therapy","Natural Language Processing","Linguistic Profiling","Psychotherapy"],"pages":"","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85153800425&origin=inward","volume":"","doi":"10.1016\/j.ejpsy.2023.03.001","editors_people":"","editors":[""],"published":"The European journal of psychiatry","publisher":"European Journal of Psychiatry (Saragosse, Spagna)","issn":"0213-6163","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":362242,"last_updated":"2024-01-30 16:38:01","id_people":488203,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Testing the Effectiveness of the Diagnostic Probing Paradigm on Italian Treebanks","year":2023,"authors_people":"Alessio Miaschi, Chiara Alzetta, Dominique Brunato, Felice Dell'Orletta, Giulia Venturi","authors_cnr":["Miaschi, Alessio","Alzetta, Chiara","Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Alzetta, C.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"The outstanding performance recently reached by neural language models (NLMs) across many natural language processing (NLP) tasks has steered the debate towards understanding whether NLMs implicitly learn linguistic competence. Probes, i.e., supervised models trained using NLM representations to predict linguistic properties, are frequently adopted to investigate this issue. However, it is still questioned if probing classification tasks really enable such investigation or if they simply hint at surface patterns in the data. 
This work contributes to this debate by presenting an approach to assessing the effectiveness of a suite of probing tasks aimed at testing the linguistic knowledge implicitly encoded by one of the most prominent NLMs, BERT. To this aim, we compared the performance of probes when predicting gold and automatically altered values of a set of linguistic features. Our experiments were performed on Italian and were evaluated across BERT's layers and for sentences with different lengths. As a general result, we observed higher performance in the prediction of gold values, thus suggesting that the probing model is sensitive to the distortion of feature values. However, our experiments also showed that the length of a sentence is a highly influential factor that is able to confound the probing model's predictions.","keywords":["Neural language model","Probing tasks","Treebanks"],"pages":"19","url":"https:\/\/www.mdpi.com\/2078-2489\/14\/3\/144","volume":"14","doi":"10.3390\/info14030144","editors_people":"","editors":[""],"published":"Information (Basel)","publisher":"MDPI (Basel, Svizzera)","issn":"2078-2489","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":362243,"last_updated":"2024-01-30 16:38:00","id_people":488204,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Analysing Deception in Witness Memory Through Linguistic Styles in Spontaneous Language","year":2023,"authors_people":"Sara Sol\u00e0 Sales, Chiara Alzetta, Carmen Moret Tatay, Felice Dell'Orletta","authors_cnr":["Alzetta, Chiara","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Sales, S. S.","Alzetta, C.","Tatay, C. M.","Dell'Orletta, F."],"abstract":"The act of lying and its detection have raised interest in many fields, from the legal system to our daily lives. 
Considering that testimonies are commonly based on linguistic parameters, natural language processing, a research field concerned with programming computers to process and analyse natural language texts or speech, is a topic of interest on this front. This study aimed to examine the linguistic styles of simulated deception and true testimonies collected with the aim of studying witness memory. Study participants were asked to act as a witness of a crime by retelling the story they had just read. Cognitive interviewing techniques were used to collect testimony under two conditions: truth and simulated deception. A sample of 48 participants volunteered to participate in the study. Analyses of the linguistic indicators and content were carried out. Specifically, we performed a comparison of testimonies of the same participant by condition to analyse the variation between (i) lexical and (ii) linguistic features and (iii) content and speech characteristics (disfluencies) depending on the narrative condition. Concerning lexical properties, adjectives were the most-varying grammatical category between truthful and deceptive testimonies. Furthermore, in the linguistic analysis, we observed that truthful testimonies were generally longer than deceptive ones in terms of the number of words and sentences and also characterised by more articulated sentence structures, and these differences were also statistically significant. Regarding the analysis of the content, cognitive criteria (details) and admitting lack of memory were more present in truthful statements. 
By providing an objective measure, these results are of interest in developing NLP tools for assessing the credibility of testimonies in forensics.","keywords":["Natural language processing","Simulated deception","Stylometric analysis"],"pages":"26","url":"https:\/\/www.mdpi.com\/2076-3425\/13\/2\/317","volume":"13","doi":"10.3390\/brainsci13020317","editors_people":"","editors":[""],"published":"Brain sciences","publisher":"Molecular Diversity Preservation International (Basel)","issn":"2076-3425","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":366732,"last_updated":"2024-01-10 11:02:52","id_people":491078,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Coherent or Not? Stressing a Neural Language Model for Discourse Coherence in Multiple Languages","year":2023,"authors_people":"Dominique Brunato; Felice Dell'Orletta; Irene Dini; Andrea Amelio Ravelli","authors_cnr":["Ravelli, Andrea Amelio","Dini, Irene","Dell'Orletta, Felice","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","21125"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F.","Dini, I.","Ravelli, A. A."],"abstract":"In this study, we investigate the capability of a Neural Language Model (NLM) to distinguish between coherent and incoherent text, where the latter has been artificially created to gradually undermine local coherence within text. While previous research on coherence assessment using NLMs has primarily focused on English, we extend our investigation to multiple languages. We employ a consistent evaluation framework to compare the performance of monolingual and multilingual models in both in-domain and out-domain settings. 
Additionally, we explore the model's performance in a cross-language scenario.","keywords":["text coherence","neural language models","multilingual corpora"],"pages":"10690-10700","url":"https:\/\/aclanthology.org\/2023.findings-acl.680","volume":"","doi":"10.18653\/v1\/2023.findings-acl.680","editors_people":"","editors":[""],"published":"","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-959429-62-3","conference_name":"61st Annual Meeting of the Association for Computational Linguistics (ACL 2023)","conference_place":"Toronto, Canada","conference_date":"9-14\/07\/2023"},{"id":343152,"last_updated":"2023-11-06 19:31:18","id_people":472298,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Natural language processing in low back pain and spine diseases: A systematic review","year":2022,"authors_people":"Bacco L.; Russo F.; Ambrosio L.; D'Antoni F.; Vollero L.; Vadala G.; Dell'Orletta F.; Merone M.; Papalia R.; Denaro V.","authors_cnr":["Bacco, Luca","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Bacco, L.","Russo, F.","Ambrosio, L.","D'Antoni, F.","Vollero, L.","Vadala, G.","Dell'Orletta, F.","Merone, M.","Papalia, R.","Denaro, V."],"abstract":"Natural Language Processing (NLP) is a discipline at the intersection between Computer Science (CS), Artificial Intelligence (AI), and Linguistics that leverages unstructured human-interpretable (natural) language text. In recent years, it gained momentum also in health-related applications and research. Although preliminary, studies concerning Low Back Pain (LBP) and other related spine disorders with relevant applications of NLP methodologies have been reported in the literature over the last few years. It motivated us to systematically review the literature comprised of two major public databases, PubMed and Scopus. 
To do so, we first formulated our research question following the PICO guidelines. Then, we followed a PRISMA-like protocol by performing a search query including terminologies of both technical (e.g., natural language and computational linguistics) and clinical (e.g., lumbar and spine surgery) domains. We collected 221 non-duplicated studies, 16 of which were eligible for our analysis. In this work, we present these studies divided into sub-categories, from both tasks and exploited models' points of view. Furthermore, we report a detailed description of techniques used to extract and process textual features and the several evaluation metrics used to assess the performance of the NLP models. However, what is clear from our analysis is that additional studies on larger datasets are needed to better define the role of NLP in the care of patients with spinal disorders.","keywords":["natural language processing","Low Back Pain","Survey"],"pages":"","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85135163810&origin=inward","volume":"9","doi":"10.3389\/fsurg.2022.957085","editors_people":"","editors":[""],"published":"Frontiers in surgery","publisher":"Frontiers Media (Lausanne, Svizzera)","issn":"2296-875X","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132500,"last_updated":"2023-11-06 19:31:16","id_people":464954,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Linguistically-Based Comparison of Different Approaches to Building Corpora for Text Simplification: A Case Study on Italian","year":2022,"authors_people":"Dominique Brunato, Felice Dell'Orletta, Giulia Venturi","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper, we present an overview of existing parallel corpora for Automatic 
Text Simplification (ATS) in different languages focusing on the approach adopted for their construction. We make the main distinction between manual and (semi)-automatic approaches in order to investigate in which respect complex and simple texts vary and whether and how the observed modifications may depend on the underlying approach. To this end, we perform a two-level comparison on Italian corpora, since this is the only language, with the exception of English, for which there are large parallel resources derived through the two approaches considered. The first level of comparison accounts for the main types of sentence transformations occurring in the simplification process, the second one examines the results of a linguistic profiling analysis based on Natural Language Processing techniques and carried out on the original and the simple version of the same texts. For both levels of analysis, we chose to focus our discussion mostly on sentence transformations and linguistic characteristics that pertain to the morpho-syntactic and syntactic structure of the sentence.","keywords":["linguistic complexity","Italian language","corpus construction","text simplification","aligned corpora"],"pages":"1-19","url":"https:\/\/www.frontiersin.org\/articles\/10.3389\/fpsyg.2022.707630\/full","volume":"13","doi":"10.3389\/fpsyg.2022.707630","editors_people":"","editors":[""],"published":"Frontiers in Psychology","publisher":"Frontiers Research Foundation (Switzerland)","issn":"1664-1078","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":351804,"last_updated":"2023-11-06 19:31:10","id_people":474123,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Analisi della scrittura giovanile da una prospettiva linguistico-computazionale: il caso di studio della Fanfiction","year":2022,"authors_people":"Dominique Brunato, Andrea Mattei, Felice Dell'Orletta","authors_cnr":["Dell'Orletta, Felice","Brunato, 
Dominique Pierina"],"authors_cnr_id":["14329","21125"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Mattei, A.","Dell'Orletta, F."],"abstract":"This paper presents a study aimed at characterizing the linguistic style of an emerging literary genre of the web, particularly appreciated by teens and young adults: fanfiction. By relying on Natural Language Processing approaches, and in particular on the methodology of linguistic profiling applied to a novel corpus of Italian fanfiction stories inspired by the fantasy saga \"Harry Potter\", we investigate the relationship between linguistic style and 'success', measured in terms of number of reviews obtained by the readers. We show that it is possible to detect a set of features, among a wide set of linguistic ones modeling lexical, morpho-syntactic and syntactic phenomena, which help more in discriminating between 'successful' and 'unsuccessful' fanfics.","keywords":["Trattamento Automatico del Linguaggio","stilometria computazionale","linguistic profiling","corpora","fanfiction"],"pages":"171-189","url":"https:\/\/publications.cnr.it\/doc\/474123","volume":"2021\/3","doi":"","editors_people":"","editors":[""],"published":"Rassegna Italiana di Linguistica Applicata (Testo stamp.)","publisher":"Bulzoni (Roma, Italia)","issn":"0033-9725","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":352735,"last_updated":"2023-11-09 18:10:01","id_people":475015,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"On Robustness and Sensitivity of a Neural Language Model: A Case Study on Italian L1 Learner Errors","year":2022,"authors_people":"Miaschi, Alessio and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Brunato, D.","Dell'Orletta, 
F.","Venturi, G."],"abstract":"In this paper, we propose a comprehensive linguistic study aimed at assessing the implicit behavior of one of the most prominent Neural Language Models (NLM) based on Transformer architectures, BERT (Devlin et al., 2019), when dealing with a particular source of noisy data, namely essays written by L1 Italian learners containing a variety of errors targeting grammar, orthography and lexicon. Differently from previous works, we focus on the pre-training stage and we devise two complementary evaluation tasks aimed at assessing the impact of errors on sentence-level inner representations in terms of semantic robustness and linguistic sensitivity. While the first evaluation perspective is meant to probe the model's ability to encode the semantic similarity between sentences also in the presence of errors, the second type of probing task evaluates the influence of errors on BERT's implicit knowledge of a set of raw and morpho-syntactic properties of a sentence. 
Our experiments show that BERT's ability to compute sentence similarity and to correctly encode multi-leveled linguistic information of a sentence are differently modulated by the category of errors and that the error hierarchies in terms of robustness and sensitivity change across layer-wise representations.","keywords":["nlp","interpretability","transformers","learner errors"],"pages":"426-438","url":"https:\/\/doi.org\/10.1109\/TASLP.2022.3226333","volume":"","doi":"10.1109\/TASLP.2022.3226333","editors_people":"","editors":[""],"published":"IEEE\/ACM transactions on audio, speech, and language processing (Online)","publisher":"[Institute of Electrical and Electronics Engineers] ([Piscataway NJ], Stati Uniti d'America)","issn":"2329-9304","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":341004,"last_updated":"2023-11-06 19:31:21","id_people":469733,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Probing Linguistic Knowledge in Italian Neural Language Models across Language Varieties","year":2022,"authors_people":"Miaschi, Alessio and Sarti, Gabriele and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Sarti, G.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper, we present an in-depth investigation of the linguistic knowledge encoded by the transformer models currently available for the Italian language. In particular, we investigate how the complexity of two different architectures of probing models affects the performance of the Transformers in encoding a wide spectrum of linguistic features. 
Moreover, we explore how this implicit knowledge varies according to different textual genres and language varieties.","keywords":["nlp","transformer models","interpretability"],"pages":"25-44","url":"http:\/\/www.aaccademia.it\/ita\/scheda-libro?aaref=1518","volume":"","doi":"10.4000\/ijcol.965","editors_people":"","editors":[""],"published":"Italian Journal of Computational Linguistics","publisher":"aAccademia University Press, Torino (Italia)","issn":"2499-4553","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132501,"last_updated":"2023-11-06 19:31:13","id_people":464964,"institutes":["ILC"],"type":"book","type_order":1,"type_people":"book","title":"La fede dichiarata. Un'analisi linguistico-computazionale","year":2022,"authors_people":"Giulia Venturi, Andrea Cimino, Felice Dell'Orletta","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Venturi, G.","Cimino, A.","Dell'Orletta, F."],"abstract":"Il volume indaga l'apporto di tecnologie basate sul Natural Language Processing (NLP) all'analisi di un corpus di trascrizioni di 164 interviste orali raccolte durante la ricerca 2017 sulla \"Religiosit\u00e0 in Italia\". Gli autori illustrano metodologie e strumenti che permettono di trasformare l'informazione implicitamente contenuta nelle interviste in informazione esplicitamente strutturata. Il risultato finale di questo processo interpretativo spazia dall'acquisizione di conoscenze lessicali e terminologiche complesse alla loro organizzazione in strutture proto-concettuali, fino ad arrivare alla qualificazione dell'atteggiamento con il quale l'intervistato si esprime. 
Il lettore viene accompagnato a scoprire quale sia il valore aggiunto delle analisi basate su NLP e quali nuovi orizzonti di ricerca siano aperti da queste analisi.","keywords":["Knowledge Extraction","Knowledge Organization"],"pages":"1-181","url":"https:\/\/publications.cnr.it\/doc\/464964","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"Franco Angeli Editore (Milano, ITA)","issn":"","isbn":"978-88-351-2146-6","conference_name":"","conference_place":"","conference_date":""},{"id":343041,"last_updated":"2023-11-06 19:31:12","id_people":472144,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"How About Time? Probing a Multilingual Language Model for Temporal Relations","year":2022,"authors_people":"Caselli T., Dini I., Dell'Orletta F.","authors_cnr":["Dini, Irene","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Caselli, T.","Dini, I.","Dell'Orletta, F."],"abstract":"This paper presents a comprehensive set of probing experiments using a multilingual language model, XLM-R, for temporal relation classification between events in four languages. Results show an advantage of contextualized embeddings over static ones and a detrimental role of sentence level embeddings. 
While obtaining competitive results against state-of-the-art systems, our probes indicate a lack of suitable encoded information to properly address this task.","keywords":["Natural Language Processing","Neural Language Models","Temporal Relation Classification"],"pages":"","url":"https:\/\/aclanthology.org\/2022.coling-1.283\/","volume":"","doi":"","editors_people":"","editors":[""],"published":"Proceedings of the 29th International Conference on Computational Linguistics, COLING 2022","publisher":"","issn":"","isbn":"","conference_name":"International Conference on Computational Linguistics (COLING)","conference_place":"Gyeongju, Republic of Korea","conference_date":"12-17 ottobre 2022"},{"id":343042,"last_updated":"2023-11-06 19:31:20","id_people":472145,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"On the Nature of BERT: Correlating Fine-Tuning and Linguistic Competence","year":2022,"authors_people":"Merendi F., Dell'Orletta F., Venturi G.","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Merendi, F.","Dell'Orletta, F.","Venturi, G."],"abstract":"Several studies in the literature on the interpretation of Neural Language Models (NLM) focus on the linguistic generalization abilities of pre-trained models. However, little attention is paid to how the linguistic knowledge of the models changes during the fine-tuning steps. In this paper, we contribute to this line of research by showing to what extent a wide range of linguistic phenomena are forgotten across 50 epochs of fine-tuning, and how the preserved linguistic knowledge is correlated with the resolution of the fine-tuning task. To this end, we considered a quite understudied task where linguistic information plays the main role, i.e. the prediction of the evolution of written language competence of native language learners. 
In addition, we investigate whether it is possible to predict the fine-tuned NLM accuracy across the 50 epochs solely relying on the assessed linguistic competence. Our results are encouraging and show a high relationship between the model's linguistic competence and its ability to solve a linguistically-based downstream task.","keywords":["Natural Language Processing","Neural Language Models","Linguistic Generalization Abilities"],"pages":"","url":"https:\/\/aclanthology.org\/2022.coling-1.275","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"International Conference on Computational Linguistics (COLING)","conference_place":"Gyeongju, Republic of Korea","conference_date":"12-17 ottobre 2022"},{"id":341003,"last_updated":"2023-11-06 19:31:22","id_people":469732,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Punctuation Restoration in\u00a0Spoken Italian Transcripts with\u00a0Transformers","year":2022,"authors_people":"Miaschi A.; Ravelli A.A.; Dell'Orletta F.","authors_cnr":["Miaschi, Alessio","Ravelli, Andrea Amelio","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Ravelli, A. A.","Dell'Orletta, F."],"abstract":"In this paper, we propose an evaluation of a Transformer-based punctuation restoration model for the Italian language. Experimenting with a BERT-base model, we perform several fine-tuning with different training data and sizes and tested them in an in- and cross-domain scenario. Moreover, we conducted an error analysis of the main weaknesses of the model related to specific punctuation marks. 
Finally, we test our system either quantitatively and qualitatively, by offering a typical task-oriented and a perception-based acceptability evaluation.","keywords":["nlp","transformer models","punctuation restoration"],"pages":"245-260","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85135083576&origin=inward","volume":"13196 LNAI","doi":"10.1007\/978-3-031-08421-8_17","editors_people":"","editors":[""],"published":"Lecture notes in computer science","publisher":"Springer (Berlin, Germania)","issn":"0302-9743","isbn":"","conference_name":"AIxIA 2021-Advances in Artificial Intelligence","conference_place":"","conference_date":"1-3\/12\/2021"},{"id":352734,"last_updated":"2023-11-06 19:31:11","id_people":474890,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Evaluating Text-To-Text Framework for Topic and Style Classification of Italian texts","year":2022,"authors_people":"Papucci, Michele; De Nigris, Chiara; Miaschi, Alessio; Dell'Orletta, Felice","authors_cnr":["Miaschi, Alessio","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Papucci, M.","De Nigris, C.","Miaschi, A.","Dell'Orletta, F."],"abstract":"In this paper, we propose an extensive evaluation of the first text-to-text Italian Neural Language Model (NLM), IT5 [1], on a classification scenario. In particular, we test the performance of IT5 on several tasks involving both the classification of the topic and the style of a set of Italian posts. We assess the model in two different configurations, single- and multi-task classification, and we compare it with a more traditional NLM based on the Transformer architecture (i.e. BERT). Moreover, we test its performance in a few-shot learning scenario. We also perform a qualitative investigation on the impact of label representations in modeling the classification of the IT5 model. 
Results show that IT5 could achieve good results, although generally lower than the BERT model. Nevertheless, we observe a significant performance improvement of the Text-to-text model in a multi-task classification scenario. Finally, we found that altering the representation of the labels mainly impacts the classification of the topic.","keywords":["bert","style classification","t5","text-to-text","topic classification","transformers"],"pages":"56-70","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85143252156&origin=inward","volume":"3287","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"Sixth Workshop on Natural Language for Artificial Intelligence, NL4AI 2022","conference_place":"","conference_date":"30\/11\/2022"},{"id":341608,"last_updated":"2023-11-06 19:31:23","id_people":470081,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"SemEval-2022 Task 3: PreTENS-Evaluating Neural Networks on Presuppositional Semantic Knowledge","year":2022,"authors_people":"Roberto Zamparelli, Shammur A Chowdhury, Dominique Brunato, Cristiano Chesi, Felice Dell'Orletta, Arid Hasan, Giulia Venturi","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Zamparelli, R.","Chowdhury, S. A.","Brunato, D.","Chesi, C.","Dell'Orletta, F.","Hasan, A.","Venturi, G."],"abstract":"We report the results of the SemEval 2022 Task 3, PreTENS, on evaluation the acceptability of simple sentences containing constructions whose two arguments are presupposed to be or not to be in an ordered taxonomic relation. 
The task featured two sub-tasks articulated as: (i) binary prediction task and (ii) regression task, predicting the acceptability in a continuous scale. The sentences were artificially generated in three languages (English, Italian and French). 21 systems, with 8 system papers were submitted for the task, all based on various types of fine-tuned transformer systems, often with ensemble methods and various data augmentation techniques. The best systems reached an F1-macro score of 94.49 (sub-task1) and a Spearman correlation coefficient of 0.80 (sub-task2), with interesting variations in specific constructions and\/or languages.","keywords":["Neural Networks","Presuppositional Knowledge","Evaluation"],"pages":"228-238","url":"https:\/\/aclanthology.org\/2022.semeval-1.29.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"16th International Workshop on Semantic Evaluation (SemEval-2022)","conference_place":"Seattle","conference_date":"14-15\/07\/2022"},{"id":343043,"last_updated":"2023-11-06 19:31:27","id_people":472153,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Explainable sentiment analysis: A hierarchical transformer-based extractive summarization approach","year":2021,"authors_people":"Bacco L.; Cimino A.; Dell'Orletta F.; Merone M.","authors_cnr":["Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Bacco, L.","Cimino, A.","Dell'Orletta, F.","Merone, M."],"abstract":"In recent years, the explainable artificial intelligence (XAI) paradigm is gaining wide research interest. The natural language processing (NLP) community is also approaching the shift of paradigm: building a suite of models that provide an explanation of the decision on some main task, without affecting the performances. 
It is not an easy job for sure, especially when very poorly interpretable models are involved, like the almost ubiquitous (at least in the NLP literature of the last years) transformers. Here, we propose two different transformer-based methodologies exploiting the inner hierarchy of the documents to perform a sentiment analysis task while extracting the most important (with regards to the model decision) sentences to build a summary as the explanation of the output. For the first architecture, we placed two transformers in cascade and leveraged the attention weights of the second one to build the summary. For the other architecture, we employed a single transformer to classify the single sentences in the document and then combine the probability scores of each to perform the classification and then build the summary. We compared the two methodologies by using the IMDB dataset, both in terms of classification and explainability performances. To assess the explainability part, we propose two kinds of metrics, based on benchmarking the models' summaries with human annotations. We recruited four independent operators to annotate few documents retrieved from the original dataset. 
Furthermore, we conducted an ablation study to highlight how implementing some strategies leads to important improvements on the explainability performance of the cascade transformers model.","keywords":["Natural Language Processing","Sentiment Analysis","Explainable IA"],"pages":"","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85114289346&origin=inward","volume":"10","doi":"10.3390\/electronics10182195","editors_people":"","editors":[""],"published":"Electronics (Basel)","publisher":"MDPI (Basel)","issn":"2079-9292","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132452,"last_updated":"2023-11-06 19:31:24","id_people":454570,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"A NLP-based stylometric approach for tracking the evolution of L1 written language competence","year":2021,"authors_people":"Miaschi, Alessio and Brunato, Dominique and Dell'Orletta, Felice","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Brunato, D.","Dell'Orletta, F."],"abstract":"In this study we present a Natural Language Processing (NLP)-based stylometric approach for tracking the evolution of written language competence in Italian L1 learners. The approach relies on a wide set of linguistically motivated features capturing stylistic aspects of a text, which were extracted from students' essays contained in CItA (Corpus Italiano di Apprendenti L1), the first longitudinal corpus of texts written by Italian L1 learners enrolled in the first and second year of lower secondary school. We address the problem of modeling written language development as a supervised classification task consisting in predicting the chronological order of essays written by the same student at different temporal spans. 
The promising results obtained in several classification scenarios allow us to conclude that it is possible to automatically model the highly relevant changes affecting written language evolution across time, as well as identifying which features are more predictive of this process. In the last part of the article, we focus the attention on the possible influence of background variables on language learning and we present preliminary results of a pilot study aiming at understanding how the observed developmental patterns are affected by information related to the school environment of the student.","keywords":["stylometry","computational linguistics","language competence"],"pages":"71-105","url":"https:\/\/www.jowr.org\/abstracts\/vol13_1\/Miaschi_et_al_2021_13_1_abstract.html","volume":"vol. 13","doi":"10.17239\/jowr-2021.13.01.03","editors_people":"","editors":[""],"published":"Journal of Writing Research","publisher":"Universiteit Antwerpen (Antwerpen, Belgio)","issn":"2030-1006","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":343045,"last_updated":"2023-11-06 19:31:29","id_people":472158,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Human Perception in Natural Language Generation","year":2021,"authors_people":"De Mattei L.; Lai H.; Dell'Orletta F.; Nissim M.","authors_cnr":["Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["De Mattei, L.","Lai, H.","Dell'Orletta, F.","Nissim, M."],"abstract":"We take a collection of short texts, some of which are human-written, while others are automatically generated, and ask subjects, who are unaware of the texts' source, whether they perceive them as human-produced. We use this data to fine-tune a GPT-2 model to push it to generate more human-like texts, and observe that the production of this fine-tuned model is indeed perceived as more human-like than that of the original model. 
Contextually, we show that our automatic evaluation strategy correlates well with human judgements. We also run a linguistic analysis to unveil the characteristics of human- vs machine-perceived language.","keywords":["Natural Language Generation","Neural Language Models","Evaluation"],"pages":"15-23","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85123713456&origin=inward","volume":"","doi":"10.18653\/v1\/2021.gem-1.2","editors_people":"","editors":[""],"published":"Proceedings of the First Workshop on Generation Evaluation and Metrics (GEM 2021)","publisher":"","issn":"","isbn":"978-1-954085-67-1","conference_name":"First Workshop on Generation Evaluation and Metrics (GEM 2021)","conference_place":"Online","conference_date":"05\/08\/2021"},{"id":343044,"last_updated":"2023-11-06 19:31:25","id_people":472155,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Analyzing the Interaction between the Reader's Voice and the Linguistic Structure of the Text: a Preliminary Study","year":2021,"authors_people":"Iavarone B., Morelli M. S., Brunato D., Ghiasi S., Scilingo E. P., Vanello N., Dell'Orletta F., Greco A.","authors_cnr":["Dell'Orletta, Felice","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","21125"],"authors_cnr_institute":[""],"authors":["Iavarone, B.","Morelli, M. S.","Brunato, D.","Ghiasi, S.","Scilingo, E. P.","Vanello, N.","Dell'Orletta, F.","Greco, A."],"abstract":"In this study, we present a preliminary analysis of the relationship between the linguistic profile of a text and the voice properties of the reader aiming to improve the speech-based emotion recognition systems. To this aim, we recorded the speech signals from a group of 32 healthy volunteers reading aloud neutral and affective texts and used the BioVoice toolbox to compute some of the main speech features. The selected texts were analyzed to quantify their lexical, morpho-syntactic, and syntactic content. 
Correlation and Support Vector Regressor analyses between linguistic and speech features have shown a significant modulation of some voice acoustic properties performed by the linguistic structure of the text. Particularly, a significant effect was shown on some specific speech features often used for the assessment of human emotional state (e.g., F0). This suggests that the lexical, morpho-syntactic, and syntactic properties could play an important role in the emotional dynamics of a person.","keywords":["Natural Language Processing","Speech analysis","linguistic profile"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/472155","volume":"","doi":"10.36253\/978-88-5518-449-6","editors_people":"","editors":[""],"published":"Proceedings of 12th INTERNATIONAL WORKSHOP \"MODELS AND ANALYSIS OF VOCAL EMISSIONS FOR BIOMEDICAL APPLICATIONS\"","publisher":"","issn":"","isbn":"978-88-5518-448-9","conference_name":"12th INTERNATIONAL WORKSHOP \"MODELS AND ANALYSIS OF VOCAL EMISSIONS FOR BIOMEDICAL APPLICATIONS\"","conference_place":"Firenze, Italia","conference_date":"14-16\/12\/2021"},{"id":132487,"last_updated":"2023-11-06 19:31:30","id_people":463833,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Probing tasks under pressure","year":2021,"authors_people":"Miaschi A.; Alzetta C.; Brunato D.; Dell'Orletta F.; Venturi G.","authors_cnr":["Miaschi, Alessio","Alzetta, Chiara","Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Alzetta, C.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"Probing tasks are frequently used to evaluate whether the representations of Neural Language Models (NLMs) encode linguistic information. However, it is still questioned if probing classification tasks really enable such investigation or they simply hint for surface patterns in the data. 
We present a method to investigate this question by comparing the accuracies of a set of probing tasks on gold and automatically generated control datasets. Our results suggest that probing tasks can be used as reliable diagnostic methods to investigate the linguistic information encoded in NLMs representations.","keywords":["Neural Language Models","Linguistic probing","Treebanks"],"pages":"1-7","url":"http:\/\/ceur-ws.org\/Vol-3033\/paper29.pdf","volume":"3033","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"8th Italian Conference on Computational Linguistics (CLIC-it 2021)","conference_place":"Milano","conference_date":"29\/06-01\/07\/2022"},{"id":132451,"last_updated":"2023-11-06 19:31:35","id_people":454441,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"What Makes My Model Perplexed? A Linguistic Investigation on Neural Language Models Perplexity","year":2021,"authors_people":"Miaschi, Alessio and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"This paper presents an investigation aimed at studying how the linguistic structure of a sentence affects the perplexity of two of the most popular Neural Language Models (NLMs), BERT and GPT-2. We first compare the sentence-level likelihood computed with BERT and the GPT-2's perplexity showing that the two metrics are correlated. 
In addition, we exploit linguistic features capturing a wide set of morpho-syntactic and syntactic phenomena showing how they contribute to predict the perplexity of the two NLMs.","keywords":["nlp","interpretability","deep learning"],"pages":"40-47","url":"https:\/\/www.aclweb.org\/anthology\/2021.deelio-1.5","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-954085-30-5","conference_name":"2nd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures","conference_place":"","conference_date":"10\/06\/2021"},{"id":341002,"last_updated":"2023-11-06 19:31:26","id_people":469731,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Evaluating Transformer Models for Punctuation Restoration in Italian","year":2021,"authors_people":"Miaschi A.; Ravelli A.A.; Dell'Orletta F.","authors_cnr":["Miaschi, Alessio","Ravelli, Andrea Amelio","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Ravelli, A. A.","Dell'Orletta, F."],"abstract":"In this paper, we propose an evaluation of a Transformer-based punctuation restoration model for the Italian language. Experimenting with a BERT-base model, we perform several fine-tuning with different training data and sizes and tested them in an in- and cross-domain scenario. Moreover, we offer a comparison in a multilingual setting with the same model fine-tuned on English transcriptions. Finally, we conclude with an error analysis of the main weaknesses of the model related to specific punctuation marks.","keywords":["transformer models","nlp","punctuation restoration"],"pages":"","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85121647978&origin=inward","volume":"3015","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. 
Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"5th Workshop on Natural Language for Artificial Intelligence (NL4AI 2021)","conference_place":"","conference_date":"29\/11\/2021"},{"id":132450,"last_updated":"2023-11-06 19:31:28","id_people":454440,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"How Do BERT Embeddings Organize Linguistic Knowledge?","year":2021,"authors_people":"Puccetti, Giovanni and Miaschi, Alessio and Dell'Orletta, Felice","authors_cnr":["Miaschi, Alessio","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Puccetti, G.","Miaschi, A.","Dell'Orletta, F."],"abstract":"Several studies investigated the linguistic information implicitly encoded in Neural Language Models. Most of these works focused on quantifying the amount and type of information available within their internal representations and across their layers. In line with this scenario, we proposed a different study, based on Lasso regression, aimed at understanding how the information encoded by BERT sentence-level representations is arranged within its hidden units. Using a suite of several probing tasks, we showed the existence of a relationship between the implicit knowledge learned by the model and the number of individual units involved in the encodings of this competence. 
Moreover, we found that it is possible to identify groups of hidden units more relevant for specific linguistic properties.","keywords":["nlp","interpretability","deep learning"],"pages":"48-57","url":"https:\/\/www.aclweb.org\/anthology\/2021.deelio-1.6","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-954085-30-5","conference_name":"2nd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures","conference_place":"","conference_date":"10\/06\/2021"},{"id":132502,"last_updated":"2023-11-06 19:31:34","id_people":464972,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"That Looks Hard: Characterizing Linguistic Complexity in Humans and Language Models","year":2021,"authors_people":"Sarti G, Brunato D, Dell'Orletta F","authors_cnr":["Dell'Orletta, Felice","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","21125"],"authors_cnr_institute":[""],"authors":["Sarti, G.","Brunato, D.","Dell'Orletta, F."],"abstract":"This paper investigates the relationship between two complementary perspectives in the human assessment of sentence complexity and how they are modeled in a neural language model (NLM). The first perspective takes into account multiple online behavioral metrics obtained from eye-tracking recordings. The second one concerns the offline perception of complexity measured by explicit human judgments. Using a broad spectrum of linguistic features modeling lexical, morpho-syntactic, and syntactic properties of sentences, we perform a comprehensive analysis of linguistic phenomena associated with the two complexity viewpoints and report similarities and differences. We then show the effectiveness of linguistic features when explicitly leveraged by a regression model for predicting sentence complexity and compare its results with the ones obtained by a fine-tuned neural language model. 
We finally probe the NLM's linguistic competence before and after fine-tuning, highlighting how linguistic information encoded in representations changes when the model learns to predict complexity.","keywords":["linguistic complexity","eyetracking","human evaluation"],"pages":"48-60","url":"https:\/\/aclanthology.org\/2021.cmcl-1.5","volume":"","doi":"10.18653\/v1\/2021.cmcl-1.5","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-954085-35-0","conference_name":"Proceedings of Workshop on Cognitive Modeling and Computational Linguistics (CMCL 2021)","conference_place":"","conference_date":"10\/06\/2021"},{"id":132486,"last_updated":"2023-11-06 19:31:48","id_people":463828,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Linguistically-driven Selection of Difficult-to-Parse Dependency Structures","year":2020,"authors_people":"Chiara Alzetta, Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Alzetta, Chiara","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"The paper illustrates a novel methodology meeting a twofold goal, namely quantifying the reliability of automatically generated dependency relations without using gold data on the one hand, and identifying which are the linguistic constructions negatively affecting the parser performance on the other hand. These represent objectives typically investigated in different lines of research, with different methods and techniques. Our methodology, at the crossroads of these perspectives, allows not only to quantify the parsing reliability of individual dependency types but also to identify and weight the contextual properties making relation instances more or less difficult to parse. 
The proposed methodology was tested in two different and complementary experiments, aimed at assessing the degree of parsing difficulty across (a) different dependency relation types, and (b) different instances of the same relation. The results show that the proposed methodology is able to identify difficult-to-parse dependency relations without relying on gold data and by taking into account a variety of intertwined linguistic factors. These findings pave the way to novel applications of the methodology, both in the direction of defining new evaluation metrics based purely on automatically parsed data and towards the automatic creation of challenge sets.","keywords":["Linguistic Complexity","Syntactic Parsing","Evaluation metrics"],"pages":"37-60","url":"https:\/\/journals.openedition.org\/ijcol\/719","volume":"6","doi":"10.4000\/ijcol.719","editors_people":"","editors":[""],"published":"Italian Journal of Computational Linguistics","publisher":"aAccademia University Press, Torino (Italia)","issn":"2499-4553","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132413,"last_updated":"2023-11-06 19:31:50","id_people":441971,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Metodi e Tecniche di Trattamento Automatico della Lingua per l'Estrazione di Conoscenza dalla Documentazione Scolastica","year":2020,"authors_people":"Venturi G., Dell'Orletta F., Montemagni S., Morini E. e Sagri M.T.","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Venturi, G.","Dell'Orletta, F.","Montemagni, S.","Morini, E.","Sagri, M. 
T."],"abstract":"Il contributo riguarda la creazione di un sistema integrato di \"knowledge management\", per la gestione e condivisione della conoscenza prodotta e utilizzata dalla scuola.","keywords":["Estrazione di informazione","Documenti scolastici","Indicizzazione","Terminology extraction"],"pages":"49-68","url":"https:\/\/publications.cnr.it\/doc\/441971","volume":"2","doi":"10.3280\/CAD2020-002005","editors_people":"","editors":[""],"published":"Cadmo (Testo stamp.)","publisher":"Franco Angeli (Napoli, Italia)","issn":"1122-5165","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132411,"last_updated":"2023-11-06 19:32:01","id_people":441967,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Verba et Acta. Un esperimento per promuovere l'evoluzione delle competenze linguistiche degli studenti degli istituti professionali","year":2020,"authors_people":"Vertecchi, Benedetto; Agrusti, Francesco; Dell'Orletta, Felice; Montemagni, Simonetta; Venturi, Giulia","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Vertecchi, B.","Agrusti, F.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"Ricerche in corso. Verba et Acta. 
Un esperimento per promuovere l'evoluzione delle competenze linguistiche degli studenti degli istituti professionali","keywords":["Evoluzione competenze linguistiche","Annotazione linguistica","Previsione dello sviluppo delle competenze di scrittura"],"pages":"109-117","url":"https:\/\/publications.cnr.it\/doc\/441967","volume":"","doi":"10.3280\/CAD2020-001008","editors_people":"","editors":[""],"published":"Cadmo (Testo stamp.)","publisher":"Franco Angeli (Napoli, Italia)","issn":"1122-5165","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132426,"last_updated":"2023-11-06 19:31:55","id_people":444113,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Quantitative linguistic investigations across universal dependencies treebanks","year":2020,"authors_people":"Alzetta C.; Dell'Orletta F.; Montemagni S.; Osenova P.; Simov K.; Venturi G.","authors_cnr":["Alzetta, Chiara","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Osenova, P.","Simov, K.","Venturi, G."],"abstract":"The paper illustrates a case study aimed at identifying cross-lingual quantitative trends in the distribution of dependency relations in treebanks for typologically different languages. Preliminary results show interesting differences rooted either in language-specific peculiarities or cross-lingual annotation inconsistencies, with a potential impact on different application scenarios.","keywords":["Universal Dependencies Treebanks","Cross-linguistic analysis","Typology"],"pages":"1-7","url":"http:\/\/ceur-ws.org\/Vol-2769\/paper_59.pdf","volume":"2769","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. 
Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"979-12-80136-28-2","conference_name":"7th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Online","conference_date":"1-3\/03\/2021"},{"id":132418,"last_updated":"2023-11-06 19:31:53","id_people":442044,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"PRELEARN @ EVALITA 2020: Overview of the Prerequisite Relation Learning Task for Italian","year":2020,"authors_people":"Alzetta, Chiara and Miaschi, Alessio and Dell'Orletta, Felice and Koceva, Frosina and Torre, Ilaria","authors_cnr":["Miaschi, Alessio","Alzetta, Chiara","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Miaschi, A.","Dell'Orletta, F.","Koceva, F.","Torre, I."],"abstract":"The Prerequisite Relation Learning (PRELEARN) task is the EVALITA 2020 shared task on concept prerequisite learning, which consists of classifying prerequisite relations between pairs of concepts distinguishing between prerequisite pairs and non-prerequisite pairs. Four sub-tasks were defined: two of them define different types of features that participants are allowed to use when training their model, while the other two define the classification scenarios where the proposed models would be tested. 
In total, 14 runs were submitted by 3 teams comprising 9 total individual participants.","keywords":["nlp","prerequisite learning","shared task"],"pages":"","url":"http:\/\/ceur-ws.org\/Vol-2765\/paper164.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Seventh Evaluation Campaign of Natural Language Processing and Speech Tools for Italian (EVALITA)","conference_place":"","conference_date":"17\/12\/2020"},{"id":132427,"last_updated":"2023-11-06 19:31:38","id_people":444114,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"AcCompl-it @ EVALITA2020: Overview of the acceptability & complexity evaluation task for Italian","year":2020,"authors_people":"Brunato D.; Chesi C.; Dell'Orletta F.; Montemagni S.; Venturi G.; Zamparelli R.","authors_cnr":["Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Chesi, C.","Dell'Orletta, F.","Montemagni, S.","Venturi, G.","Zamparelli, R."],"abstract":"The Acceptability and Complexity evaluation task for Italian (AcCompl-it) was aimed at developing and evaluating methods to classify Italian sentences according to Acceptability and Complexity. It consists of two independent tasks asking participants to predict either the acceptability or the complexity rate (or both) of a given set of sentences previously scored by native speakers on a 1-to-7 points Likert scale. 
In this paper, we introduce the datasets distributed to the participants, we describe the different approaches of the participating systems and provide a first analysis of the obtained results.","keywords":["Shared Task","Linguistic Complexity","Acceptability"],"pages":"1-8","url":"http:\/\/ceur-ws.org\/Vol-2765\/paper163.pdf","volume":"2765","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"EVALITA '20, Evaluation of NLP and Speech Tools for Italian","conference_place":"Online","conference_date":"17\/12\/2020"},{"id":132394,"last_updated":"2023-11-06 19:31:54","id_people":435966,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Profiling-UD: a Tool for Linguistic Profiling of Texts","year":2020,"authors_people":"Dominique Brunato, Andrea Cimino, Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Cimino, Andrea","Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Cimino, A.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In this paper, we introduce Profiling-UD, a new text analysis tool inspired to the principles of linguistic profiling that can support language variation research from different perspectives. It allows the extraction of more than 130 features, spanning across different levels of linguistic description. Beyond the large number of features that can be monitored, a main novelty of Profiling-UD is that it has been specifically devised to be multilingual since it is based on the Universal Dependencies framework. 
In the second part of the paper, we demonstrate the effectiveness of these features in a number of theoretical and applicative studies in which they were successfully used for text and author profiling.","keywords":["Computational Language Variation Analysis","Linguistic Profiling","Universal Dependencies"],"pages":"7145-7151","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2020\/pdf\/2020.lrec-1.883.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"979-10-95546-34-4","conference_name":"Conference on Language Resources and Evaluation (LREC)","conference_place":"","conference_date":"11-16\/05\/2020"},{"id":132393,"last_updated":"2023-11-06 19:32:02","id_people":435958,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Voices of the Great War: A Richly Annotated Corpus of Italian Texts on the First World War","year":2020,"authors_people":"Alessandro Lenci, Simonetta Montemagni, Federico Boschetti, Irene De Felice, Stefano dei Rossi, Felice Dell'Orletta, Michele Di Giorgio, Martina Miliani, Lucia C. Passaro, Angelica Puddu, Giulia Venturi, Nicola Labanca","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice","Boschetti, Federico","Venturi, Giulia"],"authors_cnr_id":["5595","14329","14630","17692"],"authors_cnr_institute":[""],"authors":["Lenci, A.","Montemagni, S.","Boschetti, F.","De Felice, I.","Dei Rossi, S.","Dell'Orletta, F.","Di Giorgio, M.","Miliani, M.","Passaro, L. C.","Puddu, A.","Venturi, G.","Labanca, N."],"abstract":"Voci della Grande Guerra (\"Voices of the Great War\") is the first large corpus of Italian historical texts dating back to the period of First World War. This corpus differs from other existing resources in several respects. 
First, from the linguistic point of view it gives account of the wide range of varieties in which Italian was articulated in that period, namely from a diastratic (educated vs. uneducated writers), diaphasic (low\/informal vs. high\/formal registers) and diatopic (regional varieties, dialects) points of view. From the historical perspective, through a collection of texts belonging to different genres it represents different views on the war and the various styles of narrating war events and experiences. The final corpus is balanced along various dimensions, corresponding to the textual genre, the language variety used, the author type and the typology of conveyed contents. The corpus is annotated with lemmas, part-of-speech, terminology, and named entities. Significant corpus samples representative of the different \"voices\" have also been enriched with meta-linguistic and syntactic information. The layer of syntactic annotation forms the first nucleus of an Italian historical treebank complying with the Universal Dependencies standard. 
The paper illustrates the final resource, the methodology and tools used to build it, and the Web Interface for navigating it.","keywords":["Historical Corpora","Linguistic and Meta-linguistic Annotation","Information Extraction"],"pages":"911-918","url":"https:\/\/www.aclweb.org\/anthology\/2020.lrec-1.114.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"979-10-95546-34-4","conference_name":"Conference on Language Resources and Evaluation (LREC)","conference_place":"","conference_date":"11-16\/05\/2020"},{"id":132416,"last_updated":"2023-11-06 19:31:44","id_people":442040,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Is Neural Language Model Perplexity Related to Readability?","year":2020,"authors_people":"Miaschi, Alessio and Alzetta, Chiara and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Alzetta, Chiara","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Alzetta, C.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"This paper explores the relationship between Neural Language Model (NLM) perplexity and sentence readability. Starting from the evidence that NLMs implicitly acquire sophisticated linguistic knowledge from a huge amount of training data, our goal is to investigate whether perplexity is affected by linguistic features used to automatically assess sentence readability and if there is a correlation between the two metrics. 
Our findings suggest that this correlation is actually quite weak and the two metrics are affected by different linguistic phenomena.","keywords":["nlp","neural language models","readability"],"pages":"","url":"http:\/\/ceur-ws.org\/Vol-2769\/paper_57.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"979-12-80136-28-2","conference_name":"Seventh Italian Conference on Computational Linguistics","conference_place":"","conference_date":"01-03\/03\/2021"},{"id":132391,"last_updated":"2023-11-06 19:31:46","id_people":438491,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Linguistic Profiling of a Neural Language Model","year":2020,"authors_people":"Miaschi A., Brunato D., Dell'Orletta F., Venturi G.","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper we investigate the linguistic knowledge learned by a Neural Language Model (NLM) before and after a fine-tuning process and how this knowledge affects its predictions during several classification problems. We use a wide set of probing tasks, each of which corresponds to a distinct sentence-level feature extracted from different levels of linguistic annotation. We show that BERT is able to encode a wide range of linguistic characteristics, but it tends to lose this information when trained on specific downstream tasks. 
We also find that BERT's capacity to encode different kind of linguistic properties has a positive influence on its predictions: the more it stores readable linguistic information of a sentence, the higher will be its capacity of predicting the expected label assigned to that sentence.","keywords":["Linguistic Profiling","Neural Language Model","Interpretability"],"pages":"745-756","url":"https:\/\/www.aclweb.org\/anthology\/2020.coling-main.65\/","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-952148-27-9","conference_name":"International Conference on Computational Linguistics (COLING)","conference_place":"Online","conference_date":"8-13\/12\/2020"},{"id":132395,"last_updated":"2023-11-06 19:31:58","id_people":435969,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Tracking the Evolution of Written Language Competence in L2 Spanish Learners","year":2020,"authors_people":"Miaschi, Alessio; Davidson, Sam; Brunato, Dominique; Dell'Orletta, Felice; Sagae, Kenji; Sanchez-Gutierrez, Claudia H.; Venturi, Giulia","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Davidson, S.","Brunato, D.","Dell'Orletta, F.","Sagae, K.","Sanchez Gutierrez, C. H.","Venturi, G."],"abstract":"In this paper we present an NLP-based approach for tracking the evolution of written language competence in L2 Spanish learners using a wide range of linguistic features automatically extracted from students' written productions. 
Beyond reporting classification results for different scenarios, we explore the connection between the most predictive features and the teaching curriculum, finding that our set of linguistic features often reflects the explicit instruction that students receive during each course.","keywords":["Evolution of Language Competence","Natural Language Processing","Linguistic Profiling"],"pages":"92-101","url":"https:\/\/www.aclweb.org\/anthology\/2020.bea-1.9.pdf","volume":"","doi":"10.18653\/v1\/2020.bea-1.9","editors_people":"","editors":[""],"published":"","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-941643-83-9","conference_name":"15th Workshop on Innovative Use of NLP for Building Educational Applications","conference_place":"","conference_date":"10\/07\/2020"},{"id":132415,"last_updated":"2023-11-06 19:31:40","id_people":442036,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Contextual and Non-Contextual Word Embeddings: an in-depth Linguistic Investigation","year":2020,"authors_people":"Miaschi, Alessio and Dell'Orletta, Felice","authors_cnr":["Miaschi, Alessio","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Dell'Orletta, F."],"abstract":"In this paper we present a comparison between the linguistic knowledge encoded in the internal representations of a contextual Language Model (BERT) and a contextual-independent one (Word2vec). We use a wide set of probing tasks, each of which corresponds to a distinct sentence-level feature extracted from different levels of linguistic annotation. We show that, although BERT is capable of understanding the full context of each word in an input sequence, the implicit knowledge encoded in its aggregated sentence representations is still comparable to that of a contextual-independent model. 
We also find that BERT is able to encode sentence-level properties even within single-word embeddings, obtaining comparable or even superior results than those obtained with sentence representations.","keywords":["nlp","interpretability","representation learning"],"pages":"110-119","url":"https:\/\/www.aclweb.org\/anthology\/2020.repl4nlp-1.15","volume":"","doi":"10.18653\/v1\/2020.repl4nlp-1.15","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-952148-15-6","conference_name":"5th Workshop on Representation Learning for NLP","conference_place":"","conference_date":"09\/07\/2020"},{"id":132417,"last_updated":"2023-11-06 19:31:45","id_people":442038,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Italian Transformers Under the Linguistic Lens","year":2020,"authors_people":"Miaschi, Alessio and Sarti, Gabriele and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Sarti, G.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper we present an in-depth investigation of the linguistic knowledge encoded by the transformer models currently available for the Italian language. In particular, we investigate whether and how using different architectures of probing models affects the performance of Italian transformers in encoding a wide spectrum of linguistic features. 
Moreover, we explore how this implicit knowledge varies according to different textual genres.","keywords":["nlp","neural language models","interpretability"],"pages":"","url":"http:\/\/ceur-ws.org\/Vol-2769\/paper_56.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"979-12-80136-28-2","conference_name":"Seventh Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"","conference_date":"01-03\/03\/2021"},{"id":132356,"last_updated":"2023-11-06 19:32:08","id_people":423880,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"INFERRING QUANTITATIVE TYPOLOGICAL TRENDS FROM MULTILINGUAL TREEBANKS. A CASE STUDY","year":2019,"authors_people":"Alzetta, Chiara; Dell'Orletta, Felice; Montemagni, Simonetta; Venturi, Giulia","authors_cnr":["Alzetta, Chiara","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In the past decades, linguistic typology went through a renewing phase that involved a significant change in the research questions and methods of the discipline, which is now interested in fine-grained features underlying language diversity. In this paper, we propose a novel approach to address the newly defined needs of linguistic typology by extracting qualitative and quantitative information about a wide range of features from multilingual annotated corpora based on Natural Language Processing methods and techniques. We tested our method in a case study focusing on word order variation in two widely investigated constructions, VERB-SUBJ(ect) and NOUN-ADJ(ective), with a specific view to structural and functional factors underlying the preference for one or the other order, both intra- and cross-linguistically, and their interaction. 
Preliminary experiments have been carried out aimed at acquiring typological evidence from a selection of linguistically annotated treebanks for three different languages, namely Italian, Spanish and English. Our results show the effectiveness of the method in letting similarities and differences also emerge from typologically close languages.","keywords":["language typology","multilingual annotated corpora","linguistic knowledge extraction and modelling","word order variation"],"pages":"209-242","url":"https:\/\/www.rivisteweb.it\/doi\/10.1418\/95391","volume":"18","doi":"10.1418\/95391","editors_people":"","editors":[""],"published":"Lingue e linguaggio","publisher":"Il Mulino, Bologna (Italia)","issn":"1720-9331","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132354,"last_updated":"2023-11-06 19:32:10","id_people":423874,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Le parole del miglioramento. Come le scuole descrivono il cambiamento","year":2019,"authors_people":"Dell'Orletta F., Greco S., Montemagni S., Morini E., Rossi F., Sagri M.T., Venturi G.","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Greco, S.","Montemagni, S.","Morini, E.","Rossi, F.","Sagri, M. T.","Venturi, G."],"abstract":"Il presente contributo intende illustrare i risultati di una ricerca condotta con l'uso di strumenti di trattamento automatico del linguaggio (Natural Language Processing: nlp) su quanto dichiarato dalle scuole in circa 2500 Piani di Miglioramento (modello indire ) con l'obiettivo di comprendere le scelte strategiche in un'ottica di miglioramento continuo. 
Il disegno d'analisi permette di restituire sia una visione complessiva dei Piani di Miglioramento che approfondimenti qualitativi di confronto tra tipologie di scuola e aree geografiche e relativi a tematiche strategiche quali formazione e innovazione.","keywords":["Piano di Miglioramento","Natural Language Processing","Formazione","Innovazione"],"pages":"47-68","url":"https:\/\/www.rivistainfanzia.it\/pvw\/app\/default\/pvw_sito.php?sede_codice=1PWPSE01&page=2432193","volume":"1\/2019","doi":"","editors_people":"","editors":[""],"published":"Psicologia dell'educazione","publisher":"Edizioni Centro Studi Erickson (Gardolo (TN), Italia)","issn":"1971-3711","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132355,"last_updated":"2023-11-06 19:32:04","id_people":423878,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Analytics dei testi riflessivi scritti dai docenti neoassunti nel portfolio digitale","year":2019,"authors_people":"Della Gala V., Chiriatti G., Dell'Orletta F., Pettenati M.C., Venturi G.","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Della Gala, V.","Chiriatti, G.","Dell'Orletta, F.","Pettenati, M. C.","Venturi, G."],"abstract":"Presentiamo i risultati preliminari e l'analisi svolta su circa 50.000 testi scritti dai docenti neo nominati in ruolo per riflettere su due attivit\u00e0 didattiche svolte con gli studenti, nel contesto del percorso dell'anno di formazione e prova 2016\/17. Il percorso prevede attivit\u00e0 in presenza e attivit\u00e0 a distanza completate sul portfolio digitale, ospitato nell'ambiente online gestito dall'Indire. 
Nell'ambito del monitoraggio della formazione, con il fine di ottimizzare gli strumenti e il supporto fornito, abbiamo interrogato i dati testuali prodotti dai docenti nell'interazione con l'ambiente per capire se i testi presentassero evidenze riconducibili alle scritture riflessive. Obiettivi dell'indagine sono stati la definizione di uno schema per la classificazione dei testi sulla base del livello di riflessivit\u00e0 evidenziato e l'impiego di strumenti di Trattamento Automatico del Linguaggio (TAL) per l'analisi dell'intero corpus testuale prodotto dai docenti. Descriveremo il contesto scientifico e progettuale, le caratteristiche dei dati analizzati, come questo abbia determinato il disegno d'indagine; descriveremo inoltre la sua implementazione e dunque le procedure, gli strumenti e le metriche adottate o elaborate per rappresentare il contenuto dei dati; infine discuteremo i primi risultati e alcuni vantaggi e limiti dell'approccio adottato.","keywords":["Teacher professional development","Natural Language Processing","Reflective writing","Linguistic Profiling","Document Classification"],"pages":"187-204","url":"https:\/\/ojs.pensamultimedia.it\/index.php\/sird\/article\/view\/3454\/3360","volume":"Special issue","doi":"10.7346\/SIRD-2S2019-P189","editors_people":"","editors":[""],"published":"Giornale italiano della ricerca educativa (Online)","publisher":"Pensa Multimedia (Lecce, Italia)","issn":"2038-9744","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132357,"last_updated":"2023-11-06 19:32:06","id_people":423881,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Dissecting Treebanks to Uncover Typological Trends. 
A Multilingual Comparative Approach","year":2019,"authors_people":"Alzetta C., Dell'Orletta F., Montemagni S., Venturi G.","authors_cnr":["Alzetta, Chiara","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"Over the last years, linguistic typology started attracting the interest of the community working on cross- and multi-lingual NLP as a way to tackle the bottleneck deriving from the lack of annotated data for many languages. Typological information is mostly acquired from publicly accessible typological databases, manually constructed by linguists. As reported in Ponti et al. (2018), despite the abundant information contained in them for many languages, these resources suffer from two main shortcomings, i.e. their limited coverage and the discrete nature of features (only \"the majority value rather than the full range of possible values and their corresponding frequencies\" is reported). Corpus-based studies can help to automatically acquire quantitative typological evidence which might be exploited for polyglot NLP. Recently, the availability of corpora annotated following a cross-linguistically consistent annotation scheme such as the one developed in the Universal Dependencies project is prompting new comparative linguistic studies aimed to identify similarities as well as idiosyncrasies among typologically different languages (Nivre, 2015). 
The line of research described here is aimed at acquiring quantitative typological evidence from UD treebanks through a multilingual contrastive approach.","keywords":["Natural Language Processing","Linguistic Typology"],"pages":"1-3","url":"https:\/\/typology-and-nlp.github.io\/2019\/assets\/2019\/papers\/5.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-950737-29-1","conference_name":"1st TyP-NLP: The Workshop on Typology for Polyglot NLP, ACL workshop","conference_place":"Firenze","conference_date":"01\/08\/2019"},{"id":132359,"last_updated":"2023-11-06 19:32:17","id_people":423885,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"What makes a review helpful? Predicting the helpfulness of Italian tripadvisor reviews","year":2019,"authors_people":"Chiriatti G.; Brunato D.; Dell'Orletta F.; Venturi G.","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Chiriatti, G.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper we introduce a classification system devoted to predict the helpfulness of Italian online reviews. It is based on a wide set of features reflecting the different factors involved and tested on different categories of TripAdvisor reviews. For this purpose, we collected the first Italian corpus of online reviews enriched with metadata related to their helpfulness and we carried out an in-depth analysis of the most predictive features.","keywords":["Natural Language Processing","Document Classification","Linguistic Profiling"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85074834351&origin=inward","volume":"2481","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. 
Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"6th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Bari","conference_date":"13-15\/11\/2019"},{"id":132358,"last_updated":"2023-11-06 19:32:09","id_people":423883,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Italian and English sentence simplification: How many differences?","year":2019,"authors_people":"Fieromonte M.; Brunato D.; Dell'Orletta F.; Venturi G.","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Fieromonte, M.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"The paper proposes a cross-linguistic analysis of two parallel monolingual corpora conceived for automatic text simplification in two languages, Italian and English. The aim is to find similarities and differences in the process of simplification in two typologically different languages. To carry out the comparison, 1,000 sentences were extracted from the two corpora and annotated with a scheme previously used to annotate simplification phenomena.","keywords":["Natural Language Processing","Automatic Text Simplification"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85074816689&origin=inward","volume":"2481","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. 
Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"6th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Bari","conference_date":"13-15\/11\/2019"},{"id":132253,"last_updated":"2023-11-06 19:32:24","id_people":385339,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Extracting dependency relations from digital learning content","year":2018,"authors_people":"Adorni G.; Dell'Orletta F.; Koceva F.; Torre I.; Venturi G.","authors_cnr":["Venturi, Giulia","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Adorni, G.","Dell'Orletta, F.","Koceva, F.","Torre, I.","Venturi, G."],"abstract":"Digital Libraries present tremendous potential for developing e-learning applications, such as text comprehension and question-answering tools. A way to build this kind of tools is structuring the digital content into relevant concepts and dependency relations among them. While the literature offers several approaches for the former, the identification of dependencies, and specifically of prerequisite relations, is still an open issue. 
We present an approach to manage this task.","keywords":["Prerequisite relationship","Concept extraction","Graph mining"],"pages":"114-119","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85041860435&origin=inward","volume":"806","doi":"10.1007\/978-3-319-73165-0_11","editors_people":"","editors":[""],"published":"Communications in computer and information science (Print)","publisher":"Springer (Heidelberg, Germania)","issn":"1865-0929","isbn":"","conference_name":"14th Italian Research Conference on Digital Libraries (IRCDL 2018)","conference_place":"Udine","conference_date":"25-26 gennaio 2018"},{"id":132307,"last_updated":"2023-11-06 19:32:19","id_people":391617,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Assessing the Impact of Iterative Error Detection and Correction. A Case Study on the Italian Universal Dependency Treebank","year":2018,"authors_people":"Alzetta C., Dell'Orletta F., Montemagni S., Simi M., Venturi G.","authors_cnr":["Alzetta, Chiara","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Simi, M.","Venturi, G."],"abstract":"Detection and correction of errors and inconsistencies in \"gold treebanks\" are becoming more and more central topics of corpus annotation. The paper illustrates a new incremental method for enhancing treebanks, with particular emphasis on the extension of error patterns across different textual genres and registers. Impact and role of corrections have been assessed in a dependency parsing experiment carried out with four different parsers, whose results are promising. 
For both evaluation datasets, the performance of parsers increases, in terms of the standard LAS and UAS measures and of a more focused measure taking into account only relations involved in error patterns, and at the level of individual dependencies.","keywords":["Error Detection","Universal Dependency Treebanks","Syntactic parsing"],"pages":"1-7","url":"http:\/\/universaldependencies.org\/udw18\/PDFs\/39_Paper.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-948087-84-1","conference_name":"Universal Dependencies Workshop 2018 (UDW 2018)","conference_place":"Brussels","conference_date":"01\/11\/2018"},{"id":132241,"last_updated":"2023-11-06 19:32:22","id_people":382333,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Dangerous Relations in Dependency Treebanks","year":2018,"authors_people":"Chiara Alzetta, Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Venturi, Giulia","Alzetta, Chiara","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"The paper illustrates an effective and innovative method for detecting erroneously annotated arcs in gold dependency treebanks based on an algorithm originally developed to measure the reliability of automatically produced dependency relations. The method permits to significantly restrict the error search space and, more importantly, to reliably identify patterns of systematic recurrent errors which represent dangerous evidence to a parser which tendentially will replicate them. 
Achieved results demonstrate effectiveness and reliability of the method.","keywords":["Dependency treebanks","Error Detection","Linguistic Annotation"],"pages":"201-210","url":"http:\/\/aclweb.org\/anthology\/W\/W17\/W17-7624.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-80-88132-04-2","conference_name":"16th International Workshop on Treebanks and Linguistic Theories","conference_place":"Praga","conference_date":"23-24 gennaio 2018"},{"id":132252,"last_updated":"2023-11-06 19:32:36","id_people":385342,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Universal Dependencies and Quantitative Typological Trends. A Case Study on Word Order","year":2018,"authors_people":"Chiara Alzetta, Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"The paper presents a new methodology aimed at acquiring typological evidence from \"gold\" treebanks for different languages. In particular, it investigates whether and to what extent algorithms developed for assessing the plausibility of automatically produced syntactic annotations could contribute to shed light on key issues of the linguistic typological literature. 
It reports the first and promising results of a case study focusing on word order patterns carried out on three different languages (English, Italian and Spanish).","keywords":["Linguistic Knowledge Extraction","Dependency Treebanks","Linguistic Typology"],"pages":"4540-4549","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2018\/pdf\/1109.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"979-10-95546-00-9","conference_name":"Proceedings of the 11th Edition of the Language Resources and Evaluation Conference (LREC 2018)","conference_place":"Miyazaki (Japan)","conference_date":"7-12 maggio 2018"},{"id":130087,"last_updated":"2023-11-06 19:32:31","id_people":398987,"institutes":["IIT","ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Overview of the EVALITA 2018 hate speech detection task","year":2018,"authors_people":"Bosco C. (1), Sanguinetti M. (1), Dell'Orletta F. (2), Poletto F. (3), Tesconi M. (4)","authors_cnr":["Tesconi, Maurizio","Dell'Orletta, Felice"],"authors_cnr_id":["11029","14329"],"authors_cnr_institute":["044","048"],"authors":["Bosco, C.","Sanguinetti, M.","Dell'Orletta, F.","Poletto, F.","Tesconi, M."],"abstract":"The Hate Speech Detection (HaSpeeDe) task is a shared task on Italian social media (Facebook and Twitter) for the detection of hateful content, and it has been proposed for the first time at EVALITA 2018. Providing two datasets from two different online social platforms differently featured from the linguistic and communicative point of view, we organized the task in three tasks where systems must be trained and tested on the same resource or using one in training and the other in testing: HaSpeeDe-FB, HaSpeeDe-TW and Cross-HaSpeeDe (further subdivided into Cross-HaSpeeDe FB and Cross-HaSpeeDe TW sub-tasks). 
Overall, 9 teams participated in the task, and the best system achieved a macro F1-score of 0.8288 for HaSpeeDe-FB, 0.7993 for HaSpeeDe-TW, 0.6541 for Cross-HaSpeeDe FB and 0.6985 for Cross-HaSpeeDe TW. In this report, we describe the datasets released and the evaluation measures, and we discuss results.","keywords":["Hate Speech Detection","Social Media Analysis"],"pages":"9","url":"http:\/\/www.scopus.com\/inward\/record.url?eid=2-s2.0-85058647605&partnerID=q2rCbXpz","volume":"2263","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"EVALITA 2018-Sixth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian","conference_place":"Torino, Italia","conference_date":"10-12\/12\/2018"},{"id":132308,"last_updated":"2023-11-06 19:32:26","id_people":391619,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Is this sentence difficult? Do you agree?","year":2018,"authors_people":"Brunato D., De Mattei L., Dell'Orletta F., Iavarone B., Venturi G.","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Brunato, D.","De Mattei, L.","Dell'Orletta, F.","Iavarone, B.","Venturi, G."],"abstract":"In this paper, we present a crowdsourcing-based approach to model the human perception of sentence complexity. We collect a large corpus of sentences rated with judgments of complexity for two typologically-different languages, Italian and English. 
We test our approach in two experimental scenarios aimed to investigate the contribution of a wide set of lexical, morpho-syntactic and syntactic phenomena in predicting i) the degree of agreement among annotators independently from the assigned judgment and ii) the perception of sentence complexity.","keywords":["Linguistic complexity","Crowdsourcing","Human perception"],"pages":"1-10","url":"https:\/\/www.aclweb.org\/anthology\/D18-1289\/","volume":"","doi":"10.18653\/v1\/D18-1289","editors_people":"","editors":[""],"published":"","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-948087-84-1","conference_name":"Conference on Empirical Methods in Natural Language Processing (EMNLP)","conference_place":"Brussels","conference_date":"31\/10\/2018-04\/11\/2018"},{"id":132351,"last_updated":"2023-11-06 19:32:18","id_people":423871,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"A NLP-based analysis of reflective writings by Italian teachers","year":2018,"authors_people":"Chiriatti G.; Della Gala V.; Dell'Orletta F.; Montemagni S.; Pettenati M.C.; Sagri M.T.; Venturi G.","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Chiriatti, G.","Della Gala, V.","Dell'Orletta, F.","Montemagni, S.","Pettenati, M. C.","Sagri, M. T.","Venturi, G."],"abstract":"This paper reports first results of a wider study devoted to exploit the potentialities of a NLP-based approach to the analysis of a corpus of reflective writings on teaching activities. 
We investigate how a wide set of linguistic features allows reconstructing the linguistic profile of the texts written by the Italian teachers and predicting whether they are reflective.","keywords":["Natural Language Processing","Reflective Writings","Linguistic Profiling","Document Classification"],"pages":"1-7","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85057733802&origin=inward","volume":"2253","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"5th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Torino","conference_date":"10-12\/12\/2018"},{"id":132350,"last_updated":"2023-11-06 19:32:33","id_people":423870,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Sentences and documents in native language identification","year":2018,"authors_people":"Cimino A.; Dell'Orletta F.; Brunato D.; Venturi G.","authors_cnr":["Cimino, Andrea","Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Cimino, A.","Dell'Orletta, F.","Brunato, D.","Venturi, G."],"abstract":"Starting from a wide set of linguistic features, we present the first in depth feature analysis in two different Native Language Identification (NLI) scenarios. We compare the results obtained in a traditional NLI document classification task and in a newly introduced sentence classification task, investigating the different role played by the considered features. 
Finally, we study the impact of a set of selected features extracted from the sentence classifier in document classification.","keywords":["Natural Language Processing","Native Language Identification"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85057749754&origin=inward","volume":"2253","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"5th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Torino","conference_date":"10-12\/12\/2018"},{"id":132353,"last_updated":"2023-11-06 19:32:25","id_people":423873,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Gender and Genre Linguistic profiling: A case study on female and male journalistic and diary prose","year":2018,"authors_people":"Cocciu E.; Brunato D.; Venturi G.; Dell'Orletta F.","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Cocciu, E.","Brunato, D.","Venturi, G.","Dell'Orletta, F."],"abstract":"This paper intends to investigate the linguistic profile of male- and female-authored texts belonging to two very different textual genres: newspaper articles and diary prose. 
By using a wide set of linguistic features automatically extracted from text and spanning across different levels of linguistic description, from lexicon to syntax, our analysis highlights the peculiarities of the two examined genres and how the genre dimension is influenced by variation depending on author's gender (and vice versa).","keywords":["Natural Language Processing","Genre Classification","Linguistic Profiling"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85057759773&origin=inward","volume":"2253","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"5th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Torino","conference_date":"10-12\/12\/2018"},{"id":132352,"last_updated":"2023-11-06 19:32:27","id_people":423872,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Italian in the Trenches: Linguistic annotation and analysis of texts of the great war","year":2018,"authors_people":"De Felice I.; Dell'Orletta F.; Venturi G.; Lenci A.; Montemagni S.","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["De Felice, I.","Dell'Orletta, F.","Venturi, G.","Lenci, A.","Montemagni, S."],"abstract":"The paper illustrates the design and development of a textual corpus representative of the historical variants of Italian during the Great War, which was enriched with linguistic (lemmatization and pos-tagging) and meta-linguistic annotation. 
The corpus, after a manual revision of the linguistic annotation, was used for specializing existing NLP tools to process historical texts with promising results.","keywords":["Natural Language Processing","Automatic Linguistic Annotation"],"pages":"1-5","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85057734451&origin=inward","volume":"2253","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"5th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Torino","conference_date":"10-12\/12\/2018"},{"id":132230,"last_updated":"2023-11-06 19:32:44","id_people":382166,"institutes":["ILC","ISTI"],"type":"journal_article","type_order":0,"type_people":"article","title":"Natural language requirements processing: a 4D vision","year":2017,"authors_people":"Ferrari A.; Dell'Orletta F.; Esuli A.; Gervasi V.; Gnesi S.","authors_cnr":["Gnesi, Stefania","Esuli, Andrea","Dell'Orletta, Felice","Ferrari, Alessio"],"authors_cnr_id":["7405","11607","14329","15226"],"authors_cnr_institute":["074","074","048","074"],"authors":["Ferrari, A.","Dell'Orletta, F.","Esuli, A.","Gervasi, V.","Gnesi, S."],"abstract":"Natural language processing (NLP) and requirements engineering (RE) have had a long relationship, yet their combined use isn't well established in industrial practice. This situation should soon change. 
The future evolution of the application of NLP technologies in RE can be viewed from four dimensions: discipline, dynamism, domain knowledge, and datasets.","keywords":["Natural Language Processing","Requirement Processing"],"pages":"28-35","url":"http:\/\/ieeexplore.ieee.org\/abstract\/document\/8106888\/","volume":"34","doi":"10.1109\/MS.2017.4121207","editors_people":"","editors":[""],"published":"IEEE software","publisher":"IEEE Computer Society ([Los Alamitos, CA, Stati Uniti d'America)","issn":"0740-7459","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132236,"last_updated":"2023-11-06 19:32:43","id_people":382249,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"La qualit\u00e0 dei consensi informati. Un'analisi linguistico-computazionale della leggibilit\u00e0 dei testi","year":2017,"authors_people":"Venturi G., Dell'Orletta F., Montemagni S., Flore E., Bellandi T.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Venturi, G.","Dell'Orletta, F.","Montemagni, S.","Flore, E.","Bellandi, T."],"abstract":"La leggibilit\u00e0 dei testi delle informative di consenso per le procedure diagnostico-terapeutiche \u00e8 un requisito fondamentale, per offrire alle persone assistite l'accesso alle informazioni necessarie a una scelta consapevole delle opzioni disponibili per curare i diversi problemi di salute. La disponibilit\u00e0 di un testo leggibile \u00e8 inoltre un aiuto per i medici responsabili della comunicazione e della raccolta del consenso, che possono impiegarlo come un ausilio alle informazioni presentate in forma verbale durante il colloquio, in modo tale da poter condividere una base di conoscenze minime da condividere con il paziente e i suoi familiari. 
Seppure le evidenze siano limitate in merito alla relazione tra la qualit\u00e0 del consenso e l'attitudine al contenzioso da parte dei pazienti in caso di trattamenti che esitano in un danno attribuibile alle cure (Durand et al., 2015), si tratta di un ambito di ricerca di crescente interesse nella letteratura sulla sicurezza (Wu et al., 2005; Manta et al., 2017). Nella casistica regionale della Toscana sulle richieste di risarcimento, solo l'1% dei sinistri include problemi di consenso informato (dati Centro GRC), probabilmente anche a causa di una sottovalutazione del diritto all'informazione da parte dei cittadini che si sottopongono a interventi programmati, connessa con una limitata consapevolezza del potere di scegliere le proprie cure che ogni persona dovrebbe poter esercitare posta di fronte alle opzioni terapeutiche disponibili per i propri problemi di salute.","keywords":["Consenso informato","valutazione automatica della leggibilit\u00e0","Trattamento Automatico del Linguaggio"],"pages":"35-39","url":"http:\/\/www.formas.toscana.it\/rivistadellasalute\/fileadmin\/files\/fascicoli\/2017\/212\/SeT_fascicolo_212.pdf","volume":"212","doi":"","editors_people":"","editors":[""],"published":"Salute e territorio","publisher":"ETS (Pisa, Italia)","issn":"0392-4505","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132245,"last_updated":"2023-11-06 19:32:45","id_people":382461,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"On the order of words in Italian: a study on genre vs complexity","year":2017,"authors_people":"Dominique Brunato, Felice Dell'Orletta","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F."],"abstract":"In this paper we present a cross-genre study on word order variation in Italian based on automatically dependency-parsed corpora. 
A comparative analysis focused on dependency direction and dependency distance for major constituents in the sentence is carried out in order to assess the influence of both textual genre and linguistic complexity on the distribution of phenomena of syntactic markedness.","keywords":["word order","syntactic analysis","linguistic complexity","natural language processing"],"pages":"25-31","url":"https:\/\/publications.cnr.it\/doc\/382461","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"International Conference on Dependency Linguistics (Depling 2017)","conference_place":"Pisa","conference_date":"18-20\/09\/2017"},{"id":132237,"last_updated":"2023-11-06 19:32:42","id_people":382252,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Identifying predictive features for textual genre classification: The key role of syntax","year":2017,"authors_people":"Cimino A.; Wieling M.; Dell'Orletta F.; Montemagni S.; Venturi G.","authors_cnr":["Venturi, Giulia","Cimino, Andrea","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Cimino, A.","Wieling, M.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"The paper investigates impact and role of different feature types for the specific task of Automatic Genre Classification with the final aim of identifying the most predictive ones. The goal was pursued by carrying out incremental feature selection through Grafting using different sets of linguistic features. 
Achieved results for discriminating among four traditional textual genres show the key role played by syntactic features, whose impact turned out to vary across genres.","keywords":["Textual Genre Classification","Feature Selection","Syntactic Features"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85037370866&origin=inward","volume":"2006","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Roma","conference_date":"11-12 dicembre 2017"},{"id":129705,"last_updated":"2023-11-06 19:32:40","id_people":369760,"institutes":["IIT","ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Hate me, hate me not: Hate speech detection on Facebook","year":2017,"authors_people":"F. Del Vigna (1); A. Cimino (2); F. Dell'Orletta (2); M. Petrocchi (1); M. Tesconi (1)","authors_cnr":["Petrocchi, Marinella","Cimino, Andrea","Del Vigna, Fabio","Tesconi, Maurizio","Dell'Orletta, Felice"],"authors_cnr_id":["11029","14329"],"authors_cnr_institute":["044","048","044","044","048"],"authors":["Del Vigna, F.","Cimino, A.","Dell'Orletta, F.","Petrocchi, M.","Tesconi, M."],"abstract":"While favouring communications and easing information sharing, Social Network Sites are also used to launch harmful campaigns against specific groups and individuals. Cyberbullism, incitement to self-harm practices, sexual predation are just some of the severe effects of massive online offensives. Moreover, attacks can be carried out against groups of victims and can degenerate in physical violence. In this work, we aim at containing and preventing the alarming diffusion of such hate campaigns. 
Using Facebook as a benchmark, we consider the textual content of comments appeared on a set of public Italian pages. We first propose a variety of hate categories to distinguish the kind of hate. Crawled comments are then annotated by up to five distinct human annotators, according to the defined taxonomy. Leveraging morpho-syntactical features, sentiment polarity and word embedding lexicons, we design and implement two classifiers for the Italian language, based on different learning algorithms: the first based on Support Vector Machines (SVM) and the second on a particular Recurrent Neural Network named Long Short Term Memory (LSTM). We test these two learning algorithms in order to verify their classification performances on the task of hate speech recognition. The results show the effectiveness of the two classification approaches tested over the first manually annotated Italian Hate Speech Corpus of social media text.","keywords":["Hate speech","NLP","Social Networks"],"pages":"86-95","url":"http:\/\/www.scopus.com\/inward\/record.url?eid=2-s2.0-85017337270&partnerID=q2rCbXpz","volume":"1816","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. 
Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"ITA-SEC 17","conference_place":"Venezia, Italia","conference_date":"17-20\/01\/2017"},{"id":359306,"last_updated":"2023-11-06 19:32:47","id_people":377423,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Stylometry in Computer-Assisted Translation: Experiments on the Babylonian Talmud","year":2017,"authors_people":"Emiliano Giovannetti, Davide Albanesi, Andrea Bellandi, David Dattilo, Felice Dell'Orletta","authors_cnr":["Bellandi, Andrea","Giovannetti, Emiliano","Dell'Orletta, Felice","Albanesi, Davide"],"authors_cnr_id":["11969","14329","15457"],"authors_cnr_institute":[""],"authors":["Giovannetti, E.","Albanesi, D.","Bellandi, A.","Dattilo, D.","Dell'Orletta, F."],"abstract":"The purpose of this research is to experiment the application of stylometric techniques in the area of Computer-Assisted Translation to reduce the revision effort in the context of a collaborative, large scale translation project. 
The obtained results show a correlation between the editing extent and the compliance to some specific linguistic features, proving that supporting translators in writing translations following a desired style can actually reduce the number of following necessary interventions (and, consequently, save time) by revisors, editors and curators.","keywords":["traduco","babylonian talmud","computer-assisted translation","stylometry","readability"],"pages":"177-182","url":"https:\/\/publications.cnr.it\/doc\/377423","volume":"","doi":"","editors_people":"Roberto Basili, Malvina Nissim, Giorgio Satta","editors":["Basili, R.","Nissim, M.","Satta, G."],"published":"Proceedings of 4th Italian Conference on Computational Linguistics (CLiC-it)","publisher":"Accademia University Press (Torino, ITA)","issn":"","isbn":"9788899982942","conference_name":"Fourth Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Roma","conference_date":"11-13\/12\/2017"},{"id":129850,"last_updated":"2023-11-06 19:32:38","id_people":375139,"institutes":["IIT","ILC","ISTI"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Cross-media learning for image sentiment analysis in the wild","year":2017,"authors_people":"Vadicamo L.; Carrara F.; Falchi F.; Cimino A.; Dell'Orletta F.; Cresci S.; Tesconi M.","authors_cnr":["Cimino, Andrea","Vadicamo, Lucia","Carrara, Fabio","Tesconi, Maurizio","Falchi, Fabrizio","Dell'Orletta, Felice","Cresci, Stefano"],"authors_cnr_id":["11029","11508","14329","17179"],"authors_cnr_institute":["048","074","074","044","074","048","044"],"authors":["Vadicamo, L.","Carrara, F.","Falchi, F.","Cimino, A.","Dell'Orletta, F.","Cresci, S.","Tesconi, M."],"abstract":"Much progress has been made in the field of sentiment analysis in the past years. Researchers relied on textual data for this task, while only recently they have started investigating approaches to predict sentiments from multimedia content. 
With the increasing amount of data shared on social media, there is also a rapidly growing interest in approaches that work \"in the wild\", i.e. that are able to deal with uncontrolled conditions. In this work, we faced the challenge of training a visual sentiment classifier starting from a large set of user-generated and unlabeled contents. In particular, we collected more than 3 million tweets containing both text and images, and we leveraged on the sentiment polarity of the textual contents to train a visual sentiment classifier. To the best of our knowledge, this is the first time that a cross-media learning approach is proposed and tested in this context. We assessed the validity of our model by conducting comparative studies and evaluations on a benchmark for visual sentiment analysis. Our empirical study shows that although the text associated to each image is often noisy and weakly correlated with the image content, it can be profitably exploited to train a deep Convolutional Neural Network that effectively predicts the sentiment polarity of previously unseen images.","keywords":["Big data","Data Mining","Sentiment Analysis","Social Media Analysis"],"pages":"10","url":"https:\/\/ieeexplore.ieee.org\/document\/8265255","volume":"","doi":"10.1109\/ICCVW.2017.45","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-5386-1034-3","conference_name":"ICCV 2017 IEEE International Conference on Computer Vision Workshops","conference_place":"Venezia, Italy","conference_date":"22-29 October 2017"},{"id":130476,"last_updated":"2023-11-06 19:32:48","id_people":429823,"institutes":["IIT","ILC","ISTI"],"type":"misc","type_order":12,"type_people":"other","title":"T4SA: Twitter for Sentiment Analysis","year":2017,"authors_people":"Carrara F.; Cimino A.; Cresci S.; Dell'Orletta F.; Falchi F.; Vadicamo L.; Tesconi M.","authors_cnr":["Cimino, Andrea","Carrara, Fabio","Tesconi, Maurizio","Falchi, Fabrizio","Dell'Orletta, Felice","Cresci, 
Stefano","Vadicamo, Lucia"],"authors_cnr_id":["11029","11508","14329","17179","17950"],"authors_cnr_institute":["048","074","044","074","048","044","074"],"authors":["Carrara, F.","Cimino, A.","Cresci, S.","Dell'Orletta, F.","Falchi, F.","Vadicamo, L.","Tesconi, M."],"abstract":"T4SA is intended for training and testing image sentiment analysis approaches. It contains little less than a million tweets, corresponding to about 1.5M images. We initially collected about 3.4M tweets corresponding to about 4M images. We classified the sentiment polarity of the texts (as described in Section 4) and we selected the tweets having the most confident textual sentiment predictions to build our Twitter for Sentiment Analysis (T4SA) dataset. The dataset is publicly available at: http:\/\/www.t4sa.it\/","keywords":["social media","sentiment analysis","image analysis","image sentiment analysis","deep learning","multimedia sentiment analysis","dataset","tweets"],"pages":"","url":"http:\/\/www.t4sa.it\/","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132169,"last_updated":"2023-11-06 19:32:55","id_people":366755,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"ISACCO: a corpus for investigating spoken and written language development in Italian school-age children","year":2016,"authors_people":"Dominique Brunato, Felice Dell'Orletta","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F."],"abstract":"In this paper we present ISACCO (Italian School-Age Children COrpus), a corpus of oral and written retellings of Italian-speaking children attending primary school. All texts were digitalized and automatically enriched with multi-level linguistic annotation. 
Preliminary explorations of both the form and the content of children's productions were carried out based on a set of features automatically extracted by NLP tools. Written retellings were manually annotated with a typology of errors belonging to three different linguistic levels. The resource, which has been made publicly available, is conceived to support research and computational modeling of \"later language acquisition\", with an emphasis on comparative assessment of the evolution of oral and written language competencies in early school grades.","keywords":["Child language acquisition","Oral and Written language","multi-level linguistic analysis"],"pages":"63-76","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2016\/09\/04_brunato_dell-orletta.pdf","volume":"2","doi":"","editors_people":"","editors":[""],"published":"Italian Journal of Computational Linguistics","publisher":"aAccademia University Press, Torino (Italia)","issn":"2499-4553","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132251,"last_updated":"2023-11-06 19:32:57","id_people":385220,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Monitoraggio linguistico di Scritture Brevi: aspetti metodologici e primi risultati","year":2016,"authors_people":"D. BRUNATO, F. DELL'ORLETTA, S. MONTEMAGNI, G. VENTURI","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"Se da un lato le tecnologie del linguaggio svolgono un ruolo ormai indiscusso per l'accesso al contenuto testuale, ci\u00f2 non appare scontato quando si va a considerare il loro ruolo nella valutazione delle strutture linguistiche sottostanti al testo. 
Questo contributo si focalizza sulla definizione di una metodologia innovativa di monitoraggio linguistico della lingua italiana che a partire dall'output di strumenti di annotazione linguistica automatica permette di ricostruire un profilo linguistico di una collezione di testi rappresentativa di una specifica variet\u00e0 d'uso della lingua. Tale metodologia \u00e8 stata applicata a un corpus di tweet allo scopo di far luce su interrogativi aperti quali la possibilit\u00e0 di rintracciare tendenze lessicali, morfo-sintattiche e sintattiche peculiari all'interno di questa tipologia testuale; di studiare come queste tendenze si rapportino ai tratti caratterizzanti della lingua scritta e parlata; di individuare possibili differenze nella forma linguistica in cui si twittano contenuti di natura diversa.","keywords":["Trattamento Automatico del Linguaggio","Monitoraggio Linguistico","Variet\u00e0 d'Uso della Lingua","Lingua del Web"],"pages":"149-176","url":"https:\/\/publications.cnr.it\/doc\/385220","volume":"N. S. 
5","doi":"","editors_people":"","editors":[""],"published":"Quaderni Aion","publisher":"Universit\u00e0 degli Studi di Napoli \"L'Orientale\" (Napoli, Italia)","issn":"1825-2796","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132176,"last_updated":"2023-11-06 19:33:00","id_people":367760,"institutes":["ILC"],"type":"edited_volume","type_order":3,"type_people":"book","title":"Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC 2016)","year":2016,"authors_people":"Dominique Brunato, Felice Dell'Orletta, Giulia Venturi, Thomas Fran\u00e7ois, Philippe Blache","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F.","Venturi, G.","Fran\u00e7ois, T.","Blache, P."],"abstract":"Introduzione agli atti della prima edizione del workshop \"Computational Linguistics for Linguistic Complexity\" che raccoglie lavori che studiano da prospettive diverse il tema della complessit\u00e0 linguistica workshop allo scopo di promuovere una riflessione comune su approcci diversi all'indagine, al trattamento e alla valutazione di aspetti che rendono complessa la lingua.","keywords":["Linguistic Complexity","Computational Linguistics"],"pages":"1-245","url":"https:\/\/aclweb.org\/anthology\/W\/W16\/W16-41.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-4-87974-709-9","conference_name":"","conference_place":"","conference_date":""},{"id":132166,"last_updated":"2023-11-06 19:32:51","id_people":366749,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"CItA: an L1 Italian Learners Corpus to Study the Development of Writing Competence","year":2016,"authors_people":"Barbagli A., Lucisano P., Dell'Orletta F., Montemagni S., Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, 
Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Barbagli, A.","Lucisano, P.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In this paper, we present the CItA corpus (Corpus Italiano di Apprendenti L1), a collection of essays written by Italian L1 learners collected during the first and second year of lower secondary school. The corpus was built in the framework of an interdisciplinary study jointly carried out by computational linguistics and experimental pedagogists and aimed at tracking the development of written language competence over the years and students' background information.","keywords":["Italian Learner Corpus","Diachronic Evolution of Written Language Competence","Error Annotation"],"pages":"88-95","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2016\/pdf\/536_Paper.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"978-2-9517408-9-1","conference_name":"Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)","conference_place":"Portoroz (Slovenia)","conference_date":"23-28 maggio 2016"},{"id":132165,"last_updated":"2023-11-06 19:32:58","id_people":366726,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"PaCCSS-IT: A Parallel Corpus of Complex-Simple Sentences for Automatic Text Simplification","year":2016,"authors_people":"Dominique Brunato, Andrea Cimino, Felice Dell'Orletta, Giulia Venturi","authors_cnr":["Venturi, Giulia","Cimino, Andrea","Brunato, Dominique Pierina","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Cimino, A.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper we present PaCCSS-IT, a Parallel Corpus of Complex-Simple Sentences for ITalian. 
To build the resource we develop a new method for automatically acquiring a corpus of complex-simple paired sentences able to intercept structural transformations and particularly suitable for text simplification. The method requires a wide amount of texts that can be easily extracted from the web making it suitable also for less-resourced languages. We test it on the Italian language making available the biggest Italian corpus for automatic text simplification.","keywords":["Automatic Text Simplification","Sentence alignment","Italian corpus"],"pages":"351-361","url":"https:\/\/www.aclweb.org\/anthology\/D\/D16\/D16-1034.pdf","volume":"","doi":"10.18653\/v1\/d16-1034","editors_people":"","editors":[""],"published":"","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-945626-25-8","conference_name":"Conference on Empirical Methods in Natural Language Processing (EMNLP 2016)","conference_place":"Austin, Texas","conference_date":"01-05\/11\/2016"},{"id":132170,"last_updated":"2023-11-06 19:32:53","id_people":366757,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Esplorazioni computazionali nello spazio dell'interlingua: verso una nuova metodologia di indagine","year":2016,"authors_people":"Dell'Orletta F., Montemagni S. e Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"Il presente contributo intende proporre un innovativo approccio all'identificazione delle caratteristiche linguistiche che aiutano a definire l'interlingua. 
Tale approccio consiste nella ricostruzione del profilo linguistico di corpora di produzioni scritte da apprendenti una lingua seconda basato su strumenti di trattamento automatico del linguaggio.","keywords":["interlingua","annotazione linguistica automatica","monitoraggio linguistico"],"pages":"143-161","url":"https:\/\/www.bulzoni.it\/it\/catalogo\/lingue-in-contatto-contact-linguistics.html","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"Bulzoni Editore (Roma, ITA)","issn":"","isbn":"978-88-6897-029-1","conference_name":"XLVIII Congresso Internazionale di Studi della Societ\u00e0 di Linguistica Italiana (SLI 2014)","conference_place":"Udine","conference_date":"25-27 settembre 2014"},{"id":132167,"last_updated":"2023-11-06 19:33:04","id_people":366752,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"ULISSE: una strategia di adattamento al dominio per l'annotazione sintattica automatica","year":2016,"authors_people":"Dell'Orletta F., Venturi G.","authors_cnr":["Venturi, Giulia","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Venturi, G."],"abstract":"This paper deals with Domain Adaptation for automatic syntactic annotation. Until the half of the 1980s, automatic linguistic annotation was based on algorithms built on groups of hand-written rules, defined a priori on the basis of the knowledge of the system to formalise. Subsequently, thanks to the progress of research in the field of Artificial Intelligence and to the development of linguistic resources, algorithms based on machine learning techniques began to be employed. The major difficulties of those algorithms were due to certain aspects of natural language such as ambiguities, diachronic evolutions, or language variations from the original domain of knowledge. 
More specifically, the issue of Domain Adaptation can be put in the following terms: \"can an annotated corpus [which is representative of a specific linguistic variety] be used for the syntactic analysis of a second corpus [which is representative of a different linguistic variety]?\". The authors answer by presenting an algorithm called ULISSE (Unsupervised LInguistically-driven Selection of dEpendency parses), which selects in an optimal way the most representative sentences of a new target domain and feeds them to the parser in addition to the original training set.","keywords":["Domain Adaptation","annotazione sintattica automatica"],"pages":"55-79","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2016\/10\/Compter_Parler_Soigner_ULISSE.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-88-6952-038-9","conference_name":"Atti del convegno \"Compter parler soigner: tra linguistica e intelligenza artificiale\"","conference_place":"Pavia","conference_date":"15-17 dicembre 2014"},{"id":132201,"last_updated":"2023-11-06 19:32:56","id_people":366723,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"La leggibilit\u00e0 dei testi di ambito medico rivolti al paziente: Il caso dei bugiardini di farmaci senza obbligo di prescrizione medica","year":2016,"authors_people":"Orletti F.; Dell'Orletta F.; Iovino R.","authors_cnr":["Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Orletti, F.","Dell'Orletta, F.","Iovino, R."],"abstract":"In this paper we present the first results of an exploratory analysis of simplification of the package leaflets of medicines, considered representative texts of doctor-patient communication. 
It will be shown how natural language processing tools can be used to reconstruct the linguistic profile of these texts and to guide their simplification.","keywords":["leggibilit\u00e0"],"pages":"","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85009291162&origin=inward","volume":"1749","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"Third Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Napoli","conference_date":"5-6\/12\/2016"},{"id":132164,"last_updated":"2023-11-06 19:33:01","id_people":366724,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Studio sull'ordinamento dei costituenti nel confronto tra generi e complessit\u00e0","year":2016,"authors_people":"Giulia Pieri, Dominique Brunato, Felice Dell'Orletta","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Pieri, G.","Brunato, D.","Dell'Orletta, F."],"abstract":"In questo articolo presentiamo uno studio sull'ordine dei costituenti in italiano basato su corpora annotati in maniera automatica fino all'analisi sintattica a dipendenze. 
L'indagine comparativa ha permesso di valutare l'influenza sia del genere testuale sia della complessit\u00e0 linguistica nella distribuzione dei fenomeni di marcatezza sintattica.","keywords":["Complessit\u00e0 linguistica","Corpora annotati","Generi testuali"],"pages":"5","url":"http:\/\/ceur-ws.org\/Vol-1749\/paper44.pdf","volume":"1749","doi":"","editors_people":"Basile, Pierpaolo; Corazza, Anna; Cutugno, Franco; Montemagni, Simonetta; Nissim, Malvina; Patti, Viviana; Semeraro, Giovanni; Sprugnoli, Rachele","editors":["Basile, P.","Corazza, A.","Cutugno, F.","Montemagni, S.","Nissim, M.","Patti, V.","Semeraro, G.","Sprugnoli, R."],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Third Italian Conference on Computational Linguistics (CLiC-it 2016)","conference_place":"Napoli","conference_date":"5-6\/12\/2016"},{"id":132168,"last_updated":"2023-11-06 19:32:52","id_people":366754,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Dieci sfumature di marcatezza sintattica: Verso una nozione computazionale di complessit\u00e0","year":2016,"authors_people":"Tusa E.; Dell'Orletta F.; Montemagni S.; Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Tusa, E.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In this work, we will investigate whether and to what extent algorithms typically used to assess the reliability of the output of syntactic parsers can be used to study the correlation between processing complexity and the linguistic notion of markedness. 
Although still preliminary, achieved results show the key role of features such as dependency direction and length in defining the markedness degrees of a given syntactic construction.","keywords":["marcatezza sintattica","complessit\u00e0 linguistica","annotazione linguistica automatica"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85009279517&origin=inward","volume":"1749","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Napoli","conference_date":"5-6 dicembre 2016"},{"id":132200,"last_updated":"2023-11-06 19:33:14","id_people":366713,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"State of the Art Language Technologies for Italian: The EVALITA 2014 Perspective","year":2015,"authors_people":"Attardi, Giuseppe; Basile, Valerio; Bosco, Cristina; Caselli, Tommaso; Dell'Orletta, Felice; Montemagni, Simonetta; Patti, Viviana; Simi, Maria; Sprugnoli, Rachele","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Attardi, G.","Basile, V.","Bosco, C.","Caselli, T.","Dell'Orletta, F.","Montemagni, S.","Patti, V.","Simi, M.","Sprugnoli, R."],"abstract":"Shared task evaluation campaigns represent a well established form of competitive evaluation, an important opportunity to propose and tackle new challenges for a specific research area and a way to foster the development of benchmarks, tools and resources. The advantages of this approach are evident in any experimental field, including the area of Natural Language Processing. 
An outlook on state-of-the-art language technologies for Italian can be obtained by reflecting on the results of the recently held workshop \"Evaluation of NLP and Speech Tools for Italian\", EVALITA 2014. The motivations underlying individual shared tasks, the level of knowledge and development achieved within each of them, the impact on applications, society and economy at large as well as directions for future research will be discussed from this perspective.","keywords":["Evaluation Campaign","Natural Language Processing","Dependency Parsing","Sentiment Analysis","Temporal Processing"],"pages":"43-61","url":"https:\/\/publications.cnr.it\/doc\/366713","volume":"9","doi":"10.3233\/IA-150076","editors_people":"","editors":[""],"published":"Intelligenza Artificiale","publisher":"Associazione Italiana per l'Intelligenza Artificiale (Bari, Italia)","issn":"1724-8035","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132127,"last_updated":"2023-11-06 19:33:10","id_people":357152,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Il ruolo delle tecnologie del linguaggio nel monitoraggio dell'evoluzione delle abilit\u00e0 di scrittura: primi risultati","year":2015,"authors_people":"Barbagli A., Lucisano P., Dell'Orletta F., Montemagni S., Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Barbagli, A.","Lucisano, P.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"L'ultimo decennio ha visto l'affermarsi a livello internazionale dell'uso di tecnologie del linguaggio per lo studio dei processi di apprendimento. Questo contributo riporta i primi e promettenti risultati di uno studio interdisciplinare che si \u00e8 avvalso di metodi e tecniche di analisi propri della linguistica computazionale, della linguistica e della pedagogia sperimentale. 
Lo studio, finalizzato al monitoraggio dell'evoluzione del processo di apprendimento della lingua italiana, \u00e8 stato condotto a partire dalle produzione scritte di studenti della scuola secondaria di primo grado con strumenti di annotazione linguistica automatica e di estrazione di conoscenza e ha portato all'identificazione di un insieme di tratti qualificanti il processo di apprendimento linguistico.","keywords":["evoluzione delle competenze linguistiche","Didattica Sperimentale","Estrazione di conoscenza","Annotazione linguistica automatica"],"pages":"99-117","url":"https:\/\/journals.openedition.org\/ijcol\/326","volume":"","doi":"10.4000\/ijcol.326","editors_people":"","editors":[""],"published":"Italian Journal of Computational Linguistics","publisher":"aAccademia University Press, Torino (Italia)","issn":"2499-4553","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132126,"last_updated":"2023-11-06 19:33:05","id_people":357146,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"CItA: un Corpus di Produzioni Scritte di Apprendenti l'Italiano L1 Annotato con Errori","year":2015,"authors_people":"Alessia Barbagli, Pietro Lucisano, Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Barbagli, A.","Lucisano, P.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In questo articolo presentiamo CItA il primo corpus di produzioni scritte di apprendenti l'italiano L1 del primo e del secondo anno della scuola secondaria di primo grado annotato con errori grammaticali, ortografici e lessicali. 
Le specificit\u00e0 del corpus e la sua natura diacronica lo rendono particolarmente utile sia per applicazioni linguistico-computazionali sia per studi socio-pedagogici.","keywords":["Apprendimento della lingua madre","evoluzione delle competenze linguistiche"],"pages":"31-35","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2016\/03\/CItA_errori.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"Accademia University Press (Torino, ITA)","issn":"","isbn":"978-88-99200-62-6","conference_name":"2nd Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Trento","conference_date":"3-4 dicembre 2015"},{"id":132135,"last_updated":"2023-11-06 19:33:12","id_people":359256,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"ISACCO: a corpus for investigating spoken and written language development in Italian school-age children","year":2015,"authors_people":"D. Brunato, F. Dell'Orletta","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F."],"abstract":"We present ISACCO (Italian school-age children corpus), a new corpus of oral and written retellings of Italian speaking children attending the primary school. All texts were digitalized and automatically enriched with linguistic information allowing preliminary explorations based on NLP features. Written retellings were also manually annotated with a typology of linguistic errors. 
The resource is conceived to support research and computational modeling of \"later language acquisition\", with an emphasis for comparative assessment of oral and written language skills across early school grades.","keywords":["Child language acquisition","Oral and written language","multi-level linguistic analysis"],"pages":"62-66","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2016\/03\/IsaccoCorpus.pdf","volume":"","doi":"","editors_people":"Cristina Bosco, Sara Tonelli, Fabio Massimo Zanzotto","editors":["Bosco, C.","Tonelli, S.","Zanzotto, F. M."],"published":"Proceedings of the Second Italian Conference on Computational Linguistics (CLiC-it 2015)","publisher":"Accademia University Press (Torino, ITA)","issn":"","isbn":"978-88-99200-62-6","conference_name":"Second Italian Conference on Computational Linguistics (CLiC-it 2015)","conference_place":"Trento","conference_date":"03\/12\/2015-04\/12\/2015"},{"id":132076,"last_updated":"2023-11-06 19:33:09","id_people":332693,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Design and Annotation of the First Italian Corpus for Text Simplification","year":2015,"authors_people":"Brunato D., Dell'Orletta F., Venturi G., Montemagni S.","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048","048"],"authors":["Brunato, D.","Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"In this paper, we present design and construction of the first Italian corpus for automatic and semi--automatic text simplification. In line with current approaches, we propose a new annotation scheme specifically conceived to identify the typology of changes an original sentence undergoes when it is manually simplified. 
Such a scheme has been applied to two aligned Italian corpora, containing original texts with corresponding simplified versions, selected as representative of two different manual simplification strategies and addressing different target reader populations. Each corpus was annotated with the operations foreseen in the annotation scheme, covering different levels of linguistic description. Annotation results were analysed with the final aim of capturing peculiarities and differences of the different simplification strategies pursued in the two corpora.","keywords":["Annotation Scheme","Automatic Text Simplification"],"pages":"31-34","url":"https:\/\/aclweb.org\/anthology\/W\/W15\/W15-1604.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-941643-47-1","conference_name":"Proceedings of LAW IX-The 9th Linguistic Annotation Workshop","conference_place":"Denver, Colorado","conference_date":"5 giugno 2015"},{"id":129242,"last_updated":"2023-11-06 19:33:08","id_people":337237,"institutes":["IIT","ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Crisis Mapping during Natural Disasters via Text Analysis of Social Media Messages","year":2015,"authors_people":"S. Cresci(1); A. Cimino (1); F. Dell'Orletta (2); M. Tesconi (1)","authors_cnr":["Cimino, Andrea","Cresci, Stefano","Tesconi, Maurizio","Dell'Orletta, Felice"],"authors_cnr_id":["11029","14329"],"authors_cnr_institute":["048","044","044","048"],"authors":["Cresci, S.","Cimino, A.","Dell'Orletta, F.","Tesconi, M."],"abstract":"Recent disasters demonstrated the central role of social media during emergencies thus motivating the exploitation of such data for crisis mapping. We propose a crisis mapping system that addresses limitations of current state-of-the-art approaches by analyzing the textual content of disaster reports from a twofold perspective. 
A damage detection component employs a SVM classifier to detect mentions of damage among emergency reports. A novel geoparsing technique is proposed and used to perform message geolocation. We report on a case study to show how the information extracted through damage detection and message geolocation can be combined to produce accurate crisis maps. Our crisis maps clearly detect both highly and lightly damaged areas, thus opening up the possibility to prioritize rescue efforts where they are most needed.","keywords":["crisis informatics","Emergency Management","geoparsing","social media mining","Twitter"],"pages":"1-8","url":"https:\/\/publications.cnr.it\/doc\/337237","volume":"","doi":"","editors_people":"","editors":[""],"published":"Lecture notes in computer science","publisher":"Springer (Berlin, Germania)","issn":"0302-9743","isbn":"","conference_name":"Web Information Systems Engineering-WISE 2015","conference_place":"Miami, USA","conference_date":"02\/11\/2015"},{"id":129230,"last_updated":"2023-11-06 19:33:04","id_people":336952,"institutes":["IIT","ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"A Linguistically-driven Approach to Cross-Event Damage Assessment of Natural Disasters from Social Media Messages","year":2015,"authors_people":"S. Cresci, M. Tesconi, A. Cimino, F. Dell'Orletta","authors_cnr":["Cresci, Stefano","Tesconi, Maurizio","Dell'Orletta, Felice"],"authors_cnr_id":["11029","14329"],"authors_cnr_institute":[""],"authors":["Cresci, S.","Tesconi, M.","Cimino, A.","Dell'Orletta, F."],"abstract":"This work focuses on the analysis of Italian social media messages for disaster management and aims at the detection of messages carrying critical information for the damage assessment task. A main novelty of this study consists in the focus on out-domain and cross-event damage detection, and on the investigation of the most relevant tweet-derived features for these tasks. 
We devised different experiments by resorting to a wide set of linguistic features qualifying the lexical and grammatical structure of a text as well as ad-hoc features specifically implemented for this task. We investigated the most effective features that allow to achieve the best results. A further result of this study is the construction of the first manually annotated Italian corpus of social media messages for damage assessment.","keywords":["crisis informatics","Damage assessment","Emergency Management","feature selection","social media mining","Social Sensing"],"pages":"6","url":"https:\/\/publications.cnr.it\/doc\/336952","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Proceedings of the 24th international conference companion on World Wide Web. ACM, 2015","conference_place":"Florence, Italy","conference_date":"18\/05\/2015"},{"id":132203,"last_updated":"2023-11-06 19:33:07","id_people":346045,"institutes":["ILC","ISTI"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"CMT and FDE: tools to bridge the gap between natural language documents and feature diagrams","year":2015,"authors_people":"Ferrari A.; Spagnolo G. O.; Gnesi S.; Dell'Orletta F.","authors_cnr":["Dell'Orletta, Felice","Gnesi, Stefania","Ferrari, Alessio","Spagnolo, Giorgio Oronzo"],"authors_cnr_id":["7405","15226","15822"],"authors_cnr_institute":["074","074","074","074"],"authors":["Ferrari, A.","Spagnolo, G. O.","Gnesi, S.","Dell'Orletta, F."],"abstract":"A business subject who wishes to enter an established technological market is required to accurately analyse the features of the products of the different competitors. Such features are normally accessible through natural language (NL) brochures, or NL Web pages, which describe the products to potential customers. 
Building a feature model that hierarchically summarises the different features available in competing products can bring relevant benefits in market analysis. A company can easily visualise existing features, and reason about aspects that are not covered by the available solutions. However, designing a feature model starting from publicly available documents of existing products is a time consuming and error-prone task. In this paper, we present two tools, namely Commonality Mining Tool (CMT) and Feature Diagram Editor (FDE), which can jointly support the feature model definition process. CMT allows mining common and variant features from NL descriptions of existing products, by leveraging a natural language processing (NLP) approach based on contrastive analysis, which allows identifying domain-relevant terms from NL documents. FDE takes the commonalities and variabilities extracted by CMT, and renders them in a visual form. Moreover, FDE allows the graphical design and refinement of the final feature model, by means of an intuitive GUI","keywords":["Software Product Lines","Variability Mining","Tools"],"pages":"402-410","url":"http:\/\/dl.acm.org\/citation.cfm?doid=2791060.2791117","volume":"","doi":"10.1145\/2791060.2791117","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-4503-3613-0","conference_name":"19th International Conference on Software Product Line","conference_place":"Nashville, TN, USA","conference_date":"20-24\/07\/2015"},{"id":132125,"last_updated":"2023-11-06 19:33:15","id_people":357144,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Tracking the Evolution of Written Language Competence: an NLP-based Approach","year":2015,"authors_people":"Richter S., Cimino A., Dell'Orletta F., Venturi G.","authors_cnr":["Venturi, Giulia","Cimino, Andrea","Dell'Orletta, 
Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":["048","048","048"],"authors":["Richter, S.","Cimino, A.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper, we present an NLP-based innovative approach for tracking the evolution of written language competence relying on different sets of linguistic features that predict text quality. This approach was tested on a corpus of essays written by Italian L1 learners of the first and second year of the lower secondary school.","keywords":["Evolution of Written Language Competence","multi-level linguistic analysis"],"pages":"236-240","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2016\/03\/tracking-language-competence.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"Accademia University Press (Torino, ITA)","issn":"","isbn":"978-88-99200-62-6","conference_name":"2nd Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Trento","conference_date":"3-4 dicembre 2015"},{"id":132089,"last_updated":"2023-11-06 19:33:12","id_people":340387,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"NLP-Based Readability Assessment of Health-Related Texts: a Case Study on Italian Informed Consent Forms","year":2015,"authors_people":"Giulia Venturi, Tommaso Bellandi, Felice Dell'Orletta, Simonetta Montemagni","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Venturi, G.","Bellandi, T.","Dell'Orletta, F.","Montemagni, S."],"abstract":"The paper illustrates the results of a case study aimed at investigating and enhancing the accessibility of Italian health-related documents by relying on advanced NLP techniques, with particular attention to informed consent forms. 
Results achieved show that the features automatically extracted from the linguistically annotated text and ranging across different levels of linguistic description have a high discriminative power in order to guarantee a reliable readability assessment.","keywords":["Readability assessment","health-related information"],"pages":"131-141","url":"http:\/\/www.aclweb.org\/anthology\/W15-2618","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-941643-32-7","conference_name":"Sixth International Workshop on Health Text Mining and Information Analysis (Louhi)","conference_place":"Lisbona","conference_date":"17 settembre 2015"},{"id":131881,"last_updated":"2023-11-06 19:33:16","id_people":285640,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Assessing document and sentence readability in less resourced languages and across textual genres","year":2014,"authors_people":"Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In this paper, we tackle three underresearched issues of the automatic readability assessment literature, namely the evaluation of text readability in less resourced languages, with respect to sentences (as opposed to documents) as well as across textual genres. Different solutions to these issues have been tested by using and refining READ-IT, the first advanced readability assessment tool for Italian, which combines traditional raw text features with lexical, morpho-syntactic and syntactic information. 
In READ-IT readability assessment is carried out with respect to both documents and sentences, with the latter constituting an important novelty of the proposed approach: READ-IT shows a high accuracy in the document classification task and promising results in the sentence classification scenario. By comparing the results of two versions of READ-IT, adopting a classification- versus ranking-based approach, we also show that readability assessment is strongly influenced by textual genre; for this reason a genre-oriented notion of readability is needed. With classification-based approaches, reliable results can only be achieved with genre-specific models: Since this is far from being a workable solution, especially for less resourced languages, a new ranking method for readability assessment is proposed, based on the notion of distance.","keywords":["readability assessment","less resourced languages","multi-level linguistic annotation","textual genres"],"pages":"163-193","url":"http:\/\/www.ingentaconnect.com\/content\/jbp\/itl\/2014\/00000165\/00000002\/art00005","volume":"165","doi":"10.1075\/itl.165.2.03del","editors_people":"","editors":[""],"published":"ITL. 
Internationaler technischer Literaturanzeiger (Online)","publisher":"Peeters Publishers (Leuven, Belgio)","issn":"1783-1490","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132064,"last_updated":"2023-11-06 19:33:25","id_people":330112,"institutes":["ILC"],"type":"edited_volume","type_order":3,"type_people":"book","title":"Proceedings of the Fourth International Workshop EVALITA 2014","year":2014,"authors_people":"Cristina Bosco, Piero Cosi, Felice Dell'Orletta, Mauro Falcone, Simonetta Montemagni, Maria Simi","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048"],"authors":["Bosco, C.","Cosi, P.","Dell'Orletta, F.","Falcone, M.","Montemagni, S.","Simi, M."],"abstract":"","keywords":["Trattamento Automatico del Linguaggio","Speech Processing","Lingua Italiana"],"pages":"167","url":"http:\/\/clic.humnet.unipi.it\/proceedings\/Proceedings-EVALITA-2014.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"Pisa University Press (Pisa, ITA)","issn":"","isbn":"978-88-67414-72-7","conference_name":"","conference_place":"","conference_date":""},{"id":131990,"last_updated":"2023-11-06 19:33:27","id_people":294078,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Tecnologie del linguaggio e monitoraggio dell'evoluzione delle abilit\u00e0 di scrittura nella scuola secondaria di primo grado","year":2014,"authors_people":"Barbagli A., Lucisano P., Dell'Orletta F., Montemagni S., Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Barbagli, A.","Lucisano, P.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"L'ultimo decennio ha visto l'affermarsi a livello internazionale dell'uso di tecnologie del linguaggio per lo studio dei processi 
di apprendimento. Questo contributo, che si colloca all'interno di una ricerca pi\u00f9 ampia di pedagogia sperimentale, riporta i primi e promettenti risultati di uno studio finalizzato al monitoraggio dell'evoluzione del processo di apprendimento della lingua italiana condotto a partire dalle produzioni scritte degli studenti con strumenti di annotazione linguistica automatica e di estrazione di conoscenza.","keywords":[""],"pages":"23-27","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2014\/12\/Tecnologie-del-linguaggio-per-la-scuola.pdf","volume":"","doi":"10.12871\/CLICIT201415","editors_people":"Roberto Basili, Alessandro Lenci, Bernardo Magnini","editors":["Basili, R.","Lenci, A.","Magnini, B."],"published":"Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)","publisher":"Pisa University Press srl (Pisa, ITA)","issn":"","isbn":"978-8-86741-472-7","conference_name":"First Italian Conference on Computational Linguistics (CLiC-it 2014)","conference_place":"Pisa","conference_date":"9-11 dicembre 2014"},{"id":131935,"last_updated":"2023-11-06 19:33:19","id_people":288050,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Computational Analysis of Historical Documents: An Application to Italian War Bulletins in World War I and II","year":2014,"authors_people":"Boschetti F., Cimino A., Dell'Orletta F., Lebani G.E., Passaro L., Picchi P., Venturi G., Montemagni S., Lenci A.","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Cimino, Andrea","Montemagni, Simonetta","Picchi, Paolo","Boschetti, Federico"],"authors_cnr_id":["5595","12761","14630"],"authors_cnr_institute":[""],"authors":["Boschetti, F.","Cimino, A.","Dell'Orletta, F.","Lebani, G. 
E.","Passaro, L.","Picchi, P.","Venturi, G.","Montemagni, S.","Lenci, A."],"abstract":"World War (WW) I and II represent crucial landmarks in the history of mankind: They have affected the destiny of whole generations and their consequences are still alive throughout Europe. In this paper we present an ongoing project to carry out a computational analysis of Italian war bulletins in WWI and WWII, by applying state-of-the-art tools for NLP and Information Extraction. The annotated texts and extracted information will be explored with a dedicated Web interface, allowing for multidimensional access and exploration of historical events through space and time.","keywords":["World War I"],"pages":"70-75","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2014\/workshops\/LREC2014Workshop-LRT4HDA%20Proceedings.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"Proceedings of workshop on Language resources and technologies for processing and linking historical documents and archives-Deploying Linked Open Data in Cultural Heritage-LREC 2014, 26 May, Reykjavik, Iceland","publisher":"European language resources association (ELRA) (Paris, FRA)","issn":"","isbn":"","conference_name":"LREC 2014","conference_place":"Reykjavik","conference_date":"26 May"},{"id":131989,"last_updated":"2023-11-06 19:33:21","id_people":294073,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Defining an annotation scheme with a view to automatic text simplification","year":2014,"authors_people":"Brunato D., Dell'Orletta F., Venturi G., Montemagni S.","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048","048"],"authors":["Brunato, D.","Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"This paper presents the preliminary steps of ongoing research in the field of automatic text 
simplification. In line with current approaches, we propose here a new annotation scheme specifically conceived to identify the typologies of changes an original sentence undergoes when it is manually simplified. Such a scheme has been tested on a parallel corpus available for Italian, which we have first aligned at sentence level and then annotated with simplification rules.","keywords":[""],"pages":"87-92","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2014\/12\/Text-simplification.pdf","volume":"","doi":"10.12871\/CLICIT2014118","editors_people":"Roberto Basili, Alessandro Lenci, Bernardo Magnini","editors":["Basili, R.","Lenci, A.","Magnini, B."],"published":"Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)","publisher":"Pisa University Press srl (Pisa, ITA)","issn":"","isbn":"978-8-86741-472-7","conference_name":"First Italian Conference on Computational Linguistics (CLiC-it 2014)","conference_place":"Pisa","conference_date":"9-11 dicembre 2014"},{"id":128949,"last_updated":"2023-11-06 19:33:22","id_people":294105,"institutes":["IIT","ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Linguistically-motivated and Lexicon Features for Sentiment Analysis of Italian Tweets","year":2014,"authors_people":"Andrea Cimino, Stefano Cresci, Felice Dell'Orletta, Maurizio Tesconi","authors_cnr":["Cresci, Stefano","Tesconi, Maurizio","Dell'Orletta, Felice"],"authors_cnr_id":["11029","14329"],"authors_cnr_institute":[""],"authors":["Cimino, A.","Cresci, S.","Dell'Orletta, F.","Tesconi, M."],"abstract":"In this paper we describe our approach to EVALITA 2014 SENTIment POLarity Classification (SENTIPOLC) task. We participated only in the Polarity Classification sub-task. 
By resorting to a wide set of general-purpose features qualifying the lexical and grammatical structure of a text, automatically created ad-hoc lexicons and existing free available resources, we achieved the second best accuracy.","keywords":["Lexicons resources"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/294105","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"The 4th Conference for Evaluation of NLP and Speech Tools for Italian (EVALITA)","conference_place":"Pisa","conference_date":"2014"},{"id":131883,"last_updated":"2023-11-06 19:33:26","id_people":285670,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"T2K: a System for Automatically Extracting and Organizing Knowledge from Texts","year":2014,"authors_people":"Felice Dell'Orletta, Giulia Venturi, Andrea Cimino, Simonetta Montemagni","authors_cnr":["Venturi, Giulia","Cimino, Andrea","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048","048"],"authors":["Dell'Orletta, F.","Venturi, G.","Cimino, A.","Montemagni, S."],"abstract":"In this paper, we present T2K, a suite of tools for automatically extracting domain-specific knowledge from collections of Italian and English texts. T2K (Text-To-Knowledge v2) relies on a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine learning which are dynamically integrated to provide an accurate and incremental representation of the content of vast repositories of unstructured documents. Extracted knowledge ranges from domain-specific entities and named entities to the relations connecting them and can be used for indexing document collections with respect to different information types. T2K also includes \"linguistic profiling\" functionalities aimed at supporting the user in constructing the acquisition corpus, e.g. 
in selecting texts belonging to the same genre or characterized by the same degree of specialization or in monitoring the \"added value\" of newly inserted documents. T2K is a web application which can be accessed from any browser through a personal account which has been tested in a wide range of domains.","keywords":["Natural Language Processing","Information Extraction","Knowledge Management"],"pages":"2062-2070","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2014\/pdf\/590_Paper.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-2-9517408-8-4","conference_name":"International Conference on Language Resources and Evaluation (LREC)","conference_place":"Reykjavik","conference_date":"26-31 maggio 2014"},{"id":131991,"last_updated":"2023-11-06 19:33:17","id_people":294084,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Assessing the readability of sentences: which corpora and features?","year":2014,"authors_people":"Dell'Orletta F., Wieling M., Cimino A., Venturi G., Montemagni S.","authors_cnr":["Venturi, Giulia","Cimino, Andrea","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048","048"],"authors":["Dell'Orletta, F.","Wieling, M.","Cimino, A.","Venturi, G.","Montemagni, S."],"abstract":"The paper investigates the problem of sentence readability assessment, which is modelled as a classification task, with a specific view to text simplification. In particular, it addresses two open issues connected with it, i.e. the corpora to be used for training, and the identification of the most effective features to determine sentence readability. An existing readability assessment tool developed for Italian was specialized at the level of training corpus and learning algorithm. 
A maximum entropy-based feature selection and ranking algorithm (grafting) was used to identify the most relevant features: it turned out that assessing the readability of sentences is a complex task, requiring a high number of features, mainly syntactic ones.","keywords":[""],"pages":"163-173","url":"http:\/\/acl2014.org\/acl2014\/W14-18\/pdf\/W14-1820.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"Proceedings of 9th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2014)","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-941643-03-7","conference_name":"9th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2014)","conference_place":"Baltimore, Maryland, USA","conference_date":"26 giugno 2014"},{"id":132044,"last_updated":"2023-11-06 19:33:23","id_people":294419,"institutes":["ILC","ISTI"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Measuring and improving the completeness of natural language requirements","year":2014,"authors_people":"Ferrari A.; Dell'Orletta F.; Spagnolo G.O.; Gnesi S.","authors_cnr":["Dell'Orletta, Felice","Ferrari, Alessio","Spagnolo, Giorgio Oronzo","Gnesi, Stefania"],"authors_cnr_id":["7405"],"authors_cnr_institute":["048","074","074","074"],"authors":["Ferrari, A.","Dell'Orletta, F.","Spagnolo, G. O.","Gnesi, S."],"abstract":"[Context and motivation] System requirements specifications are normally written in natural language. These documents are required to be complete with respect to the input documents of the requirements definition phase, such as preliminary specifications, transcripts of meetings with the customers, etc. In other terms, they shall include all the relevant concepts and all the relevant interactions among concepts expressed in the input documents. 
[Question\/Problem] Means are required to measure and improve the completeness of the requirements with respect to the input documents. [Principal idea\/results] To measure this completeness, we propose two metrics that take into account the relevant terms of the input documents, and the relevant relationships among terms. Furthermore, to improve the completeness, we present a natural language processing tool named Completeness Assistant for Requirements (CAR), which supports the definition of the requirements: the tool helps the requirements engineer in discovering relevant concepts and interactions. [Contribution] We have performed a pilot test with CAR, which shows that the tool can help improving the completeness of the requirements with respect to the input documents. The study has also shown that CAR is actually useful in the identification of specific\/alternative system behaviours that might be overseen without the tool. \u00a9 2014 Springer International Publishing Switzerland.","keywords":["natural language processing","relation extraction","Requirements analysis"],"pages":"23-38","url":"https:\/\/link.springer.com\/chapter\/10.1007%2F978-3-319-05843-6_3#citeas","volume":"8396","doi":"10.1007\/978-3-319-05843-6_3","editors_people":"Camille Salinesi, Inge van de Weerd","editors":["Salinesi, C.","Van De Weerd, I."],"published":"Requirements Engineering: Foundation for Software Quality 20th International Working Conference, REFSQ 2014, Essen, Germany, April 7-10, 2014. Proceedings","publisher":"","issn":"","isbn":"978-3-319-05843-6","conference_name":"REFSQ 2014, Requirements Engineering: Foundation for Software Quality. 
20th International Working Conference","conference_place":"Essen, Germany","conference_date":"7-10 April 2014"},{"id":131979,"last_updated":"2023-11-06 19:33:28","id_people":289308,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"The PAIS\u00c0 Corpus of Italian Web Texts","year":2014,"authors_people":"Verena Lyding, Egon Stemle, Claudia Borghetti, Marco Brunello, Sara Castagnoli, Felice Dell'Orletta, Henrik Dittmann, Alessandro Lenci, Vito Pirrelli","authors_cnr":["Pirrelli, Vito","Dell'Orletta, Felice"],"authors_cnr_id":["326","14329"],"authors_cnr_institute":["048","048"],"authors":["Lyding, V.","Stemle, E.","Borghetti, C.","Brunello, M.","Castagnoli, S.","Dell'Orletta, F.","Dittmann, H.","Lenci, A.","Pirrelli, V."],"abstract":"PAIS\u00c0 is a Creative Commons licensed, large web corpus of contemporary Italian. We describe the design, harvesting, and processing steps involved in its creation.","keywords":[""],"pages":"36-43","url":"http:\/\/aclweb.org\/anthology\/W14-04","volume":"","doi":"","editors_people":"Felix Bildhauer, Roland Sch\u00e4fer","editors":["Bildhauer, F.","Sch\u00e4fer, R."],"published":"Proceedings of the 9th Web as Corpus Workshop (WaC-9)","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"","conference_name":"Corpus annotation, Tree-bank, Corpus design, Corpus harvesting","conference_place":"Gothenburg. 
Sweden","conference_date":"April 26, 2014"},{"id":132004,"last_updated":"2023-11-06 19:33:33","id_people":310619,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Linguistically-driven selection of correct arcs for dependency parsing","year":2013,"authors_people":"Dell'Orletta F.; Venturi G.; Montemagni S.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"LISCA is an unsupervised algorithm aimed at assigning a quality score to each arc generated by a dependency parser in order to produce a decreasing ranking of arcs from correct to incorrect ones. LISCA exploits statistics about a set of linguistically-motivated and dependency-based features extracted from a large corpus of automatically parsed sentences and uses them to assign a quality score to each arc of a parsed sentence belonging to the same domain of the automatically parsed corpus. 
LISCA has been successfully tested on two datasets belonging to two different domains and in all experiments it turned out to outperform different baselines, thus showing to be able to reliably detect correct arcs also representing domain-specific peculiarities.","keywords":["Correct arcs","Dependency parsing"],"pages":"125-136","url":"http:\/\/cys.cic.ipn.mx\/ojs\/index.php\/CyS\/article\/view\/1517","volume":"17","doi":"","editors_people":"","editors":[""],"published":"Computaci\u00f3n y Sistemas","publisher":"","issn":"1405-5546","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131887,"last_updated":"2023-11-06 19:33:29","id_people":285671,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Automatic extraction of function-behaviour-state information from patents","year":2013,"authors_people":"Fantoni G.; Apreda R.; Dell'Orletta F.; Monge M.","authors_cnr":["Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":["048"],"authors":["Fantoni, G.","Apreda, R.","Dell'Orletta, F.","Monge, M."],"abstract":"Patents contain a large quantity of technical information not available elsewhere and therefore very interesting for both academia and industry. The purpose of the research is to try to detect and extract information about the functions, the physical behaviours and the states of the system directly from the text of a patent in an automatic way. The above three categories constitute a well-known set of relevant entities in the theory of engineering design, and their study allows powerful analysis of individual artefacts as well as that of groups of products or technologies. The focus is in providing a handy tool that could speed up and facilitate human analysis and allow tackling also large corpora of documents. 
A second goal is to develop a protocol based on free software and database resources, so that it could be replicable with limited effort by everyone without having to rely on commercial databases. Extracting technical and design information from a document whose aim is more legal than technical, and that is written using a specific jargon, is not a trivial task. The approach chosen to overcome the various issues is to support state-of-the-art Computational Linguistic tools with a large Knowledge Base. The latter has been constructed both manually and automatically and comprises not only keywords but also concepts, relationships and regular expressions. A case study about a very recent patent describing a mechanical device has been included to show the functioning and output of the entire system. \u00a9 2013 Elsevier Ltd. All rights reserved.","keywords":["Function-Behaviour-Structure","Patent informatics","Product development","Semantic elaboration"],"pages":"317-334","url":"http:\/\/www.sciencedirect.com\/science\/article\/pii\/S1474034613000487","volume":"27","doi":"10.1016\/j.aei.2013.04.004","editors_people":"","editors":[""],"published":"Advanced engineering informatics","publisher":"Elsevier Science (Oxford, Regno Unito)","issn":"1474-0346","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":123661,"last_updated":"2023-11-06 19:33:30","id_people":266373,"institutes":["ILC","ITTIG","IGSG"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Domain Adaptation for Dependency Parsing at EVALITA 2011","year":2013,"authors_people":"F. Dell'Orletta and S. Marchi and S. Montemagni and G. Venturi and T. Agnoloni and E. 
Francesconi","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Marchi, Simone","Francesconi, Enrico","Agnoloni, Tommaso","Dell'Orletta, Felice"],"authors_cnr_id":["5595","10442","10498","11403","14329"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Marchi, S.","Montemagni, S.","Venturi, G.","Agnoloni, T.","Francesconi, E."],"abstract":"The domain adaptation task was aimed at investigating techniques for adapting state-of-the-art dependency parsing systems to new domains. Both the language dealt with, i.e. Italian, and the target domain, namely the legal domain, represent two main novelties of the task organised at Evalita 2011 with respect to previous domain adaptation initiatives. In this paper, we define the task and describe how the datasets were created from different resources. In addition, we characterize the different approaches of the participating systems, report the test results, and provide a first analysis of these results.","keywords":["Dependency Parsing","Domain Adaptation","Self-training","Active Learning","Legal-NLP"],"pages":"58-69","url":"https:\/\/publications.cnr.it\/doc\/266373","volume":"7689","doi":"","editors_people":"Bernardo Magnini, Francesco Cutugno, Mauro Falcone, Emanuele Pianta","editors":["Magnini, B.","Cutugno, F.","Falcone, M.","Pianta, E."],"published":"Evaluation of NLP and Speech Tools for Italian","publisher":"Springer (Berlin Heidelberg, DEU)","issn":"","isbn":"978-3-642-35827-2","conference_name":"","conference_place":"","conference_date":""},{"id":131888,"last_updated":"2023-11-06 19:33:31","id_people":285772,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Linguistic Profiling based on General-purpose Features and Native Language Identification","year":2013,"authors_people":"Andrea Cimino, Felice Dell'Orletta, Giulia Venturi and Simonetta Montemagni","authors_cnr":["Venturi, Giulia","Cimino, Andrea","Montemagni, Simonetta","Dell'Orletta, 
Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048","048"],"authors":["Cimino, A.","Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"In this paper, we describe our approach to native language identification and discuss the results we submitted as participants to the First NLI Shared Task. By resorting to a wide set of general-purpose features qualifying the lexical and grammatical structure of a text, rather than to ad hoc features specifically selected for the NLI task, we achieved encouraging results, which show that the proposed approach is general-purpose and portable across different tasks, domains and languages.","keywords":["Native Language Identification","Linguistic Profiling"],"pages":"207-215","url":"http:\/\/www.aclweb.org\/anthology\/W13-1727","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-937284-47-3","conference_name":"8th workshop on \"Innovative Use of NLP for Building Educational Applications\"","conference_place":"Atlanta (Georgia)","conference_date":"13 giugno 2013"},{"id":131886,"last_updated":"2023-11-06 19:33:32","id_people":278421,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Linguistic Profiling of Texts Across Textual Genre and Readability Level. 
An exploratory Study on Italian Fictional Prose","year":2013,"authors_people":"Dell'Orletta F and Montemagni S and VENTURI G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"","keywords":[""],"pages":"189-197","url":"https:\/\/publications.cnr.it\/doc\/278421","volume":"","doi":"","editors_people":"","editors":[""],"published":"Proceedings of Recent Advances in Natural Language Processing (RANLP 2013)","publisher":"","issn":"","isbn":"","conference_name":"Recent Advances in Natural Language Processing (RANLP 2013)","conference_place":"Hissar, Bulgaria","conference_date":"7-13 settembre"},{"id":131889,"last_updated":"2023-11-06 19:33:36","id_people":285773,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Unsupervised Linguistically-Driven Reliable Dependency Parses Detection and Self-Training for Adaptation to the Biomedical Domain","year":2013,"authors_people":"Felice Dell'Orletta, Giulia Venturi, Simonetta Montemagni","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"In this paper, a new self-training method for domain adaptation is illustrated, where the selection of reliable parses is carried out by an unsupervised linguistically-driven algorithm, ULISSE. 
The method has been tested on biomedical texts with results showing a significant improvement with respect to considered baselines, which demonstrates its ability to capture both reliability of parses and domain-specificity of linguistic constructions.","keywords":["Self-training","Domain Adaptation","Biomedical Texts"],"pages":"45-53","url":"http:\/\/www.aclweb.org\/anthology\/W13-1906","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-937284-55-8","conference_name":"12th workshop on \"Biomedical Natural Language Processing\" (BioNLP)","conference_place":"Sofia (Bulgaria)","conference_date":"8-9 agosto 2013"},{"id":131848,"last_updated":"2023-11-06 19:33:34","id_people":277748,"institutes":["ILC","ISTI"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Mining commonalities and variabilities from natural language documents","year":2013,"authors_people":"Ferrari A., Spagnolo G.O., Dell'Orletta F.","authors_cnr":["Spagnolo, Giorgio Oronzo","Dell'Orletta, Felice","Ferrari, Alessio"],"authors_cnr_id":["14329","15226"],"authors_cnr_institute":["074","048","074"],"authors":["Ferrari, A.","Spagnolo, G. O.","Dell'Orletta, F."],"abstract":"A company who wishes to enter an established market with a new, competitive product is required to analyse the product solutions of the competitors. Identifying and comparing the features provided by the other vendors might greatly help during the market analysis. However, mining common and variant features from the publicly available documents of the competitors is a time consuming and error-prone task. In this paper, we suggest to employ a natural language processing approach based on contrastive analysis to identify commonalities and variabilities from the brochures of a group of vendors. 
We present a first step towards a practical application of the approach, in the context of the market of Communications-Based Train Control (CBTC) systems.","keywords":["Software Product Lines","Variability Mining","CBTC","D. 2 SOFTWARE ENGINEERING","68N30"],"pages":"116-120","url":"http:\/\/dl.acm.org\/citation.cfm?id=2491634","volume":"","doi":"","editors_people":"Tomoji Kishi","editors":["Kishi, T."],"published":"","publisher":"","issn":"","isbn":"978-1-4503-1968-3","conference_name":"SPLC 2013-17th International Software Product Line Conference","conference_place":"Tokyo, Japan","conference_date":"26-30 August 2013"},{"id":132003,"last_updated":"2023-11-06 19:33:39","id_people":310580,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Lessico settoriale e lessico comune dell'estrazione di terminologia specialistica da corpora di dominio","year":2012,"authors_people":"Bonin F., Dell'Orletta F., Montemagni S., Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Bonin, F.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"","keywords":[""],"pages":"207-220","url":"https:\/\/publications.cnr.it\/doc\/310580","volume":"","doi":"","editors_people":"","editors":[""],"published":"Lessico e Lessicologia. 
Atti del XLIV congresso internazionale di studi della societ\u00e0 di linguistica italiana","publisher":"Bulzoni Editore (Roma, ITA)","issn":"","isbn":"978-88-7870-655-2","conference_name":"XLIV congresso internazionale di studi della societ\u00e0 di linguistica italiana","conference_place":"Viterbo","conference_date":"27-29 settembre 2010"},{"id":131746,"last_updated":"2023-11-06 19:33:41","id_people":219489,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"The SPLeT-2012 Shared Task on Dependency Parsing of Legal Texts","year":2012,"authors_people":"Dell'Orletta, Felice [1]; Marchi, Simone [1]; Montemagni, Simonetta [1]; Plank, Barbara [2]; Venturi, Giulia [3]","authors_cnr":["Montemagni, Simonetta","Marchi, Simone","Dell'Orletta, Felice"],"authors_cnr_id":["5595","10442","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Dell'Orletta, F.","Marchi, S.","Montemagni, S.","Plank, B.","Venturi, G."],"abstract":"The 4th Workshop on \"Semantic Processing of Legal Texts\" (SPLeT-2012) presents the first multilingual shared task on Dependency Parsing of Legal Texts. In this paper, we define the general task and its internal organization into sub-tasks, describe the datasets and the domain-specific linguistic peculiarities characterizing them. 
We finally report the results achieved by the participating systems, describe the underlying approaches and provide a first analysis of the final test results.","keywords":["Dependency Parsing","Domain Adaptation","Legal Text Processing"],"pages":"","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2012\/workshops\/27.LREC%202012%20Workshop%20Proceedings%20SPLeT.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Fourth Workshop on Semantic Processing of Legal Texts (SPLeT 2012)-First Shared Task on Dependency Parsing of Legal Texts (SPLeT 2012)","conference_place":"Istanbul","conference_date":"27 Maggio 2012"},{"id":131745,"last_updated":"2023-11-06 19:33:37","id_people":219483,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Domain Adaptation for Dependency Parsing at Evalita 2011","year":2012,"authors_people":"Dell'Orletta, Felice [1]; Marchi, Simone [1]; Montemagni, Simonetta [1]; Venturi, Giulia [2]; Agnoloni, Tommaso [3]; Francesconi, Enrico [3]","authors_cnr":["Agnoloni, Tommaso","Montemagni, Simonetta","Marchi, Simone","Francesconi, Enrico","Dell'Orletta, Felice"],"authors_cnr_id":["5595","10442","10498","14329"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Marchi, S.","Montemagni, S.","Venturi, G.","Agnoloni, T.","Francesconi, E."],"abstract":"The domain adaptation task was aimed at investigating techniques for adapting state-of-the-art dependency parsing systems to new domains. Both the language dealt with, i.e. Italian, and the target domain, namely the legal domain, represent two main novelties of the task organised at Evalita 2011. In this paper, we define the task and describe how the datasets were created from different resources. 
In addition, we characterize the different approaches of the participating systems, report the test results, and provide a first analysis of these results.","keywords":["Dependency Parsing","Domain Adaptation","Legal Text Processing"],"pages":"1-7","url":"http:\/\/www.evalita.it\/sites\/evalita.fbk.eu\/files\/working_notes2011\/Domain_Adaptation\/","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Evaluation of NLP and Speech Tools for Italian (EVALITA 2011): Domain Adaptation track","conference_place":"Roma","conference_date":"24-25 Gennaio 2012"},{"id":131885,"last_updated":"2023-11-06 19:33:38","id_people":278420,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Genre-oriented Readability Assessment: a Case Study","year":2012,"authors_people":"Dell'Orletta F and Montemagni S and VENTURI G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"","keywords":[""],"pages":"91-98","url":"https:\/\/publications.cnr.it\/doc\/278420","volume":"","doi":"","editors_people":"","editors":[""],"published":"Proceedings of Workshop on \"Speech and Language Processing Tools in Education\" (SLP-TED)","publisher":"","issn":"","isbn":"978-1-62748-389-6","conference_name":"Workshop on \"Speech and Language Processing Tools in Education\" (SLP-TED)","conference_place":"Mumbai, India","conference_date":"15 December, 2012"},{"id":132015,"last_updated":"2023-11-06 19:33:43","id_people":138775,"institutes":["ILC","IRISS"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Tecnologie linguistico-computazionali per il monitoraggio della competenza linguistica italiana degli alunni stranieri nella scuola primaria e secondaria","year":2011,"authors_people":"Dell'Orletta Felice; 
Montemagni Simonetta; Vecchi Eva Maria; Venturi Giulia","authors_cnr":["Vecchi, Eva Maria","Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["071","071","048","048"],"authors":["Dell'Orletta, F.","Montemagni, S.","Vecchi, E. M.","Venturi, G."],"abstract":"La possibilit\u00e0 di disporre di tecnologie avanzate e innovative che permettano di monitorare la competenza linguistica degli alunni stranieri e, al contempo, valutare l'adeguatezza dei materiali didattici a loro offerti pu\u00f2 essere di supporto all'insegnante nell'orientare la propria azione formativa, rendendo cos\u00ec il processo di integrazione linguistico-culturale meno faticoso e traumatico. In tale ottica, questo studio, realizzato col supporto di una piattaforma ormai consolidata di metodi e strumenti per il trattamento automatico dell'italiano, costituisce il primo tentativo condotto in relazione alla lingua italiana, per mettere a punto una metodologia di monitoraggio linguistico rivolta specificamente agli studenti apprendenti la lingua italiana come L2 ed alle loro produzioni scritte.","keywords":["Trattamento Automatico del Linguaggio","Stranieri","Lingua italiana"],"pages":"319-336","url":"https:\/\/publications.cnr.it\/doc\/138775","volume":"","doi":"","editors_people":"Bruno Giovanni Carlo; Caruso Immacolata; Sanna Manuela; Vellecco Immacolata","editors":["Bruno, G. 
C.","Caruso, I.","Sanna, M.","Vellecco, I."],"published":"Percorsi Migranti","publisher":"Mc Graw-Hill (Milano, ITA)","issn":"","isbn":"978-88-386-7296-5","conference_name":"","conference_place":"","conference_date":""},{"id":131676,"last_updated":"2023-11-06 19:33:42","id_people":205510,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"READ-IT: assessing readability of Italian texts with a view to text simplification","year":2011,"authors_people":"Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In this paper, we propose a new approach to readability assessment with a specific view to the task of text simplification: the intended audience includes people with low literacy skills and\/or with mild cognitive impairment. READ-IT represents the first advanced readability assessment tool for what concerns Italian, which combines traditional raw text features with lexical, morpho-syntactic and syntactic information. In READ-IT readability assessment is carried out with respect to both documents and sentences where the latter represents an important novelty of the proposed approach creating the prerequisites for aligning the readability assessment step with the text simplification process. 
READ-IT shows a high accuracy in the document classification task and promising results in the sentence classification scenario.","keywords":["Readability Assessment","Text Simplification"],"pages":"73-83","url":"http:\/\/dl.acm.org\/citation.cfm?id=2140511","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-937284-14-5","conference_name":"SLPAT '11 Proceedings of the Second Workshop on Speech and Language Processing for Assistive Technologies","conference_place":"Edimburgo, UK","conference_date":"30 Luglio 2011"},{"id":131675,"last_updated":"2023-11-06 19:33:46","id_people":205505,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"ULISSE: an unsupervised algorithm for detecting reliable dependency parses","year":2011,"authors_people":"Felice Dell'Orletta, Giulia Venturi and Simonetta Montemagni","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"In this paper we present ULISSE, an unsupervised linguistically--driven algorithm to select reliable parses from the output of a dependency parser. Different experiments were devised to show that the algorithm is robust enough to deal with the output of different parsers and with different languages, as well as to be used across different domains. 
In all cases, ULISSE appears to outperform the baseline algorithms.","keywords":["Dependency Parsing","Selection of Reliable Parses","Unsupervised Algorithm"],"pages":"115-124","url":"http:\/\/dl.acm.org\/citation.cfm?id=2018950","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-932432-92-3","conference_name":"CoNLL '11 Proceedings of the Fifteenth Conference on Computational Natural Language Learning","conference_place":"Portland, Oregon, USA","conference_date":"23-24 Giugno 2011"},{"id":131689,"last_updated":"2023-11-06 19:33:45","id_people":205737,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Towards an NLP-based approach for measuring syntactic complexity: preliminary experiments with Italian texts from different registers","year":2011,"authors_people":"Felice Dell'Orletta, Simonetta Montemagni","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048"],"authors":["Dell'Orletta, F.","Montemagni, S."],"abstract":"In this paper, we explore how NLP can be used to automatically identify relevant syntactic complexity features in texts with the aim of assessing their correlation with specific linguistic registers. Our final goal is twofold. On the one hand, we demonstrate that automatic morpho-syntactic and syntactic annotation of texts provides sufficiently accurate output for use in the automatic extraction and measurement of syntactic complexity features. 
On the other hand, we identify the set of syntactic features strongly correlating with considered linguistic registers.","keywords":["Language Variation","Natural Language Processing","Syntactic Complexity"],"pages":"","url":"http:\/\/www.benszm.net\/BSBWWS\/Dellorletta_Montemagni.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Workshop on \"Cross-linguistic and language-internal variation in text and speech: focus on the joint analysis of multiple characteristics\"","conference_place":"Freiburg Institute for Advanced Studies (FRIAS), University of Freiburg","conference_date":"29\/10\/2010"},{"id":131884,"last_updated":"2023-11-06 19:33:53","id_people":278419,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Singling out Legal Knowledge from World Knowledge","year":2010,"authors_people":"Bonin F and Dell'Orletta F and VENTURI G. and Montemagni S","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Bonin, F.","Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"","keywords":[""],"pages":"217-229","url":"https:\/\/publications.cnr.it\/doc\/278419","volume":"","doi":"","editors_people":"","editors":[""],"published":"Informatica e diritto","publisher":"Edizioni Scientifiche Italiane (Firenze, Italia)","issn":"0390-0975","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131238,"last_updated":"2023-11-06 19:33:47","id_people":84796,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"A Contrastive Approach to Multi-word Extraction from Domain-specific Corpora","year":2010,"authors_people":"Bonin F.; Dell'Orletta F.; Montemagni S.; Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, 
Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Bonin, F.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In this paper we present a novel approach to multi-word terminology extraction combining a well-known automatic term recognition approach, the C-NC value method, with a contrastive ranking technique, aimed at refining obtained results either by filtering noise due to common words or by discerning between semantically different types of terms within heterogeneous terminologies. The proposed methodology has been tested in two case studies carried out in the History of Art and Legal domains with promising results.","keywords":["Terminology Extraction","Domain-specific Corpora","Multi-word Expression"],"pages":"3222-3229","url":"https:\/\/publications.cnr.it\/doc\/84796","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"2-9517408-6-7","conference_name":"Seventh International Conference on Language Resources and Evaluation","conference_place":"Valletta, Malta","conference_date":"19-21 maggio 2010"},{"id":131244,"last_updated":"2023-11-06 19:33:50","id_people":84802,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Contrastive filtering of domain specific multi-word terms from different types of corpora","year":2010,"authors_people":"Bonin F.; Dell'Orletta F.; Venturi G.; Montemagni S.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Bonin, F.","Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"In this paper we tackle the challenging task of Multi-word term (MWT) extraction from different types of specialized corpora. 
Contrastive filtering of previously extracted MWTs results in a considerable increment of acquired domain-specific terms.","keywords":["multi-word terms extraction","corpora"],"pages":"76-79","url":"https:\/\/publications.cnr.it\/doc\/84802","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-7-900268-00-6","conference_name":"The 23rd International Conference on Computational Linguistics (COLING 2010). Multiword Expressions: from Theory to Applications (MWE 2010)","conference_place":"Beijing, China","conference_date":"28 agosto 2010"},{"id":131241,"last_updated":"1970-01-01 01:00:00","id_people":84799,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Evalita'09 Parsing Task: comparing dependency parsers and treebanks","year":2010,"authors_people":"Bosco C.; Montemagni S.; Mazzei A.; Dell'Orletta F.; Lenci A.","authors_cnr":["Montemagni, Simonetta"],"authors_cnr_id":["5595"],"authors_cnr_institute":[""],"authors":["Bosco, C.","Montemagni, S.","Mazzei, A.","Dell'Orletta, F.","Lenci, A."],"abstract":"","keywords":["dependency parsing","dependency treebank"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84799","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Evaluation of NLP and Speech Tools for Italian. 
EVALITA 2009","conference_place":"Reggio Emilia, Italy","conference_date":"2010"},{"id":131232,"last_updated":"1970-01-01 01:00:00","id_people":84789,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Comparing the Influence of Different Treebank Annotations on Dependency Parsing","year":2010,"authors_people":"Bosco C.; Montemagni S.; Mazzei A.; Lombardo V.; Dell'Orletta F.; Lenci A.; Lesmo L.; Attardi G.; Simi M.; Lavelli A.; Hall J.; Nilsson J.; Nivre J.","authors_cnr":["Montemagni, Simonetta"],"authors_cnr_id":["5595"],"authors_cnr_institute":[""],"authors":["Bosco, C.","Montemagni, S.","Mazzei, A.","Lombardo, V.","Dell'Orletta, F.","Lenci, A.","Lesmo, L.","Attardi, G.","Simi, M.","Lavelli, A.","Hall, J.","Nilsson, J.","Nivre, J."],"abstract":"","keywords":["Parsing","Corpus (creation, annotation, etc.)","Evaluation methodologies"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84789","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Seventh International Conference on Language Resources and Evaluation","conference_place":"Valletta, Malta","conference_date":"2010"},{"id":131224,"last_updated":"2023-11-06 19:33:51","id_people":84781,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Improvements in Parsing the Index Thomisticus Treebank. 
Revision, Combination and a Feature Model for Medieval Latin","year":2010,"authors_people":"Passarotti M.; Dell'Orletta F.","authors_cnr":["Dell'Orletta, Felice"],"authors_cnr_id":[""],"authors_cnr_institute":[""],"authors":["Passarotti, M.","Dell'Orletta, F."],"abstract":"","keywords":["Parsing","Corpus (creation, annotation, etc.)"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84781","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Seventh International Conference on Language Resources and Evaluation","conference_place":"Valletta, Malta","conference_date":""},{"id":131317,"last_updated":"2023-11-06 19:33:52","id_people":112966,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Lessico settoriale e lessico comune nell'estrazione di terminologia specialistica da corpora di dominio","year":2010,"authors_people":"Bonin F.; Dell'Orletta F.; Montemagni S.; Venturi G.","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Montemagni, Simonetta"],"authors_cnr_id":["5595"],"authors_cnr_institute":[""],"authors":["Bonin, F.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"","keywords":["Automatic Term Extraction"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/112966","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"XLIV Congresso Internazionale di Studi della Societ\u00e0 di Linguistica Italiana","conference_place":"Viterbo, Universit\u00e0 degli Stud","conference_date":""},{"id":131639,"last_updated":"2012-03-29 15:58:24","id_people":173723,"institutes":["ILC","IRISS"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Tecnologie linguistico-computazionali per il monitoraggio delle competenze linguistiche di apprendenti l'italiano come L2","year":2010,"authors_people":"Dell'Orletta F.; Montemagni S.; Vecchi 
E. M.; Venturi G.","authors_cnr":["Venturi, Giulia","Vecchi, Eva Maria","Venturi, Giulia","Montemagni, Simonetta"],"authors_cnr_id":["5595"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Montemagni, S.","Vecchi, E. M.","Venturi, G."],"abstract":"","keywords":["Natural Language Processing, Educational Linguistics, Language Learning"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/173723","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Congresso \"IT. L2: italiano lingua seconda nell'universit\u00e0, nella scuola e sul territorio. Esperienze didattiche e ricerche\" Universit\u00e0 del Piemonte Orientale \"Amedeo Avogadro\", Facolt\u00e0 di Lettere e Filosofia","conference_place":"Vercelli","conference_date":"2010"},{"id":132202,"last_updated":"2023-11-06 19:33:48","id_people":367784,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"ConnectToLife (modulo semantico)-Rapporto tecnico finale","year":2010,"authors_people":"Vito Pirrelli, Alessandro Lenci, Simonetta Montemagni, Felice Dell'Orletta, Emiliano Giovannetti, Simone Marchi","authors_cnr":["Lenci, Alessandro","Pirrelli, Vito","Montemagni, Simonetta","Marchi, Simone","Giovannetti, Emiliano","Dell'Orletta, Felice"],"authors_cnr_id":["326","5595","10442","11969","14329"],"authors_cnr_institute":[""],"authors":["Pirrelli, V.","Lenci, A.","Montemagni, S.","Dell'Orletta, F.","Giovannetti, E.","Marchi, S."],"abstract":"Il presente documento costituisce il rapporto tecnico finale del progetto Connect-To-Life (modulo semantico) relativo alle attivit\u00e0 svolte dall'unit\u00e0 ILC-CNR.","keywords":["annotazione linguistica","estrazione di termini","clustering semantico","trattamento automatico della lingua","costruzione di 
ontologie"],"pages":"16","url":"https:\/\/publications.cnr.it\/doc\/367784","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131879,"last_updated":"2023-11-06 19:33:59","id_people":184585,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Text-2-Knowledge: una piattaforma linguistico-computazionale per l'estrazione di conoscenza da testi","year":2009,"authors_people":"Dell'Orletta F., Lenci A., Marchi S., Montemagni S., Pirrelli V.","authors_cnr":["Pirrelli, Vito","Montemagni, Simonetta","Marchi, Simone","Dell'Orletta, Felice"],"authors_cnr_id":["326","5595","10442","14329"],"authors_cnr_institute":["048","048","048","048"],"authors":["Dell'Orletta, F.","Lenci, A.","Marchi, S.","Montemagni, S.","Pirrelli, V."],"abstract":"The paper describes the automatic extraction of domain knowledge from Italian document collections and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. 
Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.","keywords":["Term extraction","Ontology Learning"],"pages":"285-300","url":"https:\/\/publications.cnr.it\/doc\/184585","volume":"","doi":"","editors_people":"Giacomo Ferrari, Ruben Benatti, Monica Mosca","editors":["Ferrari, G.","Benatti, R.","Mosca, M."],"published":"","publisher":"Bulzoni (Roma, ITA)","issn":"","isbn":"978-88-7870-469-5","conference_name":"","conference_place":"","conference_date":""},{"id":131202,"last_updated":"2023-11-06 19:33:56","id_people":84753,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Reverse Revision and Linear Tree Combination for Dependency Parsing","year":2009,"authors_people":"Attardi G.; Dell'Orletta F.","authors_cnr":["Dell'Orletta, Felice"],"authors_cnr_id":[""],"authors_cnr_institute":[""],"authors":["Attardi, G.","Dell'Orletta, F."],"abstract":"","keywords":["Dependency parsing, revision parsing, dependency parsing combination"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84753","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"North American Chapter of the Association for Computational Linguistics-Human Language Technologies","conference_place":"Boulder, Colorado","conference_date":""},{"id":131209,"last_updated":"2023-11-06 19:34:00","id_people":84761,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"The Tanl Named Entity Recognizer at Evalita 2009","year":2009,"authors_people":"Attardi G.; Dell'Orletta F.; Simi M.; Dei Rossi S.; Vecchi E. 
M.","authors_cnr":["Dell'Orletta, Felice"],"authors_cnr_id":[""],"authors_cnr_institute":[""],"authors":["Attardi, G.","Dell'Orletta, F.","Simi, M.","Dei Rossi, S.","Vecchi, E. M."],"abstract":"","keywords":["Named Entity Recognizer"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84761","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Evaluation of NLP and Speech Tools for Italian 2009","conference_place":"Reggio Emilia, Italy","conference_date":""},{"id":131185,"last_updated":"2023-11-06 19:33:54","id_people":84734,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Accurate Dependency Parsing with a Stacked Multilayer Perceptron","year":2009,"authors_people":"Attardi G.; Dell'Orletta F.; Simi M.; Turian J.","authors_cnr":["Dell'Orletta, Felice"],"authors_cnr_id":[""],"authors_cnr_institute":[""],"authors":["Attardi, G.","Dell'Orletta, F.","Simi, M.","Turian, J."],"abstract":"","keywords":["Dependency Parsing","Parsing, Multilayer Perceptron"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84734","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Evaluation of NLP and Speech Tools for Italian 2009","conference_place":"Reggio Emilia, Italy","conference_date":""},{"id":131204,"last_updated":"2023-11-06 19:33:58","id_people":84755,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"TETI: a TimeML Compliant TimEx Tagger for Italian","year":2009,"authors_people":"Caselli T.; Dell'Orletta F.; Prodanof I.","authors_cnr":["Dell'Orletta, Felice","Caselli, Tommaso","Prodanof, Irina Raluca"],"authors_cnr_id":["16686"],"authors_cnr_institute":[""],"authors":["Caselli, T.","Dell'Orletta, F.","Prodanof, I."],"abstract":"","keywords":["temporal expression, information 
extraction"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84755","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"International Multiconference on Computer Science and Information Technology","conference_place":"Mragowo, Polonia","conference_date":""},{"id":131205,"last_updated":"2023-11-06 19:33:57","id_people":84756,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Temporal Relations with Signals: the Case of Italian Temporal Prepositions","year":2009,"authors_people":"Caselli T.; Dell'Orletta F.; Prodanof I.","authors_cnr":["Dell'Orletta, Felice","Caselli, Tommaso","Prodanof, Irina Raluca"],"authors_cnr_id":["16686"],"authors_cnr_institute":[""],"authors":["Caselli, T.","Dell'Orletta, F.","Prodanof, I."],"abstract":"","keywords":["temporal relations, taggers, information extraction"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84756","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"16th International Symposium on Temporal Representation and Reasoning","conference_place":"Brixen\/Bressanone, Italia","conference_date":""},{"id":131184,"last_updated":"2023-11-06 19:33:55","id_people":84733,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Ensemble system for Part-of-Speech tagging","year":2009,"authors_people":"Dell'Orletta F.","authors_cnr":["Dell'Orletta, Felice"],"authors_cnr_id":[""],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F."],"abstract":"","keywords":["Part-of-Speech tagging","Ensemble system"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84733","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Evaluation of NLP and Speech Tools for Italian, 2009","conference_place":"Reggio Emilia, 
Italy","conference_date":""},{"id":130998,"last_updated":"2023-11-06 19:34:03","id_people":64541,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio","year":2008,"authors_people":"Dell'Orletta F.; Lenci A.; Marchi S.; Montemagni S.; Pirrelli V.; Venturi G.","authors_cnr":["Dell'Orletta, Felice","Pirrelli, Vito","Montemagni, Simonetta","Marchi, Simone"],"authors_cnr_id":["326","5595","10442"],"authors_cnr_institute":["048","048","048","048"],"authors":["Dell'Orletta, F.","Lenci, A.","Marchi, S.","Montemagni, S.","Pirrelli, V.","Venturi, G."],"abstract":"The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. 
Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.","keywords":["Natural Language Processing","Machine Learning","Knowledge extraction from texts","Ontology learning","Legal ontologies"],"pages":"197-218","url":"https:\/\/publications.cnr.it\/doc\/64541","volume":"26","doi":"","editors_people":"","editors":[""],"published":"Aida Informazioni (Online)","publisher":"Aida (Roma, Italia)","issn":"1594-2201","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131160,"last_updated":"2023-11-06 19:34:04","id_people":84707,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio","year":2008,"authors_people":"Dell'Orletta Felice; Lenci Alessandro; Marchi Simone; Montemagni Simonetta; Pirrelli Vito; Venturi Giulia","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Pirrelli, Vito","Montemagni, Simonetta","Marchi, Simone"],"authors_cnr_id":["326","5595","10442"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Lenci, A.","Marchi, S.","Montemagni, S.","Pirrelli, V.","Venturi, G."],"abstract":"The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. 
Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.","keywords":["Natural Language Processing","Machine Learning","Knowledge extraction from texts","Ontology learning","Legal ontologies"],"pages":"197-218","url":"http:\/\/www.assiterm91.it\/wp-content\/uploads\/2010\/11\/Convegno-2008.pdf","volume":"Anno 26, numero 1-2","doi":"","editors_people":"","editors":[""],"published":"Terminologia analisi testuale e documentazione nella citt\u00e0 digitale","publisher":"Aida (Roma, Italia)","issn":"1594-2201","isbn":"","conference_name":"Atti del Convegno Nazionale Ass. I. Term","conference_place":"Arcavacata di Rende (CS)","conference_date":"5-7\/06\/2008"},{"id":131151,"last_updated":"2023-11-06 19:34:01","id_people":84698,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Acquiring Legal Ontologies from Domain-specific Texts","year":2008,"authors_people":"Dell'Orletta F.; Lenci A.; Montemagni S.; Marchi S.; Pirrelli V.; Venturi G.","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Pirrelli, Vito","Montemagni, Simonetta","Marchi, Simone"],"authors_cnr_id":["326","5595","10442"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Lenci, A.","Montemagni, S.","Marchi, S.","Pirrelli, V.","Venturi, G."],"abstract":"The paper reports on methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts in the environmental domain. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. 
Evaluated results, however preliminary, are very encouraging, showing the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.","keywords":["Ontology learning","Document management","knowledge extraction from texts","Natural Language Processing"],"pages":"98-101","url":"https:\/\/publications.cnr.it\/doc\/84698","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"LangTech 2008","conference_place":"Roma","conference_date":"28-29\/02\/2008"},{"id":130994,"last_updated":"2023-11-06 19:34:11","id_people":64537,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Maximum Entropy for Italian PoS Tagging","year":2007,"authors_people":"Dell'Orletta F., Federico M., Lenci A., Montemagni S., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Federico, Maria","Pirrelli, Vito","Montemagni, Simonetta"],"authors_cnr_id":["326","5595"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Federico, M.","Lenci, A.","Montemagni, S.","Pirrelli, V."],"abstract":"L'articolo illustra le prestazioni del ILC-UniPi MaxEnt PoS Tagger in Evalita 2007. 
The report contains a description of the ILC-UniPi MaxEnt PoS Tagger performance in Evalita 2007.","keywords":[""],"pages":"10-11","url":"https:\/\/publications.cnr.it\/doc\/64537","volume":"IV(2)","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131372,"last_updated":"2023-11-06 19:34:08","id_people":136459,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Corpus-based Modelling of Grammar Variation","year":2007,"authors_people":"Dell'Orletta F., Lenci A., Montemagni S., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Pirrelli, Vito","Montemagni, Simonetta"],"authors_cnr_id":["326","5595"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Lenci, A.","Montemagni, S.","Pirrelli, V."],"abstract":"","keywords":["Grammar variation","stochastic parsing","linguistic typology"],"pages":"38-55","url":"https:\/\/publications.cnr.it\/doc\/136459","volume":"","doi":"","editors_people":"Andrea Sans\u00f2","editors":["Sans\u00f2, A."],"published":"Language resources and linguistic theory","publisher":"Angeli (Milano, ITA)","issn":"","isbn":"9788846489449","conference_name":"","conference_place":"","conference_date":""},{"id":131150,"last_updated":"2023-11-06 19:34:12","id_people":84696,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Maximum Entropy for Italian PoS Tagging","year":2007,"authors_people":"Dell'Orletta F., Federico M., Lenci A., Montemagni S., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Federico, Maria","Pirrelli, Vito","Montemagni, Simonetta"],"authors_cnr_id":["326","5595"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Federico, M.","Lenci, A.","Montemagni, S.","Pirrelli, 
V."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84696","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Evaluation of NLP Tools for Italian-EVALITA 2007","conference_place":"Roma","conference_date":""},{"id":131142,"last_updated":"2023-11-06 19:34:21","id_people":84687,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Text-2-Knowledge: una piattaforma linguistico-computazionale per l'estrazione di conoscenza da testi","year":2007,"authors_people":"Dell'Orletta F., Lenci A., Marchi S., Montemagni S., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Pirrelli, Vito","Montemagni, Simonetta","Marchi, Simone"],"authors_cnr_id":["326","5595","10442"],"authors_cnr_institute":["048","048","048","048"],"authors":["Dell'Orletta, F.","Lenci, A.","Marchi, S.","Montemagni, S.","Pirrelli, V."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84687","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"XL Congresso Internazionale di Studi della Societ\u00e0 di Linguistica Italiana (SLI 2006)","conference_place":"Roma","conference_date":""},{"id":131542,"last_updated":"2023-11-06 19:34:15","id_people":157412,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"Segmentazione di un Testo Italiano in Token","year":2007,"authors_people":"Dell'Orletta F., Federico M., Giovannetti E., Lenci A., Marchi S., Trabucco A., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Giovannetti, Emiliano","Trabucco, Andrea","Federico, Maria","Pirrelli, Vito","Marchi, Simone"],"authors_cnr_id":["326","10442"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Federico, M.","Giovannetti, E.","Lenci, A.","Marchi, S.","Trabucco, A.","Pirrelli, 
V."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/157412","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131543,"last_updated":"2023-11-06 19:34:09","id_people":157413,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"Language Recognition Tool, Specifiche di Implementazione","year":2007,"authors_people":"Dell'Orletta F., Federico M., Giovannetti E., Lenci A., Marchi S., Trabucco A., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Giovannetti, Emiliano","Trabucco, Andrea","Federico, Maria","Pirrelli, Vito","Marchi, Simone"],"authors_cnr_id":["326","10442"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Federico, M.","Giovannetti, E.","Lenci, A.","Marchi, S.","Trabucco, A.","Pirrelli, V."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/157413","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131544,"last_updated":"2023-11-06 19:34:06","id_people":157414,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"Analisi Morfosintattica per l'Italiano","year":2007,"authors_people":"Dell'Orletta F., Federico M., Giovannetti E., Lenci A., Marchi S., Trabucco A., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Giovannetti, Emiliano","Trabucco, Andrea","Federico, Maria","Pirrelli, Vito","Marchi, Simone"],"authors_cnr_id":["326","10442"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Federico, M.","Giovannetti, E.","Lenci, A.","Marchi, S.","Trabucco, A.","Pirrelli, 
V."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/157414","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131545,"last_updated":"2023-11-06 19:34:17","id_people":157415,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"Specifiche di Chunking per l'Italiano","year":2007,"authors_people":"Dell'Orletta F., Federico M., Giovannetti E., Lenci A., Marchi S., Trabucco A., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Giovannetti, Emiliano","Trabucco, Andrea","Federico, Maria","Pirrelli, Vito","Marchi, Simone"],"authors_cnr_id":["326","10442"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Federico, M.","Giovannetti, E.","Lenci, A.","Marchi, S.","Trabucco, A.","Pirrelli, V."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/157415","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131546,"last_updated":"2023-11-06 19:34:19","id_people":157416,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"Specifiche di Named Entity Recognition per l'Italiano","year":2007,"authors_people":"Dell'Orletta F., Federico M., Giovannetti E., Lenci A., Marchi S., Trabucco A., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Giovannetti, Emiliano","Trabucco, Andrea","Federico, Maria","Pirrelli, Vito","Marchi, Simone"],"authors_cnr_id":["326","10442"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Federico, M.","Giovannetti, E.","Lenci, A.","Marchi, S.","Trabucco, A.","Pirrelli, 
V."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/157416","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131547,"last_updated":"2023-11-06 19:34:14","id_people":157417,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"Segmentazione di un Testo Inglese in Token","year":2007,"authors_people":"Dell'Orletta F., Federico M., Giovannetti E., Lenci A., Marchi S., Trabucco A., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Giovannetti, Emiliano","Trabucco, Andrea","Federico, Maria","Pirrelli, Vito","Marchi, Simone"],"authors_cnr_id":["326","10442"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Federico, M.","Giovannetti, E.","Lenci, A.","Marchi, S.","Trabucco, A.","Pirrelli, V."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/157417","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131049,"last_updated":"2023-11-06 19:34:22","id_people":84586,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Computational linguistics meets philosophy: a Latent Semantic Analysis of Giordano Bruno's texts","year":2006,"authors_people":"Bassi S., Dell'Orletta F., Esposito D., Lenci A.","authors_cnr":["Dell'Orletta, Felice"],"authors_cnr_id":[""],"authors_cnr_institute":[""],"authors":["Bassi, S.","Dell'Orletta, F.","Esposito, D.","Lenci, A."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84586","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"LREC 2006: 5th International Conference on Language Resources and Evaluation 
(LREC)","conference_place":"Genova","conference_date":""},{"id":131092,"last_updated":"2023-11-06 19:34:24","id_people":84630,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Searching treebanks for functional constraints: cross-lingual experiments in grammatical relation assignment","year":2006,"authors_people":"Dell'Orletta F., Lenci A., Montemagni S., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Pirrelli, Vito","Montemagni, Simonetta"],"authors_cnr_id":["326","5595"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Lenci, A.","Montemagni, S.","Pirrelli, V."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84630","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"LREC 2006: 5th International Conference on Language Resources and Evaluation","conference_place":"Genoa","conference_date":""},{"id":131120,"last_updated":"2023-11-06 19:34:23","id_people":84660,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Probing the space of grammatical variation: induction of cross-lingual grammatical constraints from treebanks","year":2006,"authors_people":"Dell'Orletta F.; Lenci A.; Montemagni S.; Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Pirrelli, Vito","Montemagni, Simonetta"],"authors_cnr_id":["326","5595"],"authors_cnr_institute":["048","048","048"],"authors":["Dell'Orletta, F.","Lenci, A.","Montemagni, S.","Pirrelli, V."],"abstract":"The paper reports on a detailed quantitative analysis of distributional language data of both Italian and Czech, highlighting the relative contribution of a number of distributed grammatical factors to sentence-based identification of subjects and direct objects. 
The work uses a Maximum Entropy model of stochastic resolution of conflicting grammatical constraints and is demonstrably capable of putting explanatory theoretical accounts to the test of usage-based empirical verification.","keywords":[""],"pages":"21-28","url":"https:\/\/publications.cnr.it\/doc\/84660","volume":"","doi":"","editors_people":"","editors":[""],"published":"Proceedings of the Workshop on Frontiers in Linguistically Annotated Corpora 2006 (LAC 06)","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"1-932432-78-7","conference_name":"Coling\/ACL 2006","conference_place":"Sydney (Australia)","conference_date":"22 July 2006"},{"id":131437,"last_updated":"2023-11-06 19:34:25","id_people":151563,"institutes":["ILC"],"type":"misc","type_order":12,"type_people":"other","title":"Text-to-Knowledge (T2K) Versione 2","year":2006,"authors_people":"Bartolini R., Dell'Orletta F., Lenci A., Marchi S., Montemagni S., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Pirrelli, Vito","Montemagni, Simonetta","Marchi, Simone"],"authors_cnr_id":["326","5595","10442"],"authors_cnr_institute":[""],"authors":["Bartolini, R.","Dell'Orletta, F.","Lenci, A.","Marchi, S.","Montemagni, S.","Pirrelli, V."],"abstract":"Versione 2. Text-to-Knowledge (T2K) \u00e8 una piattaforma software di supporto avanzato alla gestione documentale per la creazione dinamica di repertori terminologici e ontologie di dominio a partire da testi e per l'indicizzazione concettuale di documenti. Il sistema T2K si propone di offrire una batteria integrata di strumenti avanzati di analisi linguistica del testo, analisi statistica e apprendimento automatico del linguaggio, destinati a offrire una rappresentazione accurata del contenuto di una base documentale non strutturata, per scopi di indicizzazione avanzata e navigazione intelligente. 
I risultati di questo processo di acquisizione sono annotati in forma di metadati XML, offrendo in tal modo la prospettiva di una sempre crescente e diretta interoperabilit\u00e0 con sistemi automatici per la produzione di contenuti digitali selezionati e strutturati dinamicamente su misura, per diversi profili di utenza. Versioni prototipali di T2K sono gi\u00e0 operative su alcuni portali della pubblica amministrazione e sono state applicate per l'indicizzazione di contenuti didattici multimediali. E' in corso l'integrazione della tecnologia T2K nel sistema di gestione informatica di documentazione scientifica del CNR.","keywords":["text to knowledge","nlp","estrazione terminologica","ontology learning","indicizzazione terminologica"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/151563","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131042,"last_updated":"2023-11-06 19:34:26","id_people":84579,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Climbing the path to grammar: a maximum entropy model of subject\/object learning","year":2005,"authors_people":"Dell'Orletta F., Lenci A., Montemagni S., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Pirrelli, Vito","Montemagni, Simonetta"],"authors_cnr_id":["326","5595"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Lenci, A.","Montemagni, S.","Pirrelli, V."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/84579","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Psychocomputational Models of Human Language Acquisition (PsychoCompLA-2005)","conference_place":"Ann Arbor (USA)","conference_date":""},{"id":131425,"last_updated":"2023-11-06 
19:34:27","id_people":151548,"institutes":["ILC"],"type":"misc","type_order":12,"type_people":"other","title":"Text-to-Knowledge (T2K)","year":2005,"authors_people":"Bartolini R., Dell'Orletta F., Giorgetti D., Marchi S., Lenci A., Montemagni S., Pirrelli V.","authors_cnr":["Dell'Orletta, Felice","Pirrelli, Vito","Montemagni, Simonetta","Bartolini, Roberto","Marchi, Simone"],"authors_cnr_id":["326","5595","10441","10442"],"authors_cnr_institute":[""],"authors":["Bartolini, R.","Dell'Orletta, F.","Giorgetti, D.","Marchi, S.","Lenci, A.","Montemagni, S.","Pirrelli, V."],"abstract":"Piattaforma di estrazione e indicizzazione terminologica.","keywords":["NLP","estrazione terminologica"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/151548","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""}]