[{"id":362241,"last_updated":"2024-01-30 16:37:08","id_people":488202,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Tell me how you write and I'll tell you what you read: a study on the writing style of book reviews","year":2023,"authors_people":"Chiara Alzetta, Felice Dell'Orletta, Alessio Miaschi, Elena Prat, Giulia Venturi","authors_cnr":["Miaschi, Alessio","Alzetta, Chiara","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Miaschi, A.","Prat, E.","Venturi, G."],"abstract":"Purpose: The authors' goal is to investigate variations in the writing style of book reviews published on different social reading platforms and referring to books of different genres, which enables acquiring insights into communication strategies adopted by readers to share their reading experiences. Design\/methodology\/approach: The authors propose a corpus-based study focused on the analysis of A Good Review, a novel corpus of online book reviews written in Italian, posted on Amazon and Goodreads, and covering six literary fiction genres. The authors rely on stylometric analysis to explore the linguistic properties and lexicon of reviews and the authors conducted automatic classification experiments using multiple approaches and feature configurations to predict either the review's platform or the literary genre. Findings: The analysis of user-generated reviews demonstrates that language is a quite variable dimension across reading platforms, but not as much across book genres. The classification experiments revealed that features modelling the syntactic structure of the sentence are reliable proxies for discerning Amazon and Goodreads reviews, whereas lexical information showed a higher predictive role for automatically discriminating the genre. 
Originality\/value: The high availability of cultural products makes information services necessary to help users navigate these resources and acquire information from unstructured data. This study contributes to a better understanding of the linguistic characteristics of user-generated book reviews, which can support the development of linguistically-informed recommendation services. Additionally, the authors release a novel corpus of online book reviews meant to support the reproducibility and advancements of the research.","keywords":["Stylometric analysis","Genre detection","Natural language processing","Book reviews"],"pages":"23","url":"https:\/\/www.emerald.com\/insight\/content\/doi\/10.1108\/JD-04-2023-0073\/full\/html","volume":"79","doi":"10.1108\/JD-04-2023-0073","editors_people":"","editors":[""],"published":"Journal of documentation","publisher":"Emerald (Bingley, Regno Unito)","issn":"0022-0418","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":341607,"last_updated":"2023-07-08 17:42:56","id_people":470080,"institutes":["ILC","IGSG"],"type":"journal_article","type_order":0,"type_people":"article","title":"The ParlaMint corpora of parliamentary proceedings","year":2023,"authors_people":"Erjavec T.; Ogrodniczuk M.; Osenova P.; Ljubesic N.; Simov K.; Pancur A.; Rudolf M.; Kopp M.; Barkarson S.; Steingrimsson S.; Coltekin C.; de Does J.; Depuydt K.; Agnoloni T.; Venturi G.; Perez M.C.; de Macedo L.D.; Navarretta C.; Luxardo G.; Coole M.; Rayson P.; Morkevicius V.; Krilavicius T.; Dargis R.; Ring O.; van Heusden R.; Marx M.; Fiser D.","authors_cnr":["Agnoloni, Tommaso","Venturi, Giulia"],"authors_cnr_id":["11403","17692"],"authors_cnr_institute":[""],"authors":["Erjavec, T.","Ogrodniczuk, M.","Osenova, P.","Ljubesic, N.","Simov, K.","Pancur, A.","Rudolf, M.","Kopp, M.","Barkarson, S.","Steingrimsson, S.","Coltekin, C.","De Does, J.","Depuydt, K.","Agnoloni, T.","Venturi, G.","Perez, M. C.","De Macedo, L. 
D.","Navarretta, C.","Luxardo, G.","Coole, M.","Rayson, P.","Morkevicius, V.","Krilavicius, T.","Dargis, R.","Ring, O.","Van Heusden, R.","Marx, M.","Fiser, D."],"abstract":"This paper presents the ParlaMint corpora containing transcriptions of the sessions of the 17 European national parliaments with half a billion words. The corpora are uniformly encoded, contain rich meta-data about 11 thousand speakers, and are linguistically annotated following the Universal Dependencies formalism and with named entities. Samples of the corpora and conversion scripts are available from the project's GitHub repository, and the complete corpora are openly available via the CLARIN.SI repository for download, as well as through the NoSketch Engine and KonText concordancers and the Parlameter interface for on-line exploration and analysis.","keywords":["Parliamentary proceedings","Linguistic annotation","Universal Dependencies"],"pages":"1-34","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85124105199&origin=inward","volume":"","doi":"10.1007\/s10579-021-09574-0","editors_people":"","editors":[""],"published":"Language resources and evaluation (Print)","publisher":"Springer (Dordrecht, Paesi Bassi)","issn":"1574-020X","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":362242,"last_updated":"2024-01-30 16:38:01","id_people":488203,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Testing the Effectiveness of the Diagnostic Probing Paradigm on Italian Treebanks","year":2023,"authors_people":"Alessio Miaschi, Chiara Alzetta, Dominique Brunato, Felice Dell'Orletta, Giulia Venturi","authors_cnr":["Miaschi, Alessio","Alzetta, Chiara","Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Alzetta, C.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"The outstanding performance 
recently reached by neural language models (NLMs) across many natural language processing (NLP) tasks has steered the debate towards understanding whether NLMs implicitly learn linguistic competence. Probes, i.e., supervised models trained using NLM representations to predict linguistic properties, are frequently adopted to investigate this issue. However, it is still questioned if probing classification tasks really enable such investigation or if they simply hint at surface patterns in the data. This work contributes to this debate by presenting an approach to assessing the effectiveness of a suite of probing tasks aimed at testing the linguistic knowledge implicitly encoded by one of the most prominent NLMs, BERT. To this aim, we compared the performance of probes when predicting gold and automatically altered values of a set of linguistic features. Our experiments were performed on Italian and were evaluated across BERT's layers and for sentences with different lengths. As a general result, we observed higher performance in the prediction of gold values, thus suggesting that the probing model is sensitive to the distortion of feature values. 
However, our experiments also showed that the length of a sentence is a highly influential factor that is able to confound the probing model's predictions.","keywords":["Neural language model","Probing tasks","Treebanks"],"pages":"19","url":"https:\/\/www.mdpi.com\/2078-2489\/14\/3\/144","volume":"14","doi":"10.3390\/info14030144","editors_people":"","editors":[""],"published":"Information (Basel)","publisher":"MDPI (Basel, Svizzera)","issn":"2078-2489","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132500,"last_updated":"2023-11-06 19:31:16","id_people":464954,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Linguistically-Based Comparison of Different Approaches to Building Corpora for Text Simplification: A Case Study on Italian","year":2022,"authors_people":"Dominique Brunato, Felice Dell'Orletta, Giulia Venturi","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper, we present an overview of existing parallel corpora for Automatic Text Simplification (ATS) in different languages focusing on the approach adopted for their construction. We make the main distinction between manual and (semi)-automatic approaches in order to investigate in which respect complex and simple texts vary and whether and how the observed modifications may depend on the underlying approach. To this end, we perform a two-level comparison on Italian corpora, since this is the only language, with the exception of English, for which there are large parallel resources derived through the two approaches considered. 
The first level of comparison accounts for the main types of sentence transformations occurring in the simplification process, the second one examines the results of a linguistic profiling analysis based on Natural Language Processing techniques and carried out on the original and the simple version of the same texts. For both levels of analysis, we chose to focus our discussion mostly on sentence transformations and linguistic characteristics that pertain to the morpho-syntactic and syntactic structure of the sentence.","keywords":["linguistic complexity","Italian language","corpus construction","text simplification","aligned corpora"],"pages":"1-19","url":"https:\/\/www.frontiersin.org\/articles\/10.3389\/fpsyg.2022.707630\/full","volume":"13","doi":"10.3389\/fpsyg.2022.707630","editors_people":"","editors":[""],"published":"Frontiers in Psychology","publisher":"Frontiers Research Foundation (Switzerland)","issn":"1664-1078","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":343200,"last_updated":"2022-10-26 09:47:18","id_people":472409,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Why is this language complex? Cherry-pick the optimal set of features in multilingual treebanks","year":2022,"authors_people":"D. Brunato; G. Venturi","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["17692","21125"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Venturi, G."],"abstract":"This paper investigates linguistic complexity across natural languages from a corpus-based perspective and relies on the assumptions of linguistic profiling as a methodological framework. We focus in particular on the domain of syntactic complexity and analyze the distribution of a set of features taken as proxies of complexity phenomena at the sentence level, which were extracted from 63 treebanks annotated according to the Universal Dependencies formalism. 
This dataset guarantees that the features considered are modeling the same linguistic phenomena in different treebanks, allowing reliable comparison among languages. We show that our approach is able to identify tendencies of structural proximity between languages not necessarily in line with typologically-supported classification, thus shedding light on new corpus-based findings.","keywords":["Linguistic Complexity","Linguistic Profiling","Universal Dependencies","Syntactic Domain"],"pages":"1-14","url":"https:\/\/www.degruyter.com\/document\/doi\/10.1515\/lingvan-2021-0017\/html","volume":"","doi":"10.1515\/lingvan-2021-0017","editors_people":"","editors":[""],"published":"Linguistics vanguard","publisher":"De Gruyter Mouton (Berlin; New York NY, Germania)","issn":"2199-174X","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":351491,"last_updated":"2022-11-21 09:32:04","id_people":473649,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Higher readability of institutional websites drives the correct fruition of the abortion pathway: A cross-sectional study","year":2022,"authors_people":"Amerigo Ferrari, Luca Pirrotta, Manila Bonciani, Giulia Venturi, Milena Vainieri","authors_cnr":["Venturi, Giulia"],"authors_cnr_id":["17692"],"authors_cnr_institute":[""],"authors":["Ferrari, A.","Pirrotta, L.","Bonciani, M.","Venturi, G.","Vainieri, M."],"abstract":"In Italy, abortion services are public: therefore, health Institutions should provide clear and easily readable web-based information. We aimed to 1) assess variation in abortion services utilisation; 2) analyse the readability of institutional websites informing on induced abortion; 3) explore whether easier-to-read institutional websites influenced the correct fruition of abortion services. 
We identified from the 2021 administrative databases of Tuscany all women having an abortion, and-among them-women having an abortion with the certification provided by family counselling centres, following the pathway established by law. We assessed variation in total and certified abortion rates by computing the Systematic Component of Variation. We analysed the readability of the Tuscan health authorities' websites using the readability assessment tool READ-IT. We explored how institutional website readability influenced the odds of having certified abortions by running multilevel logistic models, considering health authorities as the highest-level variables. We observed high variation in the correct utilization of the abortion pathway in terms of certified abortion rates. The READ-IT scores showed that the most readable text was from the Florence Teaching Hospital website. Multilevel models revealed that higher READ-IT scores, corresponding to more difficult texts, resulted in lower odds of certified abortions. Large variation in the proper fruition of abortion pathways occurs in Tuscany, and such variation may depend on readability of institutional websites informing on induced abortion. 
Therefore, health Institutions should monitor and improve the readability of their websites to ensure proper and more equitable access to abortion.","keywords":["abortion services","readability assessment"],"pages":"1-13","url":"https:\/\/journals.plos.org\/plosone\/article?id=10.1371\/journal.pone.0277342","volume":"17","doi":"10.1371\/journal.pone.0277342","editors_people":"","editors":[""],"published":"PloS one","publisher":"Public Library of Science (San Francisco, CA, Stati Uniti d'America)","issn":"1932-6203","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":352735,"last_updated":"2023-11-09 18:10:01","id_people":475015,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"On Robustness and Sensitivity of a Neural Language Model: A Case Study on Italian L1 Learner Errors","year":2022,"authors_people":"Miaschi, Alessio and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper, we propose a comprehensive linguistic study aimed at assessing the implicit behavior of one of the most prominent Neural Language Models (NLM) based on Transformer architectures, BERT (Devlin et al., 2019), when dealing with a particular source of noisy data, namely essays written by L1 Italian learners containing a variety of errors targeting grammar, orthography and lexicon. Differently from previous works, we focus on the pre-training stage and we devise two complementary evaluation tasks aimed at assessing the impact of errors on sentence-level inner representations in terms of semantic robustness and linguistic sensitivity. 
While the first evaluation perspective is meant to probe the model's ability to encode the semantic similarity between sentences also in the presence of errors, the second type of probing task evaluates the influence of errors on BERT's implicit knowledge of a set of raw and morpho-syntactic properties of a sentence. Our experiments show that BERT's ability to compute sentence similarity and to correctly encode multi-leveled linguistic information of a sentence are differently modulated by the category of errors and that the error hierarchies in terms of robustness and sensitivity change across layer-wise representations.","keywords":["nlp","interpretability","transformers","learner errors"],"pages":"426-438","url":"https:\/\/doi.org\/10.1109\/TASLP.2022.3226333","volume":"","doi":"10.1109\/TASLP.2022.3226333","editors_people":"","editors":[""],"published":"IEEE\/ACM transactions on audio, speech, and language processing (Online)","publisher":"[Institute of Electrical and Electronics Engineers] ([Piscataway NJ], Stati Uniti d'America)","issn":"2329-9304","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":341004,"last_updated":"2023-11-06 19:31:21","id_people":469733,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Probing Linguistic Knowledge in Italian Neural Language Models across Language Varieties","year":2022,"authors_people":"Miaschi, Alessio and Sarti, Gabriele and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Sarti, G.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper, we present an in-depth investigation of the linguistic knowledge encoded by the transformer models currently available for the Italian language. 
In particular, we investigate how the complexity of two different architectures of probing models affects the performance of the Transformers in encoding a wide spectrum of linguistic features. Moreover, we explore how this implicit knowledge varies according to different textual genres and language varieties.","keywords":["nlp","transformer models","interpretability"],"pages":"25-44","url":"http:\/\/www.aaccademia.it\/ita\/scheda-libro?aaref=1518","volume":"","doi":"10.4000\/ijcol.965","editors_people":"","editors":[""],"published":"Italian Journal of Computational Linguistics","publisher":"aAccademia University Press, Torino (Italia)","issn":"2499-4553","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":343100,"last_updated":"2022-10-19 16:10:57","id_people":472171,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"COVID-19 vaccinations: An overview of the Italian national health system's online communication from a citizen perspective","year":2022,"authors_people":"L. Pirrotta; E. Guidotti; C. Tramontani; E. Bignardelli; G.Venturi; S. De Rosis","authors_cnr":["Venturi, Giulia"],"authors_cnr_id":["17692"],"authors_cnr_institute":[""],"authors":["Pirrotta, L.","Guidotti, E.","Tramontani, C.","Bignardelli, E.","Venturi, G.","De Rosis, S."],"abstract":"COVID-19 vaccine hesitancy is still widespread. During the pandemic, the internet has been the preferred channel for health-related information, especially for less-educated citizens who tend to be the most hesitant about vaccination. A well-structured web communication strategy could help both to overcome vaccine hesitancy and to ensure equity in healthcare service access. This study investigated how the various regional and local health authorities in Italy used their institutional websites to inform users about COVID-19 vaccinations between March and April 2021. 
We browsed 129 institutional websites, checking the availability, quality and quantity, actionability and readability of information using a literature-based common grid. Descriptive statistics and statistical tests were performed. The online public dissemination of COVID-19 vaccination information in Italy was fragmented, both across and within regions. The side effects of vaccinations were often not reported on the websites, thus missing an opportunity to enhance vaccination uptake. More focus should also be placed on readability, since readability indexes showed that they were difficult to understand. Our research revealed that several actions could be implemented to enhance online communication on COVID-19 vaccination. For instance, simplifying texts can make them more understandable and the information reported actionable.","keywords":["Vaccination Communication","Readability Assessment","Online Information","Covid-19"],"pages":"970-979","url":"https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0168851022002184","volume":"10","doi":"10.1016\/j.healthpol.2022.08.001","editors_people":"","editors":[""],"published":"Health policy (Amst. Print)","publisher":"Elsevier (Amsterdam, Paesi Bassi)","issn":"0168-8510","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132501,"last_updated":"2023-11-06 19:31:13","id_people":464964,"institutes":["ILC"],"type":"book","type_order":1,"type_people":"book","title":"La fede dichiarata. 
Un'analisi linguistico-computazionale","year":2022,"authors_people":"Giulia Venturi, Andrea Cimino, Felice Dell'Orletta","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Venturi, G.","Cimino, A.","Dell'Orletta, F."],"abstract":"Il volume indaga l'apporto di tecnologie basate sul Natural Language Processing (NLP) all'analisi di un corpus di trascrizioni di 164 interviste orali raccolte durante la ricerca 2017 sulla \"Religiosit\u00e0 in Italia\". Gli autori illustrano metodologie e strumenti che permettono di trasformare l'informazione implicitamente contenuta nelle interviste in informazione esplicitamente strutturata. Il risultato finale di questo processo interpretativo spazia dall'acquisizione di conoscenze lessicali e terminologiche complesse alla loro organizzazione in strutture proto-concettuali, fino ad arrivare alla qualificazione dell'atteggiamento con il quale l'intervistato si esprime. Il lettore viene accompagnato a scoprire quale sia il valore aggiunto delle analisi basate su NLP e quali nuovi orizzonti di ricerca siano aperti da queste analisi.","keywords":["Knowledge Extraction","Knowledge Organization"],"pages":"1-181","url":"https:\/\/publications.cnr.it\/doc\/464964","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"Franco Angeli Editore (Milano, ITA)","issn":"","isbn":"978-88-351-2146-6","conference_name":"","conference_place":"","conference_date":""},{"id":343148,"last_updated":"2022-10-21 15:19:30","id_people":472294,"institutes":["ILC","IGSG"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Making Italian Parliamentary Records Machine-Actionable: the Construction of the ParlaMint-IT corpus","year":2022,"authors_people":"Tommaso Agnoloni, Roberto Bartolini, Francesca Frontini, Simonetta Montemagni, Carlo Marchetti, Valeria Quochi, Manuela Ruisi, Giulia Venturi","authors_cnr":["Montemagni, 
Simonetta","Bartolini, Roberto","Agnoloni, Tommaso","Quochi, Valeria","Frontini, Francesca","Venturi, Giulia"],"authors_cnr_id":["5595","10441","11403","11893","15911","17692"],"authors_cnr_institute":[""],"authors":["Agnoloni, T.","Bartolini, R.","Frontini, F.","Montemagni, S.","Marchetti, C.","Quochi, V.","Ruisi, M.","Venturi, G."],"abstract":"This paper describes the process of acquisition, cleaning, interpretation, coding and linguistic annotation of a collection of parliamentary debates from the Senate of the Italian Republic covering the COVID-19 pandemic emergency period and a former period for reference and comparison according to the CLARIN ParlaMint prescriptions. The corpus contains 1199 sessions and 79,373 speeches for a total of about 31 million words, and was encoded according to the ParlaCLARIN TEI XML format. It includes extensive metadata about the speakers, sessions, political parties and parliamentary groups. As required by the ParlaMint initiative, the corpus was also linguistically annotated for sentences, tokens, POS tags, lemmas and dependency syntax according to the universal dependencies guidelines. Named entity annotation and classification is also included. All linguistic annotation was performed automatically using state-of-the-art NLP technology with no manual revision. The Italian dataset is freely available as part of the larger ParlaMint 2.1 corpus deposited and archived in CLARIN repository together with all other national corpora. 
It is also available for direct analysis and inspection via various CLARIN services and has already been used both for research and educational purposes.","keywords":["parliamentary debates","CLARIN ParlaMint","corpus creation","corpus annotation"],"pages":"117-124","url":"https:\/\/aclanthology.org\/2022.parlaclarin-1.17\/","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"","conference_name":"Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference","conference_place":"Marseille, France","conference_date":"20\/06\/2022"},{"id":343042,"last_updated":"2023-11-06 19:31:20","id_people":472145,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"On the Nature of BERT: Correlating Fine-Tuning and Linguistic Competence","year":2022,"authors_people":"Merendi F., Dell'Orletta F., Venturi G.","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Merendi, F.","Dell'Orletta, F.","Venturi, G."],"abstract":"Several studies in the literature on the interpretation of Neural Language Models (NLM) focus on the linguistic generalization abilities of pre-trained models. However, little attention is paid to how the linguistic knowledge of the models changes during the fine-tuning steps. In this paper, we contribute to this line of research by showing to what extent a wide range of linguistic phenomena are forgotten across 50 epochs of fine-tuning, and how the preserved linguistic knowledge is correlated with the resolution of the fine-tuning task. To this end, we considered a quite understudied task where linguistic information plays the main role, i.e. the prediction of the evolution of written language competence of native language learners. 
In addition, we investigate whether it is possible to predict the fine-tuned NLM accuracy across the 50 epochs solely relying on the assessed linguistic competence. Our results are encouraging and show a high relationship between the model's linguistic competence and its ability to solve a linguistically-based downstream task.","keywords":["Natural Language Processing","Neural Language Models","Linguistic Generalization Abilities"],"pages":"","url":"https:\/\/aclanthology.org\/2022.coling-1.275","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"International Conference on Computational Linguistics (COLING)","conference_place":"Gyeongju, Republic of Korea","conference_date":"12-17 ottobre 2022"},{"id":341608,"last_updated":"2023-11-06 19:31:23","id_people":470081,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"SemEval-2022 Task 3: PreTENS-Evaluating Neural Networks on Presuppositional Semantic Knowledge","year":2022,"authors_people":"Roberto Zamparelli, Shammur A Chowdhury, Dominique Brunato, Cristiano Chesi, Felice Dell'Orletta, Arid Hasan, Giulia Venturi","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Zamparelli, R.","Chowdhury, S. A.","Brunato, D.","Chesi, C.","Dell'Orletta, F.","Hasan, A.","Venturi, G."],"abstract":"We report the results of the SemEval 2022 Task 3, PreTENS, on evaluation the acceptability of simple sentences containing constructions whose two arguments are presupposed to be or not to be in an ordered taxonomic relation. The task featured two sub-tasks articulated as: (i) binary prediction task and (ii) regression task, predicting the acceptability in a continuous scale. The sentences were artificially generated in three languages (English, Italian and French). 
21 systems, with 8 system papers were submitted for the task, all based on various types of fine-tuned transformer systems, often with ensemble methods and various data augmentation techniques. The best systems reached an F1-macro score of 94.49 (sub-task1) and a Spearman correlation coefficient of 0.80 (sub-task2), with interesting variations in specific constructions and\/or languages.","keywords":["Neural Networks","Presuppositional Knowledge","Evaluation"],"pages":"228-238","url":"https:\/\/aclanthology.org\/2022.semeval-1.29.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"16th International Workshop on Semantic Evaluation (SemEval-2022)","conference_place":"Seattle","conference_date":"14-15\/07\/2022"},{"id":132487,"last_updated":"2023-11-06 19:31:30","id_people":463833,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Probing tasks under pressure","year":2021,"authors_people":"Miaschi A.; Alzetta C.; Brunato D.; Dell'Orletta F.; Venturi G.","authors_cnr":["Miaschi, Alessio","Alzetta, Chiara","Dell'Orletta, Felice","Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":["14329","17692","21125"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Alzetta, C.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"Probing tasks are frequently used to evaluate whether the representations of Neural Language Models (NLMs) encode linguistic information. However, it is still questioned if probing classification tasks really enable such investigation or they simply hint for surface patterns in the data. We present a method to investigate this question by comparing the accuracies of a set of probing tasks on gold and automatically generated control datasets. 
Our results suggest that probing tasks can be used as reliable diagnostic methods to investigate the linguistic information encoded in NLMs representations.","keywords":["Neural Language Models","Linguistic probing","Treebanks"],"pages":"1-7","url":"http:\/\/ceur-ws.org\/Vol-3033\/paper29.pdf","volume":"3033","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"8th Italian Conference on Computational Linguistics (CLIC-it 2021)","conference_place":"Milano","conference_date":"29\/06-01\/07\/2022"},{"id":132451,"last_updated":"2023-11-06 19:31:35","id_people":454441,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"What Makes My Model Perplexed? A Linguistic Investigation on Neural Language Models Perplexity","year":2021,"authors_people":"Miaschi, Alessio and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"This paper presents an investigation aimed at studying how the linguistic structure of a sentence affects the perplexity of two of the most popular Neural Language Models (NLMs), BERT and GPT-2. We first compare the sentence-level likelihood computed with BERT and the GPT-2's perplexity showing that the two metrics are correlated. 
In addition, we exploit linguistic features capturing a wide set of morpho-syntactic and syntactic phenomena showing how they contribute to predict the perplexity of the two NLMs.","keywords":["nlp","interpretability","deep learning"],"pages":"40-47","url":"https:\/\/www.aclweb.org\/anthology\/2021.deelio-1.5","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-954085-30-5","conference_name":"2nd Workshop on Knowledge Extraction and Integrationfor Deep Learning Architectures","conference_place":"","conference_date":"10\/06\/2021"},{"id":132489,"last_updated":"2022-05-05 11:44:08","id_people":463861,"institutes":["ILC"],"type":"misc","type_order":12,"type_people":"other","title":"Linguistically annotated multilingual comparable corpora of parliamentary debates ParlaMint. ana 2. 1","year":2021,"authors_people":"Toma? Erjavec, Maciej Ogrodniczuk, Petya Osenova, Nikola Ljube?i?, Kiril Simov, Vladislava Grigorova, Micha? Rudolf, Andrej Pan?ur, Maty\u00e1? Kopp, Starka\u00f0ur Barkarson, Stein\u00feor Steingr\u00edmsson, Henk van der Pol, Griet Depoorter, Jesse de Does, Bart Jongejan, Dorte Haltrup Hansen, Costanza Navarretta, Mar\u00eda Calzada P\u00e9rez, Luciana D. de Macedo, Ruben van Heusden, Maarten Marx, \u00c7a?r? 
\u00c7\u00f6ltekin, Matthew Coole, Tommaso Agnoloni, Francesca Frontini, Simonetta Montemagni, Valeria Quochi, Giulia Venturi, Manuela Ruisi, Carlo Marchetti, Roberto Battistoni, Mikl\u00f3s Seb\u0151k, Orsolya Ring, Roberts Dar\u0123is, Andrius Utka, Mindaugas Petkevi\u010dius, Monika Briedien\u0117, Tomas Krilavi\u010dius, Vaidas Morkevi\u010dius, Roberto Bartolini, Andrea Cimino, Sascha Diwersy, Giancarlo Luxardo, Paul Rayson","authors_cnr":["Cimino, Andrea","Montemagni, Simonetta","Bartolini, Roberto","Agnoloni, Tommaso","Quochi, Valeria","Frontini, Francesca","Venturi, Giulia"],"authors_cnr_id":["5595","10441","11403","11893","15911","17692"],"authors_cnr_institute":[""],"authors":["Erjavec, T.","Ogrodniczuk, M.","Osenova, P.","Ljube\u0161i\u0107, N.","Simov, K.","Grigorova, V.","Rudolf, M.","Pan\u010dur, A.","Kopp, M.","Barkarson, S.","Steingr\u00edmsson, S.","Van Der Pol, H.","Depoorter, G.","De Does, J.","Jongejan, B.","Haltrup Hansen, D.","Navarretta, C.","Calzada P\u00e9rez, M.","De Macedo, L. D.","Van Heusden, R.","Marx, M.","\u00c7\u00f6ltekin, \u00c7.","Coole, M.","Agnoloni, T.","Frontini, F.","Montemagni, S.","Quochi, V.","Venturi, G.","Ruisi, M.","Marchetti, C.","Battistoni, R.","Seb\u0151k, M.","Ring, O.","Dar\u0123is, R.","Utka, A.","Petkevi\u010dius, M.","Briedien\u0117, M.","Krilavi\u010dius, T.","Morkevi\u010dius, V.","Diwersy, S.","Luxardo, G.","Rayson, P."],"abstract":"ParlaMint 2.1 is a multilingual set of 17 comparable corpora containing parliamentary debates mostly starting in 2015 and extending to mid-2020, with each corpus being about 20 million words in size. The sessions in the corpora are marked as belonging to the COVID-19 period (from November 1st 2019), or being \"reference\" (before that date). 
The corpora have extensive metadata, including aspects of the parliament; the speakers (name, gender, MP status, party affiliation, party coalition\/opposition); are structured into time-stamped terms, sessions and meetings; with speeches being marked by the speaker and their role (e.g. chair, regular speaker). The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. Note that some corpora have further information, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The corpora are encoded according to the Parla-CLARIN TEI recommendation (https:\/\/clarin-eric.github.io\/parla-clarin\/), but have been validated against the compatible, but much stricter ParlaMint schemas. This entry contains the linguistically marked-up version of the corpus, while the text version is available at http:\/\/hdl.handle.net\/11356\/1432. The ParlaMint.ana linguistic annotation includes tokenization, sentence segmentation, lemmatisation, Universal Dependencies part-of-speech, morphological features, and syntactic dependencies, and the 4-class CoNLL-2003 named entities. 
Some corpora also have further linguistic annotations, such as PoS tagging or named entities according to language-specific schemes, with their corpus TEI headers giving further details on the annotation vocabularies and tools.","keywords":["dibattiti parlamentari","covid-19","ParlaCLARIN","parlamenti","discorso politico","CLARIN","linguistic annotation","pos-tagging","ner","linguistic dependency annotation","UD"],"pages":"","url":"http:\/\/hdl.handle.net\/11356\/1431","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132490,"last_updated":"2022-05-05 11:44:28","id_people":463865,"institutes":["ILC"],"type":"misc","type_order":12,"type_people":"other","title":"Multilingual comparable corpora of parliamentary debates ParlaMint 2.1","year":2021,"authors_people":"Toma\u017e Erjavec, Maciej Ogrodniczuk, Petya Osenova, Nikola Ljube\u0161i\u0107, Kiril Simov, Vladislava Grigorova, Micha\u0142 Rudolf, Andrej Pan\u010dur, Maty\u00e1\u0161 Kopp, Starka\u00f0ur Barkarson, Stein\u00feor Steingr\u00edmsson, Henk van der Pol, Griet Depoorter, Jesse de Does, Bart Jongejan, Dorte Haltrup Hansen, Costanza Navarretta, Mar\u00eda Calzada P\u00e9rez, Luciana D. de Macedo, Ruben van Heusden, Maarten Marx, \u00c7a\u011fr\u0131 
\u00c7\u00f6ltekin, Matthew Coole, Tommaso Agnoloni, Francesca Frontini, Simonetta Montemagni, Valeria Quochi, Giulia Venturi, Manuela Ruisi, Carlo Marchetti, Roberto Battistoni, Mikl\u00f3s Seb\u0151k, Orsolya Ring, Roberts Dar\u0123is, Andrius Utka, Mindaugas Petkevi\u010dius, Monika Briedien\u0117, Tomas Krilavi\u010dius, Vaidas Morkevi\u010dius, Roberto Bartolini, Andrea Cimino, Sascha Diwersy, Giancarlo Luxardo, Paul Rayson","authors_cnr":["Montemagni, Simonetta","Agnoloni, Tommaso","Quochi, Valeria","Frontini, Francesca","Venturi, Giulia"],"authors_cnr_id":["5595","11403","11893","15911","17692"],"authors_cnr_institute":[""],"authors":["Erjavec, T.","Ogrodniczuk, M.","Osenova, P.","Ljube\u0161i\u0107, N.","Simov, K.","Grigorova, V.","Rudolf, M.","Pan\u010dur, A.","Kopp, M.","Barkarson, S.","Steingr\u00edmsson, S.","Van Der Pol, H.","Depoorter, G.","De Does, J.","Jongejan, B.","Haltrup Hansen, D.","Navarretta, C.","Calzada P\u00e9rez, M.","De Macedo, L. D.","Van Heusden, R.","Marx, M.","\u00c7\u00f6ltekin, \u00c7.","Coole, M.","Agnoloni, T.","Frontini, F.","Montemagni, S.","Quochi, V.","Venturi, G.","Ruisi, M.","Marchetti, C.","Battistoni, R.","Seb\u0151k, M.","Ring, O.","Dar\u0123is, R.","Utka, A.","Petkevi\u010dius, M.","Briedien\u0117, M.","Krilavi\u010dius, T.","Morkevi\u010dius, V.","Diwersy, S.","Luxardo, G.","Rayson, P."],"abstract":"ParlaMint 2.1 is a multilingual set of 17 comparable corpora containing parliamentary debates mostly starting in 2015 and extending to mid-2020, with each corpus being about 20 million words in size. The sessions in the corpora are marked as belonging to the COVID-19 period (after November 1st 2019), or being \"reference\" (before that date). The corpora have extensive metadata, including aspects of the parliament; the speakers (name, gender, MP status, party affiliation, party coalition\/opposition); are structured into time-stamped terms, sessions and meetings; with speeches being marked by the speaker and their role (e.g. 
chair, regular speaker). The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. Note that some corpora have further information, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The corpora are encoded according to the Parla-CLARIN TEI recommendation (https:\/\/clarin-eric.github.io\/parla-clarin\/), but have been validated against the compatible, but much stricter ParlaMint schemas. This entry contains the ParlaMint TEI-encoded corpora with the derived plain text version of the corpus along with TSV metadata on the speeches. Also included is the 2.0 release of the data and scripts available at the GitHub repository of the ParlaMint project. Note that there also exists the linguistically marked-up version of the corpus, which is available at http:\/\/hdl.handle.net\/11356\/1431.","keywords":["dibattiti parlamentari","covid-19","discorso politico","CLARIN","parlamenti","ParlaCLARIN"],"pages":"","url":"http:\/\/hdl.handle.net\/11356\/1432","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132486,"last_updated":"2023-11-06 19:31:48","id_people":463828,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Linguistically-driven Selection of Difficult-to-Parse Dependency Structures","year":2020,"authors_people":"Chiara Alzetta, Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Alzetta, Chiara","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"The paper illustrates a novel methodology meeting a twofold goal, namely quantifying the reliability of automatically generated 
dependency relations without using gold data on the one hand, and identifying which are the linguistic constructions negatively affecting the parser performance on the other hand. These represent objectives typically investigated in different lines of research, with different methods and techniques. Our methodology, at the crossroads of these perspectives, allows not only to quantify the parsing reliability of individual dependency types but also to identify and weight the contextual properties making relation instances more or less difficult to parse. The proposed methodology was tested in two different and complementary experiments, aimed at assessing the degree of parsing difficulty across (a) different dependency relation types, and (b) different instances of the same relation. The results show that the proposed methodology is able to identify difficult-to-parse dependency relations without relying on gold data and by taking into account a variety of intertwined linguistic factors. These findings pave the way to novel applications of the methodology, both in the direction of defining new evaluation metrics based purely on automatically parsed data and towards the automatic creation of challenge sets.","keywords":["Linguistic Complexity","Syntactic Parsing","Evaluation metrics"],"pages":"37-60","url":"https:\/\/journals.openedition.org\/ijcol\/719","volume":"6","doi":"10.4000\/ijcol.719","editors_people":"","editors":[""],"published":"Italian Journal of Computational Linguistics","publisher":"aAccademia University Press, Torino (Italia)","issn":"2499-4553","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132396,"last_updated":"2020-11-16 10:09:40","id_people":435971,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Waiting time information in the Italian NHS: A citizen perspective","year":2020,"authors_people":"De Rosis, Sabina; Guidotti, Elisa; Zuccarino, Sara; Venturi, Giulia; Ferre, 
Francesca","authors_cnr":["Venturi, Giulia"],"authors_cnr_id":["17692"],"authors_cnr_institute":[""],"authors":["De Rosis, S.","Guidotti, E.","Zuccarino, S.","Venturi, G.","Ferre, F."],"abstract":"Public involvement in the management and communication of waiting times is known to support initiatives to reduce waiting times, as well as increase fairness and promote transparency and accountability. In order to improve transparency and communication to citizens, Italy recently updated the National Regulatory Plan for Waiting Lists (2019-2021), which calls for the disclosure of waiting time information on healthcare provider webpages. This study analyses waiting time information for outpatient visits and digital services available on the institutional website pages of 144 public healthcare organisations in nine regions and two autonomous provinces of Italy. Web pages were analysed both in terms of the available information\/services, using a grid, and in terms of the quality of the text using an advanced readability assessment tool (READ-IT). This information was complemented and validated by regional healthcare key informants during research-specific workshops. Waiting time information disclosure, digital services and text readability varied both within and between the regional healthcare systems and organisations. The types and characteristics of waiting time information and statistics vary considerably with a negative impact on their use for benchmarking and their readability and usability for booking purposes. Overall, communication weaknesses due to low harmonization and clarity of information can undermine efforts in effectively informing and involving the public through online waiting time data disclosure. (C) 2020 The Author(s). 
Published by Elsevier B.V.","keywords":["Waiting times","Healthcare","Online information","Readability","Italy"],"pages":"796-804","url":"https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0168851020301111?via%3Dihub","volume":"124","doi":"10.1016\/j.healthpol.2020.05.012","editors_people":"","editors":[""],"published":"Health policy (Amst. Print)","publisher":"Elsevier (Amsterdam, Paesi Bassi)","issn":"0168-8510","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132413,"last_updated":"2023-11-06 19:31:50","id_people":441971,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Metodi e Tecniche di Trattamento Automatico della Lingua per l'Estrazione di Conoscenza dalla Documentazione Scolastica","year":2020,"authors_people":"Venturi G., Dell'Orletta F., Montemagni S., Morini E. e Sagri M.T.","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Venturi, G.","Dell'Orletta, F.","Montemagni, S.","Morini, E.","Sagri, M. T."],"abstract":"Il contributo riguarda la creazione di un sistema integrato di \"knowledge management\", per la gestione e condivisione della conoscenza prodotta e utilizzata dalla scuola.","keywords":["Estrazione di informazione","Documenti scolastici","Indicizzazione","Terminology extraction"],"pages":"49-68","url":"https:\/\/publications.cnr.it\/doc\/441971","volume":"2","doi":"10.3280\/CAD2020-002005","editors_people":"","editors":[""],"published":"Cadmo (Testo stamp.)","publisher":"Franco Angeli (Napoli, Italia)","issn":"1122-5165","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132411,"last_updated":"2023-11-06 19:32:01","id_people":441967,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Verba et Acta. 
Un esperimento per promuovere l'evoluzione delle competenze linguistiche degli studenti degli istituti professionali","year":2020,"authors_people":"Vertecchi, Benedetto; Agrusti, Francesco; Dell'Orletta, Felice; Montemagni, Simonetta; Venturi, Giulia","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Vertecchi, B.","Agrusti, F.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"Ricerche in corso. Verba et Acta. Un esperimento per promuovere l'evoluzione delle competenze linguistiche degli studenti degli istituti professionali","keywords":["Evoluzione competenze linguistiche","Annotazione linguistica","Previsione dello sviluppo delle competenze di scrittura"],"pages":"109-117","url":"https:\/\/publications.cnr.it\/doc\/441967","volume":"","doi":"10.3280\/CAD2020-001008","editors_people":"","editors":[""],"published":"Cadmo (Testo stamp.)","publisher":"Franco Angeli (Napoli, Italia)","issn":"1122-5165","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132426,"last_updated":"2023-11-06 19:31:55","id_people":444113,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Quantitative linguistic investigations across universal dependencies treebanks","year":2020,"authors_people":"Alzetta C.; Dell'Orletta F.; Montemagni S.; Osenova P.; Simov K.; Venturi G.","authors_cnr":["Alzetta, Chiara","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Osenova, P.","Simov, K.","Venturi, G."],"abstract":"The paper illustrates a case study aimed at identifying cross-lingual quantitative trends in the distribution of dependency relations in treebanks for typologically different languages. 
Preliminary results show interesting differences rooted either in language-specific peculiarities or cross-lingual annotation inconsistencies, with a potential impact on different application scenarios.","keywords":["Universal Dependencies Treebanks","Cross-linguistic analysis","Typology"],"pages":"1-7","url":"http:\/\/ceur-ws.org\/Vol-2769\/paper_59.pdf","volume":"2769","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"979-12-80136-28-2","conference_name":"7th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Online","conference_date":"1-3\/03\/2021"},{"id":132427,"last_updated":"2023-11-06 19:31:38","id_people":444114,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"AcCompl-it @ EVALITA2020: Overview of the acceptability & complexity evaluation task for Italian","year":2020,"authors_people":"Brunato D.; Chesi C.; Dell'Orletta F.; Montemagni S.; Venturi G.; Zamparelli R.","authors_cnr":["Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Chesi, C.","Dell'Orletta, F.","Montemagni, S.","Venturi, G.","Zamparelli, R."],"abstract":"The Acceptability and Complexity evaluation task for Italian (AcCompl-it) was aimed at developing and evaluating methods to classify Italian sentences according to Acceptability and Complexity. It consists of two independent tasks asking participants to predict either the acceptability or the complexity rate (or both) of a given set of sentences previously scored by native speakers on a 1-to-7 points Likert scale. 
In this paper, we introduce the datasets distributed to the participants, we describe the different approaches of the participating systems and provide a first analysis of the obtained results.","keywords":["Shared Task","Linguistic Complexity","Acceptability"],"pages":"1-8","url":"http:\/\/ceur-ws.org\/Vol-2765\/paper163.pdf","volume":"2765","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"EVALITA '20, Evaluation of NLP and Speech Tools for Italian","conference_place":"Online","conference_date":"17\/12\/2020"},{"id":132394,"last_updated":"2023-11-06 19:31:54","id_people":435966,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Profiling-UD: a Tool for Linguistic Profiling of Texts","year":2020,"authors_people":"Dominique Brunato, Andrea Cimino, Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Cimino, Andrea","Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Cimino, A.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In this paper, we introduce Profiling-UD, a new text analysis tool inspired to the principles of linguistic profiling that can support language variation research from different perspectives. It allows the extraction of more than 130 features, spanning across different levels of linguistic description. Beyond the large number of features that can be monitored, a main novelty of Profiling-UD is that it has been specifically devised to be multilingual since it is based on the Universal Dependencies framework. 
In the second part of the paper, we demonstrate the effectiveness of these features in a number of theoretical and applicative studies in which they were successfully used for text and author profiling.","keywords":["Computational Language Variation Analysis","Linguistic Profiling","Universal Dependencies"],"pages":"7145-7151","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2020\/pdf\/2020.lrec-1.883.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"979-10-95546-34-4","conference_name":"Conference on Language Resources and Evaluation (LREC)","conference_place":"","conference_date":"11-16\/05\/2020"},{"id":132393,"last_updated":"2023-11-06 19:32:02","id_people":435958,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Voices of the Great War: A Richly Annotated Corpus of Italian Texts on the First World War","year":2020,"authors_people":"Alessandro Lenci, Simonetta Montemagni, Federico Boschetti, Irene De Felice, Stefano dei Rossi, Felice Dell'Orletta, Michele Di Giorgio, Martina Miliani, Lucia C. Passaro, Angelica Puddu, Giulia Venturi, Nicola Labanca","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice","Boschetti, Federico","Venturi, Giulia"],"authors_cnr_id":["5595","14329","14630","17692"],"authors_cnr_institute":[""],"authors":["Lenci, A.","Montemagni, S.","Boschetti, F.","De Felice, I.","Dei Rossi, S.","Dell'Orletta, F.","Di Giorgio, M.","Miliani, M.","Passaro, L. C.","Puddu, A.","Venturi, G.","Labanca, N."],"abstract":"Voci della Grande Guerra (\"Voices of the Great War\") is the first large corpus of Italian historical texts dating back to the period of First World War. This corpus differs from other existing resources in several respects. 
First, from the linguistic point of view it gives account of the wide range of varieties in which Italian was articulated in that period, namely from a diastratic (educated vs. uneducated writers), diaphasic (low\/informal vs. high\/formal registers) and diatopic (regional varieties, dialects) points of view. From the historical perspective, through a collection of texts belonging to different genres it represents different views on the war and the various styles of narrating war events and experiences. The final corpus is balanced along various dimensions, corresponding to the textual genre, the language variety used, the author type and the typology of conveyed contents. The corpus is annotated with lemmas, part-of-speech, terminology, and named entities. Significant corpus samples representative of the different \"voices\" have also been enriched with meta-linguistic and syntactic information. The layer of syntactic annotation forms the first nucleus of an Italian historical treebank complying with the Universal Dependencies standard. 
The paper illustrates the final resource, the methodology and tools used to build it, and the Web Interface for navigating it.","keywords":["Historical Corpora","Linguistic and Meta-linguistic Annotation","Information Extraction"],"pages":"911-918","url":"https:\/\/www.aclweb.org\/anthology\/2020.lrec-1.114.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"979-10-95546-34-4","conference_name":"Conference on Language Resources and Evaluation (LREC)","conference_place":"","conference_date":"11-16\/05\/2020"},{"id":132416,"last_updated":"2023-11-06 19:31:44","id_people":442040,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Is Neural Language Model Perplexity Related to Readability?","year":2020,"authors_people":"Miaschi, Alessio and Alzetta, Chiara and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Alzetta, Chiara","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Alzetta, C.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"This paper explores the relationship between Neural Language Model (NLM) perplexity and sentence readability. Starting from the evidence that NLMs implicitly acquire sophisticated linguistic knowledge from a huge amount of training data, our goal is to investigate whether perplexity is affected by linguistic features used to automatically assess sentence readability and if there is a correlation between the two metrics. 
Our findings suggest that this correlation is actually quite weak and the two metrics are affected by different linguistic phenomena.","keywords":["nlp","neural language models","readability"],"pages":"","url":"http:\/\/ceur-ws.org\/Vol-2769\/paper_57.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"979-12-80136-28-2","conference_name":"Seventh Italian Conference on Computational Linguistics","conference_place":"","conference_date":"01-03\/03\/2021"},{"id":132391,"last_updated":"2023-11-06 19:31:46","id_people":438491,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Linguistic Profiling of a Neural Language Model","year":2020,"authors_people":"Miaschi A., Brunato D., Dell'Orletta F., Venturi G.","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper we investigate the linguistic knowledge learned by a Neural Language Model (NLM) before and after a fine-tuning process and how this knowledge affects its predictions during several classification problems. We use a wide set of probing tasks, each of which corresponds to a distinct sentence-level feature extracted from different levels of linguistic annotation. We show that BERT is able to encode a wide range of linguistic characteristics, but it tends to lose this information when trained on specific downstream tasks. 
We also find that BERT's capacity to encode different kind of linguistic properties has a positive influence on its predictions: the more it stores readable linguistic information of a sentence, the higher will be its capacity of predicting the expected label assigned to that sentence.","keywords":["Linguistic Profiling","Neural Language Model","Interpretability"],"pages":"745-756","url":"https:\/\/www.aclweb.org\/anthology\/2020.coling-main.65\/","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-952148-27-9","conference_name":"International Conference on Computational Linguistics (COLING)","conference_place":"Online","conference_date":"8-13\/12\/2020"},{"id":132395,"last_updated":"2023-11-06 19:31:58","id_people":435969,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Tracking the Evolution of Written Language Competence in L2 Spanish Learners","year":2020,"authors_people":"Miaschi, Alessio; Davidson, Sam; Brunato, Dominique; Dell'Orletta, Felice; Sagae, Kenji; Sanchez-Gutierrez, Claudia H.; Venturi, Giulia","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Davidson, S.","Brunato, D.","Dell'Orletta, F.","Sagae, K.","Sanchez Gutierrez, C. H.","Venturi, G."],"abstract":"In this paper we present an NLP-based approach for tracking the evolution of written language competence in L2 Spanish learners using a wide range of linguistic features automatically extracted from students' written productions. 
Beyond reporting classification results for different scenarios, we explore the connection between the most predictive features and the teaching curriculum, finding that our set of linguistic features often reflects the explicit instruction that students receive during each course.","keywords":["Evolution of Language Competence","Natural Language Processing","Linguistic Profiling"],"pages":"92-101","url":"https:\/\/www.aclweb.org\/anthology\/2020.bea-1.9.pdf","volume":"","doi":"10.18653\/v1\/2020.bea-1.9","editors_people":"","editors":[""],"published":"","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-941643-83-9","conference_name":"15th Workshop on Innovative Use of NLP for Building Educational Applications","conference_place":"","conference_date":"10\/07\/2020"},{"id":132417,"last_updated":"2023-11-06 19:31:45","id_people":442038,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Italian Transformers Under the Linguistic Lens","year":2020,"authors_people":"Miaschi, Alessio and Sarti, Gabriele and Brunato, Dominique and Dell'Orletta, Felice and Venturi, Giulia","authors_cnr":["Brunato, Dominique Pierina","Miaschi, Alessio","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Miaschi, A.","Sarti, G.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper we present an in-depth investigation of the linguistic knowledge encoded by the transformer models currently available for the Italian language. In particular, we investigate whether and how using different architectures of probing models affects the performance of Italian transformers in encoding a wide spectrum of linguistic features. 
Moreover, we explore how this implicit knowledge varies according to different textual genres.","keywords":["nlp","neural language models","interpretability"],"pages":"","url":"http:\/\/ceur-ws.org\/Vol-2769\/paper_56.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"979-12-80136-28-2","conference_name":"Seventh Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"","conference_date":"01-03\/03\/2021"},{"id":132356,"last_updated":"2023-11-06 19:32:08","id_people":423880,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"INFERRING QUANTITATIVE TYPOLOGICAL TRENDS FROM MULTILINGUAL TREEBANKS. A CASE STUDY","year":2019,"authors_people":"Alzetta, Chiara; Dell'Orletta, Felice; Montemagni, Simonetta; Venturi, Giulia","authors_cnr":["Alzetta, Chiara","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In the past decades, linguistic typology went through a renewing phase that involved a significant change in the research questions and methods of the discipline, which is now interested in fine-grained features underlying language diversity. In this paper, we propose a novel approach to address the newly defined needs of linguistic typology by extracting qualitative and quantitative information about a wide range of features from multilingual annotated corpora based on Natural Language Processing methods and techniques. We tested our method in a case study focusing on word order variation in two widely investigated constructions, VERB-SUBJ(ect) and NOUN-ADJ(ective), with a specific view to structural and functional factors underlying the preference for one or the other order, both intra- and cross-linguistically, and their interaction. 
Preliminary experiments have been carried out aimed at acquiring typological evidence from a selection of linguistically annotated treebanks for three different languages, namely Italian, Spanish and English. Our results show the effectiveness of the method in letting similarities and differences also emerge from typologically close languages.","keywords":["language typology","multilingual annotated corpora","linguistic knowledge extraction and modelling","word order variation"],"pages":"209-242","url":"https:\/\/www.rivisteweb.it\/doi\/10.1418\/95391","volume":"18","doi":"10.1418\/95391","editors_people":"","editors":[""],"published":"Lingue e linguaggio","publisher":"Il Mulino, Bologna (Italia)","issn":"1720-9331","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132354,"last_updated":"2023-11-06 19:32:10","id_people":423874,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Le parole del miglioramento. Come le scuole descrivono il cambiamento","year":2019,"authors_people":"Dell'Orletta F., Greco S., Montemagni S., Morini E., Rossi F., Sagri M.T., Venturi G.","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Greco, S.","Montemagni, S.","Morini, E.","Rossi, F.","Sagri, M. T.","Venturi, G."],"abstract":"Il presente contributo intende illustrare i risultati di una ricerca condotta con l'uso di strumenti di trattamento automatico del linguaggio (Natural Language Processing: nlp) su quanto dichiarato dalle scuole in circa 2500 Piani di Miglioramento (modello indire ) con l'obiettivo di comprendere le scelte strategiche in un'ottica di miglioramento continuo. 
Il disegno d'analisi permette di restituire sia una visione complessiva dei Piani di Miglioramento che approfondimenti qualitativi di confronto tra tipologie di scuola e aree geografiche e relativi a tematiche strategiche quali formazione e innovazione.","keywords":["Piano di Miglioramento","Natural Language Processing","Formazione","Innovazione"],"pages":"47-68","url":"https:\/\/www.rivistainfanzia.it\/pvw\/app\/default\/pvw_sito.php?sede_codice=1PWPSE01&page=2432193","volume":"1\/2019","doi":"","editors_people":"","editors":[""],"published":"Psicologia dell'educazione","publisher":"Edizioni Centro Studi Erickson (Gardolo (TN), Italia)","issn":"1971-3711","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132355,"last_updated":"2023-11-06 19:32:04","id_people":423878,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Analytics dei testi riflessivi scritti dai docenti neoassunti nel portfolio digitale","year":2019,"authors_people":"Della Gala V., Chiriatti G., Dell'Orletta F., Pettenati M.C., Venturi G.","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Della Gala, V.","Chiriatti, G.","Dell'Orletta, F.","Pettenati, M. C.","Venturi, G."],"abstract":"Presentiamo i risultati preliminari e l'analisi svolta su circa 50.000 testi scritti dai docenti neo nominati in ruolo per riflettere su due attivit\u00e0 didattiche svolte con gli studenti, nel contesto del percorso dell'anno di formazione e prova 2016\/17. Il percorso prevede attivit\u00e0 in presenza e attivit\u00e0 a distanza completate sul portfolio digitale, ospitato nell'ambiente online gestito dall'Indire. 
Nell'ambito del monitoraggio della formazione, con il fine di ottimizzare gli strumenti e il supporto fornito, abbiamo interrogato i dati testuali prodotti dai docenti nell'interazione con l'ambiente per capire se i testi presentassero evidenze riconducibili alle scritture riflessive. Obiettivi dell'indagine sono stati la definizione di uno schema per la classificazione dei testi sulla base del livello di riflessivit\u00e0 evidenziato e l'impiego di strumenti di Trattamento Automatico del Linguaggio (TAL) per l'analisi dell'interocorpus testuale prodotto dai docenti. Descriveremo il contesto scientifico e progettuale,le caratteristiche dei dati analizzati, come questo abbia determinato il disegno d'indagine;descriveremo inoltre la sua implementazione e dunque le procedure, gli strumenti e le metriche adottate o elaborate per rappresentare il contenuto dei dati; infine discuteremo i primi risultati e alcuni vantaggi e limiti dell'approccio adottato.","keywords":["Teacher professional development","Natural Language Processing","Reflective writing","Linguistic Profiling","Document Classification"],"pages":"187-204","url":"https:\/\/ojs.pensamultimedia.it\/index.php\/sird\/article\/view\/3454\/3360","volume":"Special issue","doi":"10.7346\/SIRD-2S2019-P189","editors_people":"","editors":[""],"published":"Giornale italiano della ricerca educativa (Online)","publisher":"Pensa Multimedia (Lecce, Italia)","issn":"2038-9744","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132357,"last_updated":"2023-11-06 19:32:06","id_people":423881,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Dissecting Treebanks to Uncover Typological Trends. 
A Multilingual Comparative Approach","year":2019,"authors_people":"Alzetta C., Dell'Orletta F., Montemagni S., Venturi G.","authors_cnr":["Alzetta, Chiara","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"Over the last years, linguistic typology started attracting the interest of the community working on cross- and multi-lingual NLP as a way to tackle the bottleneck deriving from the lack of annotated data for many languages. Typological information is mostly acquired from publicly accessible typological databases, manually constructed by linguists. As reported in Ponti et al. (2018), despite the abundant information contained in them for many languages, these resources suffer from two main shortcomings, i.e. their limited coverage and the discrete nature of features (only \"the majority value rather than the full range of possible values and their corresponding frequencies\" is reported). Corpus-based studies can help to automatically acquire quantitative typological evidence which might be exploited for polyglot NLP. Recently, the availability of corpora annotated following a cross-linguistically consistent annotation scheme such as the one developed in the Universal Dependencies project is prompting new comparative linguistic studies aimed to identify similarities as well as idiosyncrasies among typologically different languages (Nivre, 2015). 
The line of research described here is aimed at acquiring quantitative typological evidence from UD treebanks through a multilingual contrastive approach.","keywords":["Natural Language Processing","Linguistic Typology"],"pages":"1-3","url":"https:\/\/typology-and-nlp.github.io\/2019\/assets\/2019\/papers\/5.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-950737-29-1","conference_name":"1st TyP-NLP: The Workshop on Typology for Polyglot NLP, ACL workshop","conference_place":"Firenze","conference_date":"01\/08\/2019"},{"id":132343,"last_updated":"2019-12-04 12:31:40","id_people":409872,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Nove Anni di jTEI: What's New?","year":2019,"authors_people":"Federico Boschetti, Gabriella Pardelli, Giulia Venturi","authors_cnr":["Boschetti, Federico","Pardelli, Gabriella","Venturi, Giulia"],"authors_cnr_id":["14630","16333","17692"],"authors_cnr_institute":[""],"authors":["Boschetti, F.","Pardelli, G.","Venturi, G."],"abstract":"Questo contributo illustra metodi e strumenti per studiare il cambiamento diacronico degli interessi di ricerca della comunit\u00e0 TEI grazie all'uso di metodi di estrazione automatica della terminologia da corpora di dominio.","keywords":["Natural Language Processing","Digital Humanities"],"pages":"1-6","url":"http:\/\/ceur-ws.org\/Vol-2481","volume":"Vol-2481 urn: nbn: de: 0074-2481-7","doi":"","editors_people":"Raffaella Bernardi, Roberto Navigli, Giovanni Semeraro","editors":["Bernardi, R.","Navigli, R.","Semeraro, G."],"published":"CLiC-it 2019 Italian Conference on Computational Linguistics","publisher":"CEUR-WS. 
org (Aachen, DEU)","issn":"","isbn":"","conference_name":"CLiC-it 2019-Sesta Conferenza Italiana di Linguistica Computazionale","conference_place":"Bari","conference_date":"13-15\/11\/2019"},{"id":132359,"last_updated":"2023-11-06 19:32:17","id_people":423885,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"What makes a review helpful? Predicting the helpfulness of Italian tripadvisor reviews","year":2019,"authors_people":"Chiriatti G.; Brunato D.; Dell'Orletta F.; Venturi G.","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Chiriatti, G.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper we introduce a classification system devoted to predict the helpfulness of Italian online reviews. It is based on a wide set of features reflecting the different factors involved and tested on different categories of TripAdvisor reviews. For this purpose, we collected the first Italian corpus of online reviews enriched with metadata related to their helpfulness and we carried out an in-depth analysis of the most predictive features.","keywords":["Natural Language Processing","Documenti Classification","Linguistic Profiling"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85074834351&origin=inward","volume":"2481","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. 
Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"6th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Bari","conference_date":"13-15\/11\/2019"},{"id":132358,"last_updated":"2023-11-06 19:32:09","id_people":423883,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Italian and English sentence simplification: How many differences?","year":2019,"authors_people":"Fieromonte M.; Brunato D.; Dell'Orletta F.; Venturi G.","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Fieromonte, M.","Brunato, D.","Dell'Orletta, F.","Venturi, G."],"abstract":"The paper proposes a cross-linguistic analysis of two parallel monolingual corpora conceived for automatic text simplification in two languages, Italian and English. The aim is to find similarities and differences in the process of simplification in two typologically different languages. To carry out the comparison, 1,000 sentences were extracted from the two corpora and annotated with a scheme previously used to annotate simplification phenomena..","keywords":["Natural Language Processing","Automatic Text Simplification"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85074816689&origin=inward","volume":"2481","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. 
Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"6th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Bari","conference_date":"13-15\/11\/2019"},{"id":121217,"last_updated":"2021-03-01 16:11:51","id_people":423867,"institutes":["ILC","IGSG"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Semantic processing of legal texts","year":2018,"authors_people":"Agnoloni T.; Venturi G.","authors_cnr":["Agnoloni, Tommaso","Venturi, Giulia"],"authors_cnr_id":["11403","17692"],"authors_cnr_institute":[""],"authors":["Agnoloni, T.","Venturi, G."],"abstract":"The paper provides an overview of the field of semantic processing of legal texts, combining views and perspectives from the computational linguistic and Artificial Intelligence and Law (AI & Law) communities. The last few years have seen a growing body of research and practice in the field of AI & Law which addresses a range of topics: semantic and cross-language legal Information Retrieval, document classification, legal drafting, legal knowledge extraction, automated legal argumentation, as well as the construction of legal ontologies and their application. The increasing availability of legal corpora accessible as processable data is making viable their partially automated conversion into legal knowledge bases. In this context, it is of paramount importance the use of Natural Language Processing (NLP) techniques and tools that automate the process of knowledge extraction from legal texts. 
Accordingly, the paper aims at discussing how the two research communities can benefit from the interaction of the different perspectives: the legal artificial intelligence community can gain insight into state-of-the-art linguistic technologies, tools and resources, and the computational linguists can take advantage of the large and often multilingual legal resources (corpora as well as lexicons and ontologies) for training, domain adaptation and evaluation of current NLP technologies and tools. The authors will present an overview on semantic resources for legal texts annotation and processing. Different kind of resources (linguistic, lexical, conceptual, formal) will be introduced and their differences, methodological premises, intended use and possible integration will be highlighted. The peculiarities of the legal domain and legal language will be discussed in relation with the construction and use of legal semantic resources. The issue of multilingualism, multilingual and multi-legal system access to legal information will be also discussed showing how formalized lexical, linguistic and conceptual legal resources can support the task. How NLP tools and techniques can be fruitfully exploited to semantically process collections of legal texts will be introduced in the second part of the paper. 
In particular, the authors will show how they can be used to automatically extract the relevant knowledge contained in legal text corpora, to structure the extracted knowledge in semantic resources (such as domain-specific ontologies or thesauri), and to semantically annotate the texts with the extracted information to pave the way to content-based access and querying.","keywords":["Semantic Processing","Natural Language Processing","Ontology Learning","Legal Texts"],"pages":"109-137","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85061292435&origin=inward","volume":"","doi":"10.1515\/9781614514664-006","editors_people":"","editors":[""],"published":"","publisher":"Walter De Gruyter Inc (Boston\/Berlin\/Munich, USA)","issn":"","isbn":"978-1-61451-669-9","conference_name":"","conference_place":"","conference_date":""},{"id":132253,"last_updated":"2023-11-06 19:32:24","id_people":385339,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Extracting dependency relations from digital learning content","year":2018,"authors_people":"Adorni G.; Dell'Orletta F.; Koceva F.; Torre I.; Venturi G.","authors_cnr":["Venturi, Giulia","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Adorni, G.","Dell'Orletta, F.","Koceva, F.","Torre, I.","Venturi, G."],"abstract":"Digital Libraries present tremendous potential for developing e-learning applications, such as text comprehension and question-answering tools. A way to build this kind of tools is structuring the digital content into relevant concepts and dependency relations among them. While the literature offers several approaches for the former, the identification of dependencies, and specifically of prerequisite relations, is still an open issue. 
We present an approach to manage this task.","keywords":["Prerequisite relationship","Concept extraction","Graph mining"],"pages":"114-119","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85041860435&origin=inward","volume":"806","doi":"10.1007\/978-3-319-73165-0_11","editors_people":"","editors":[""],"published":"Communications in computer and information science (Print)","publisher":"Springer (Heidelberg, Germania)","issn":"1865-0929","isbn":"","conference_name":"14th Italian Research Conference on Digital Libraries (IRCDL 2018)","conference_place":"Udine","conference_date":"25-26 gennaio 2018"},{"id":132307,"last_updated":"2023-11-06 19:32:19","id_people":391617,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Assessing the Impact of Iterative Error Detection and Correction. A Case Study on the Italian Universal Dependency Treebank","year":2018,"authors_people":"Alzetta C., Dell'Orletta F., Montemagni S., Simi M., Venturi G.","authors_cnr":["Alzetta, Chiara","Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Simi, M.","Venturi, G."],"abstract":"Detection and correction of errors and inconsistencies in \"gold treebanks\" are becoming more and more central topics of corpus annotation. The paper illustrates a new incremental method for enhancing treebanks, with particular emphasis on the extension of error patterns across different textual genres and registers. Impact and role of corrections have been assessed in a dependency parsing experiment carried out with four different parsers, whose results are promising. 
For both evaluation datasets, the performance of parsers increases, in terms of the standard LAS and UAS measures and of a more focused measure taking into account only relations involved in error patterns, and at the level of individual dependencies.","keywords":["Error Detection","Universal Dependency Treebanks","Syntactic parsing"],"pages":"1-7","url":"http:\/\/universaldependencies.org\/udw18\/PDFs\/39_Paper.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-948087-84-1","conference_name":"Universal Dependencies Workshop 2018 (UDW 2018)","conference_place":"Brussels","conference_date":"01\/11\/2018"},{"id":132241,"last_updated":"2023-11-06 19:32:22","id_people":382333,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Dangerous Relations in Dependency Treebanks","year":2018,"authors_people":"Chiara Alzetta, Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Venturi, Giulia","Alzetta, Chiara","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"The paper illustrates an effective and innovative method for detecting erroneously annotated arcs in gold dependency treebanks based on an algorithm originally developed to measure the reliability of automatically produced dependency relations. The method permits to significantly restrict the error search space and, more importantly, to reliably identify patterns of systematic recurrent errors which represent dangerous evidence to a parser which tendentially will replicate them. 
Achieved results demonstrate effectiveness and reliability of the method.","keywords":["Dependency treebanks","Error Detection","Linguistic Annotation"],"pages":"201-210","url":"http:\/\/aclweb.org\/anthology\/W\/W17\/W17-7624.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-80-88132-04-2","conference_name":"16th International Workshop on Treebanks and Linguistic Theories","conference_place":"Praga","conference_date":"23-24 gennaio 2018"},{"id":132252,"last_updated":"2023-11-06 19:32:36","id_people":385342,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Universal Dependencies and Quantitative Typological Trends. A Case Study on Word Order","year":2018,"authors_people":"Chiara Alzetta, Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Alzetta, C.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"The paper presents a new methodology aimed at acquiring typological evidence from \"gold\" treebanks for different languages. In particular, it investigates whether and to what extent algorithms developed for assessing the plausibility of automatically produced syntactic annotations could contribute to shed light on key issues of the linguistic typological literature. 
It reports the first and promising results of a case study focusing on word order patterns carried out on three different languages (English, Italian and Spanish).","keywords":["Linguistic Knowledge Extraction","Dependency Treebanks","Linguistic Typology"],"pages":"4540-4549","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2018\/pdf\/1109.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"979-10-95546-00-9","conference_name":"Proceedings of the 11th Edition of the Language Resources and Evaluation Conference (LREC 2018)","conference_place":"Miyazaki (Japan)","conference_date":"7-12 maggio 2018"},{"id":132308,"last_updated":"2023-11-06 19:32:26","id_people":391619,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Is this sentence difficult? Do you agree?","year":2018,"authors_people":"Brunato D., De Mattei L., Dell'Orletta F., Iavarone B., Venturi G.","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Brunato, D.","De Mattei, L.","Dell'Orletta, F.","Iavarone, B.","Venturi, G."],"abstract":"In this paper, we present a crowdsourcing-based approach to model the human perception of sentence complexity. We collect a large corpus of sentences rated with judgments of complexity for two typologically-different languages, Italian and English. 
We test our approach in two experimental scenarios aimed to investigate the contribution of a wide set of lexical, morpho-syntactic and syntactic phenomena in predicting i) the degree of agreement among annotators independently from the assigned judgment and ii) the perception of sentence complexity.","keywords":["Linguistic complexity","Crowdsourcing","Human perception"],"pages":"1-10","url":"https:\/\/www.aclweb.org\/anthology\/D18-1289\/","volume":"","doi":"10.18653\/v1\/D18-1289","editors_people":"","editors":[""],"published":"","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-948087-84-1","conference_name":"Conference on Empirical Methods in Natural Language Processing (EMNLP)","conference_place":"Brussels","conference_date":"31\/10\/2018-04\/11\/2018"},{"id":132351,"last_updated":"2023-11-06 19:32:18","id_people":423871,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"A NLP-based analysis of reflective writings by Italian teachers","year":2018,"authors_people":"Chiriatti G.; Della Gala V.; Dell'Orletta F.; Montemagni S.; Pettenati M.C.; Sagri M.T.; Venturi G.","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["Chiriatti, G.","Della Gala, V.","Dell'Orletta, F.","Montemagni, S.","Pettenati, M. C.","Sagri, M. T.","Venturi, G."],"abstract":"This paper reports first results of a wider study devoted to exploit the potentialities of a NLP-based approach to the analysis of a corpus of reflective writings on teaching activities. 
We investigate how a wide set of linguistic features allows reconstructing the linguistic profile of the texts written by the Italian teachers and predicting whether are reflective.","keywords":["Natural Language Processing","Reflective Writings","Linguistic Profiling","Document Classification"],"pages":"1-7","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85057733802&origin=inward","volume":"2253","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"5th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Torino","conference_date":"10-12\/12\/2018"},{"id":132350,"last_updated":"2023-11-06 19:32:33","id_people":423870,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Sentences and documents in native language identification","year":2018,"authors_people":"Cimino A.; Dell'Orletta F.; Brunato D.; Venturi G.","authors_cnr":["Cimino, Andrea","Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Cimino, A.","Dell'Orletta, F.","Brunato, D.","Venturi, G."],"abstract":"Starting from a wide set of linguistic features, we present the first in depth feature analysis in two different Native Language Identification (NLI) scenarios. We compare the results obtained in a traditional NLI document classification task and in a newly introduced sentence classification task, investigating the different role played by the considered features. 
Finally, we study the impact of a set of selected features extracted from the sentence classifier in document classification.","keywords":["Natural Language Processing","Native Language Identification"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85057749754&origin=inward","volume":"2253","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"5th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Torino","conference_date":"10-12\/12\/2018"},{"id":132353,"last_updated":"2023-11-06 19:32:25","id_people":423873,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Gender and Genre Linguistic profiling: A case study on female and male journalistic and diary prose","year":2018,"authors_people":"Cocciu E.; Brunato D.; Venturi G.; Dell'Orletta F.","authors_cnr":["Brunato, Dominique Pierina","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["14329","17692"],"authors_cnr_institute":[""],"authors":["Cocciu, E.","Brunato, D.","Venturi, G.","Dell'Orletta, F."],"abstract":"This paper intends to investigate the linguistic profile of male- and female-authored texts belonging to two very different textual genres: newspaper articles and diary prose. 
By using a wide set of linguistic features automatically extracted from text and spanning across different levels of linguistic description, from lexicon to syntax, our analysis highlights the peculiarities of the two examined genres and how the genre dimension is influenced by variation depending on author's gender (and vice versa).","keywords":["Natural Language Processing","Genre Classification","Linguistic Profiling"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85057759773&origin=inward","volume":"2253","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"5th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Torino","conference_date":"10-12\/12\/2018"},{"id":132352,"last_updated":"2023-11-06 19:32:27","id_people":423872,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Italian in the Trenches: Linguistic annotation and analysis of texts of the great war","year":2018,"authors_people":"De Felice I.; Dell'Orletta F.; Venturi G.; Lenci A.; Montemagni S.","authors_cnr":["Montemagni, Simonetta","Dell'Orletta, Felice","Venturi, Giulia"],"authors_cnr_id":["5595","14329","17692"],"authors_cnr_institute":[""],"authors":["De Felice, I.","Dell'Orletta, F.","Venturi, G.","Lenci, A.","Montemagni, S."],"abstract":"The paper illustrates the design and development of a textual corpus representative of the historical variants of Italian during the Great War, which was enriched with linguistic (lemmatization and pos-tagging) and meta-linguistic annotation. 
The corpus, after a manual revision of the linguistic annotation, was used for specializing existing NLP tools to process historical texts with promising results.","keywords":["Natural Language Processing","Automatic Linguistic Annotation"],"pages":"1-5","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85057734451&origin=inward","volume":"2253","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"5th Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Pisa","conference_date":"10-12\/12\/2018"},{"id":132236,"last_updated":"2023-11-06 19:32:43","id_people":382249,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"La qualit\u00e0 dei consensi informati. Un'analisi linguistico-computazionale della leggibilit\u00e0 dei testi","year":2017,"authors_people":"Venturi G., Dell'Orletta F., Montemagni S., Flore E., Bellandi T.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Venturi, G.","Dell'Orletta, F.","Montemagni, S.","Flore, E.","Bellandi, T."],"abstract":"La leggibilit\u00e0 dei testi delle informative di consenso per le procedure diagnostico-terapeutiche \u00e8 un requisito fondamentale, per offrire alle persone assistite l'accesso alle informazioni necessarie a una scelta consapevole delle opzioni disponibili per curare i diversi problemi di salute. 
La disponibilit\u00e0 di un testo leggibile \u00e8 inoltre un aiuto per i medici responsabili della comunicazione e della raccolta del consenso, che possono impiegarlo come un ausilio alle informazioni presentate in forma verbale durante il colloquio, in modo tale da poter condividere una base di conoscenze minime da condividere con il paziente e i suoi familiari. Seppure le evidenze siano limitate in merito alla relazione tra la qualit\u00e0 del consenso e l'attitudine al contenzioso da parte dei pazienti in caso di trattamenti che esitano in un danno attribuibile alle cure (Durand et al., 2015), si tratta di un ambito di ricerca di crescente interesse nella letteratura sulla sicurezza (Wu et al., 2005; Manta et al., 2017). Nella casistica regionale della Toscana sulle richieste di risarcimento, solo l'1% dei sinistri include problemi di consenso informato (dati Centro GRC), probabilmente anche a causa di una sottovalutazione del diritto all'informazione da parte dei cittadini che si sottopongono a interventi programmati, connessa con una limitata consapevolezza del potere di scegliere le proprie cure che ogni persona dovrebbe poter esercitare posta di fronte alle opzioni terapeutiche disponibili per i propri problemi di salute.","keywords":["Consenso informato","valutazione automatica della leggibilit\u00e0","Trattamento Automatico del Linguaggio"],"pages":"35-39","url":"http:\/\/www.formas.toscana.it\/rivistadellasalute\/fileadmin\/files\/fascicoli\/2017\/212\/SeT_fascicolo_212.pdf","volume":"212","doi":"","editors_people":"","editors":[""],"published":"Salute e territorio","publisher":"ETS (Pisa, Italia)","issn":"0392-4505","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132237,"last_updated":"2023-11-06 19:32:42","id_people":382252,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Identifying predictive features for textual genre classification: The key role of 
syntax","year":2017,"authors_people":"Cimino A.; Wieling M.; Dell'Orletta F.; Montemagni S.; Venturi G.","authors_cnr":["Venturi, Giulia","Cimino, Andrea","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Cimino, A.","Wieling, M.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"The paper investigates impact and role of different feature types for the specific task of Automatic Genre Classification with the final aim of identifying the most predictive ones. The goal was pursued by carrying out incremental feature selection through Grafting using different sets of linguistic features. Achieved results for discriminating among four traditional textual genres show the key role played by syntactic features, whose impact turned out to vary across genres.","keywords":["Textual Genre Classification","Feature Selection","Syntactic Features"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85037370866&origin=inward","volume":"2006","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Roma","conference_date":"11-12 dicembre 2017"},{"id":132251,"last_updated":"2023-11-06 19:32:57","id_people":385220,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Monitoraggio linguistico di Scritture Brevi: aspetti metodologici e primi risultati","year":2016,"authors_people":"D. BRUNATO, F. DELL'ORLETTA, S. MONTEMAGNI, G. 
VENTURI","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"Se da un lato le tecnologie del linguaggio svolgono un ruolo ormai indiscusso per l'accesso al contenuto testuale, ci\u00f2 non appare scontato quando si va a considerare il loro ruolo nella valutazione delle strutture linguistiche sottostanti al testo. Questo contributo si focalizza sulla definizione di una metodologia innovativa di monitoraggio linguistico della lingua italiana che a partire dall'output di strumenti di annotazione linguistica automatica permette di ricostruire un profilo linguistico di una collezione di testi rappresentativa di una specifica variet\u00e0 d'uso della lingua. Tale metodologia \u00e8 stata applicata a un corpus di tweet allo scopo di far luce su interrogativi aperti quali la possibilit\u00e0 di rintracciare tendenze lessicali, morfo-sintattiche e sintattiche peculiari all'interno di questa tipologia testuale; di studiare come queste tendenze si rapportino ai tratti caratterizzanti della lingua scritta e parlata; di individuare possibili differenze nella forma linguistica in cui si twittano contenuti di natura diversa.","keywords":["Trattamento Automatico del Linguaggio","Monitoraggio Linguistico","Variet\u00e0 d'Uso della Lingua","Lingua del Web"],"pages":"149-176","url":"https:\/\/publications.cnr.it\/doc\/385220","volume":"N. S. 
5","doi":"","editors_people":"","editors":[""],"published":"Quaderni Aion","publisher":"Universit\u00e0 degli Studi di Napoli \"L'Orientale\" (Napoli, Italia)","issn":"1825-2796","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132171,"last_updated":"2017-02-28 15:50:00","id_people":366759,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Le tecnologie linguistico-computazionali per la leggibilit\u00e0 della comunicazione istituzionale","year":2016,"authors_people":"Dominique Brunato, Giulia Venturi","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":[""],"authors_cnr_institute":[""],"authors":["Brunato, D.","Venturi, G."],"abstract":"Il contributo illustra il ruolo delle tecnologie linguistico-computazionali per la valutazione automatica della leggibilit\u00e0 dei testi della comunicazione istituzionale e propone alcuni esempi di semplificazione semi-automatica di testi amministrativi e normativi.","keywords":["tecnologie linguistico-computazionali","valutazione automatica della leggibilit\u00e0","comunicazione istituzionale"],"pages":"119-157","url":"https:\/\/publications.cnr.it\/doc\/366759","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"Pisa University Press (Pisa, ITA)","issn":"","isbn":"978-88-6741-627-1","conference_name":"","conference_place":"","conference_date":""},{"id":132176,"last_updated":"2023-11-06 19:33:00","id_people":367760,"institutes":["ILC"],"type":"edited_volume","type_order":3,"type_people":"book","title":"Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC 2016)","year":2016,"authors_people":"Dominique Brunato, Felice Dell'Orletta, Giulia Venturi, Thomas Fran\u00e7ois, Philippe Blache","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Dell'Orletta, 
F.","Venturi, G.","Fran\u00e7ois, T.","Blache, P."],"abstract":"Introduzione agli atti della prima edizione del workshop \"Computational Linguistics for Linguistic Complexity\" che raccoglie lavori che studiano da prospettive diverse il tema della complessit\u00e0 linguistica workshop allo scopo di promuovere una riflessione comune su approcci diversi all'indagine, al trattamento e alla valutazione di aspetti che rendono complessa la lingua.","keywords":["Linguistic Complexity","Computational Linguistics"],"pages":"1-245","url":"https:\/\/aclweb.org\/anthology\/W\/W16\/W16-41.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-4-87974-709-9","conference_name":"","conference_place":"","conference_date":""},{"id":132166,"last_updated":"2023-11-06 19:32:51","id_people":366749,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"CItA: an L1 Italian Learners Corpus to Study the Development of Writing Competence","year":2016,"authors_people":"Barbagli A., Lucisano P., Dell'Orletta F., Montemagni S., Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Barbagli, A.","Lucisano, P.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In this paper, we present the CItA corpus (Corpus Italiano di Apprendenti L1), a collection of essays written by Italian L1 learners collected during the first and second year of lower secondary school. 
The corpus was built in the framework of an interdisciplinary study jointly carried out by computational linguistics and experimental pedagogists and aimed at tracking the development of written language competence over the years and students' background information.","keywords":["Italian Learner Corpus","Diachronic Evolution of Written Language Competence","Error Annotation"],"pages":"88-95","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2016\/pdf\/536_Paper.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"978-2-9517408-9-1","conference_name":"Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)","conference_place":"Portoroz (Slovenia)","conference_date":"23-28 maggio 2016"},{"id":132165,"last_updated":"2023-11-06 19:32:58","id_people":366726,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"PaCCSS-IT: A Parallel Corpus of Complex-Simple Sentences for Automatic Text Simplification","year":2016,"authors_people":"Dominique Brunato, Andrea Cimino, Felice Dell'Orletta, Giulia Venturi","authors_cnr":["Venturi, Giulia","Cimino, Andrea","Brunato, Dominique Pierina","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Brunato, D.","Cimino, A.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper we present PaCCSS-IT, a Parallel Corpus of Complex-Simple Sentences for ITalian. To build the resource we develop a new method for automatically acquiring a corpus of complex-simple paired sentences able to intercept structural transformations and particularly suitable for text simplification. The method requires a wide amount of texts that can be easily extracted from the web making it suitable also for less-resourced languages. 
We test it on the Italian language making available the biggest Italian corpus for automatic text simplification.","keywords":["Automatic Text Simplification","Sentence alignment","Italian corpus"],"pages":"351-361","url":"https:\/\/www.aclweb.org\/anthology\/D\/D16\/D16-1034.pdf","volume":"","doi":"10.18653\/v1\/d16-1034","editors_people":"","editors":[""],"published":"","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-945626-25-8","conference_name":"Conference on Empirical Methods in Natural Language Processing (EMNLP 2016)","conference_place":"Austin, Texas","conference_date":"01-05\/11\/2016"},{"id":132170,"last_updated":"2023-11-06 19:32:53","id_people":366757,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Esplorazioni computazionali nello spazio dell'interlingua: verso una nuova metodologia di indagine","year":2016,"authors_people":"Dell'Orletta F., Montemagni S. e Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"Il presente contributo intende proporre un innovativo approccio all'identificazione delle caratteristiche linguistiche che aiutano a definire l'interlingua. 
Tale approccio consiste nella ricostruzione del profilo linguistico di corpora di produzioni scritte da apprendenti una lingua seconda basato su strumenti di trattamento automatico del linguaggio.","keywords":["interlingua","annotazione linguistica automatica","monitoraggio linguistico"],"pages":"143-161","url":"https:\/\/www.bulzoni.it\/it\/catalogo\/lingue-in-contatto-contact-linguistics.html","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"Bulzoni Editore (Roma, ITA)","issn":"","isbn":"978-88-6897-029-1","conference_name":"XLVIII Congresso Internazionale di Studi della Societ\u00e0 di Linguistica Italiana (SLI 2014)","conference_place":"Udine","conference_date":"25-27 settembre 2014"},{"id":132167,"last_updated":"2023-11-06 19:33:04","id_people":366752,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"ULISSE: una strategia di adattamento al dominio per l'annotazione sintattica automatica","year":2016,"authors_people":"Dell'Orletta F., Venturi G.","authors_cnr":["Venturi, Giulia","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Venturi, G."],"abstract":"This paper deals with Domain Adaptation for automatic syntactic annotation. Until the half of the 1980s, automatic linguistic annotation was based on algorithms built on groups of hand-written rules, defined a priori on the basis of the knowledge of the system to formalise. Subsequently, thanks to the progress of research in the field of Artificial Intelligence and to the development of linguistic resources, algorithms based on machine learning techniques began to be employed. The major difficulties of those algorithms were due to certain aspects of natural language such as ambiguities, diachronic evolutions, or language variations from the original domain of knowledge. 
More specifically, the issue of Domain Adaptation can be put in the following terms: \"can an annotated corpus [which is representative of a specific linguistic variety] be used for the syntactic analysis of a second corpus [which is representative of a different linguistic variety]?\". The authors answer by presenting an algorithm called ULISSE (Unsupervised LInguistically-driven Selection of dEpendency parses), which selects in an optimal way the most representative sentences of a new target domain and feeds them to the parser in addition to the original training set.","keywords":["Domain Adaptation","annotazione sintattica automatica"],"pages":"55-79","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2016\/10\/Compter_Parler_Soigner_ULISSE.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-88-6952-038-9","conference_name":"Atti del convegno \"Compter parler soigner: tra linguistica e intelligenza artificiale\"","conference_place":"Pavia","conference_date":"15-17 dicembre 2014"},{"id":132168,"last_updated":"2023-11-06 19:32:52","id_people":366754,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Dieci sfumature di marcatezza sintattica: Verso una nozione computazionale di complessit\u00e0","year":2016,"authors_people":"Tusa E.; Dell'Orletta F.; Montemagni S.; Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Tusa, E.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In this work, we will investigate whether and to what extent algorithms typically used to assess the reliability of the output of syntactic parsers can be used to study the correlation between processing complexity and the linguistic notion of markedness. 
Although still preliminary, achieved results show the key role of features such as dependency direction and length in defining the markedness degrees of a given syntactic construction.","keywords":["marcatezza sintattica","complessit\u00e0 linguistica","annotazione linguistica automatica"],"pages":"1-6","url":"http:\/\/www.scopus.com\/record\/display.url?eid=2-s2.0-85009279517&origin=inward","volume":"1749","doi":"","editors_people":"","editors":[""],"published":"CEUR workshop proceedings","publisher":"M. Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Napoli","conference_date":"5-6 dicembre 2016"},{"id":132127,"last_updated":"2023-11-06 19:33:10","id_people":357152,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Il ruolo delle tecnologie del linguaggio nel monitoraggio dell'evoluzione delle abilit\u00e0 di scrittura: primi risultati","year":2015,"authors_people":"Barbagli A., Lucisano P., Dell'Orletta F., Montemagni S., Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Barbagli, A.","Lucisano, P.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"L'ultimo decennio ha visto l'affermarsi a livello internazionale dell'uso di tecnologie del linguaggio per lo studio dei processi di apprendimento. Questo contributo riporta i primi e promettenti risultati di uno studio interdisciplinare che si \u00e8 avvalso di metodi e tecniche di analisi propri della linguistica computazionale, della linguistica e della pedagogia sperimentale. 
Lo studio, finalizzato al monitoraggio dell'evoluzione del processo di apprendimento della lingua italiana, \u00e8 stato condotto a partire dalle produzioni scritte di studenti della scuola secondaria di primo grado con strumenti di annotazione linguistica automatica e di estrazione di conoscenza e ha portato all'identificazione di un insieme di tratti qualificanti il processo di apprendimento linguistico.","keywords":["evoluzione delle competenze linguistiche","Didattica Sperimentale","Estrazione di conoscenza","Annotazione linguistica automatica"],"pages":"99-117","url":"https:\/\/journals.openedition.org\/ijcol\/326","volume":"","doi":"10.4000\/ijcol.326","editors_people":"","editors":[""],"published":"Italian Journal of Computational Linguistics","publisher":"aAccademia University Press, Torino (Italia)","issn":"2499-4553","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132126,"last_updated":"2023-11-06 19:33:05","id_people":357146,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"CItA: un Corpus di Produzioni Scritte di Apprendenti l'Italiano L1 Annotato con Errori","year":2015,"authors_people":"Alessia Barbagli, Pietro Lucisano, Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Barbagli, A.","Lucisano, P.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In questo articolo presentiamo CItA il primo corpus di produzioni scritte di apprendenti l'italiano L1 del primo e del secondo anno della scuola secondaria di primo grado annotato con errori grammaticali, ortografici e lessicali. 
Le specificit\u00e0 del corpus e la sua natura diacronica lo rendono particolarmente utile sia per applicazioni linguistico-computazionali sia per studi socio-pedagogici.","keywords":["Apprendimento della lingua madre","evoluzione delle competenze linguistiche"],"pages":"31-35","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2016\/03\/CItA_errori.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"Accademia University Press (Torino, ITA)","issn":"","isbn":"978-88-99200-62-6","conference_name":"2nd Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Trento","conference_date":"3-4 dicembre 2015"},{"id":132076,"last_updated":"2023-11-06 19:33:09","id_people":332693,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Design and Annotation of the First Italian Corpus for Text Simplification","year":2015,"authors_people":"Brunato D., Dell'Orletta F., Venturi G., Montemagni S.","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048","048"],"authors":["Brunato, D.","Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"In this paper, we present the design and construction of the first Italian corpus for automatic and semi-automatic text simplification. In line with current approaches, we propose a new annotation scheme specifically conceived to identify the typology of changes an original sentence undergoes when it is manually simplified. Such a scheme has been applied to two aligned Italian corpora, containing original texts with corresponding simplified versions, selected as representative of two different manual simplification strategies and addressing different target reader populations. 
Each corpus was annotated with the operations foreseen in the annotation scheme, covering different levels of linguistic description. Annotation results were analysed with the final aim of capturing peculiarities and differences of the different simplification strategies pursued in the two corpora.","keywords":["Annotation Scheme","Automatic Text Simplification"],"pages":"31-34","url":"https:\/\/aclweb.org\/anthology\/W\/W15\/W15-1604.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-941643-47-1","conference_name":"Proceedings of LAW IX-The 9th Linguistic Annotation Workshop","conference_place":"Denver, Colorado","conference_date":"5 giugno 2015"},{"id":132125,"last_updated":"2023-11-06 19:33:15","id_people":357144,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Tracking the Evolution of Written Language Competence: an NLP-based Approach","year":2015,"authors_people":"Richter S., Cimino A., Dell'Orletta F., Venturi G.","authors_cnr":["Venturi, Giulia","Cimino, Andrea","Dell'Orletta, Felice"],"authors_cnr_id":["14329"],"authors_cnr_institute":["048","048","048"],"authors":["Richter, S.","Cimino, A.","Dell'Orletta, F.","Venturi, G."],"abstract":"In this paper, we present an NLP-based innovative approach for tracking the evolution of written language competence relying on different sets of linguistic features that predict text quality. 
This approach was tested on a corpus of essays written by Italian L1 learners of the first and second year of the lower secondary school.","keywords":["Evolution of Written Language Competence","multi-level linguistic analysis"],"pages":"236-240","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2016\/03\/tracking-language-competence.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"Accademia University Press (Torino, ITA)","issn":"","isbn":"978-88-99200-62-6","conference_name":"2nd Italian Conference on Computational Linguistics (CLiC-it)","conference_place":"Trento","conference_date":"3-4 dicembre 2015"},{"id":132089,"last_updated":"2023-11-06 19:33:12","id_people":340387,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"NLP-Based Readability Assessment of Health-Related Texts: a Case Study on Italian Informed Consent Forms","year":2015,"authors_people":"Giulia Venturi, Tommaso Bellandi, Felice Dell'Orletta, Simonetta Montemagni","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Venturi, G.","Bellandi, T.","Dell'Orletta, F.","Montemagni, S."],"abstract":"The paper illustrates the results of a case study aimed at investigating and enhancing the accessibility of Italian health-related documents by relying on advanced NLP techniques, with particular attention to informed consent forms. 
Results achieved show that the features automatically extracted from the linguistically annotated text and ranging across different levels of linguistic description have a high discriminative power in order to guarantee a reliable readability assessment.","keywords":["Readability assessment","health-related information"],"pages":"131-141","url":"http:\/\/www.aclweb.org\/anthology\/W15-2618","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-941643-32-7","conference_name":"Sixth International Workshop on Health Text Mining and Information Analysis (Louhi)","conference_place":"Lisbona","conference_date":"17 settembre 2015"},{"id":132090,"last_updated":"2015-12-17 17:39:07","id_people":340388,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Language technologies for automatic readability assessment of health-related Information: a preliminary investigation into the informed consent forms used in a regional health service","year":2015,"authors_people":"Giulia Venturi, Sabrina Rinnone, Simonetta Montemagni, Manuela Sassi, Giuseppina Terranova, Elisabetta Flore, Tommaso Bellandi","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Sassi, Manuela"],"authors_cnr_id":["5595","19592"],"authors_cnr_institute":["048","048","048"],"authors":["Venturi, G.","Rinnone, S.","Montemagni, S.","Sassi, M.","Terranova, G.","Flore, E.","Bellandi, T."],"abstract":"Rationale: Within an information society, where everyone should be able to access all available information, improving access to written language is becoming more and more a central issue. This is the case for health-related information which should be accessible to all members of the society, including people who have reading difficulties as a result of a low education level or of language-based learning disabilities or because the language of the text is not their native language. 
Moreover, the breakdown of doctor-patient communication is one of the most frequent causes of adverse events. Research questions: We conducted a preliminary investigation to assess the readability of a corpus of informed consent forms used before a clinical procedure in the hospitals of a Regional Healthcare Service. Secondary goals include the comparison of readability across specialties and healthcare trusts. Methods: Providing complex scientific information in a way that is comprehensible to a lay person is a challenge that nowadays can be addressed by resorting to advanced Natural Language Processing (NLP) techniques, which make it possible to monitor the linguistic complexity of texts at the syntactic and lexical levels and to support their simplification, whenever needed. The study has been carried out by combining NLP-enabled feature extraction and state-of-the-art machine learning algorithms. To this end we used READ-IT, the first NLP-based readability assessment tool for Italian. Results: We analysed 584 documents, covering 29 specialties, for a total of 607.790 word tokens, currently used at the 36 public hospitals in Tuscany. Although the readability level of all documents in the corpus is low, both at the lexical and syntactic level, significant differences can be observed between specialties and healthcare trusts releasing the forms. With the readability level ranging between 0 (easy-to-read) and 100 (difficult-to-read), it resulted that the pediatric informed consent documents are the most easy-to-read forms (with an average score of 75) while the most difficult-to-read documents are documents of the surgical area (whose average score is 80) (standard deviation 2). 
Discussion: The state of the art resulting from this preliminary study shows that NLP-based readability assessment tools can help to measure the linguistic complexity of informed consent forms and guide the editor to identify linguistically complex passages that need to be simplified, either syntactically or lexically. The use of an assessment tool designed for the general language is the main limitation of the study and should be addressed through the customization of the tool to assess the readability of the healthcare jargon. A further step of the research will also consider the design of guidance to prepare readable informed consent forms.","keywords":["Readability assessment","health-related information"],"pages":"","url":"http:\/\/static1.squarespace.com\/static\/561c0d01e4b0b5ad2e65cc48\/t\/561d44dfe4b089431662d174\/1444758751213\/LibrettoProgramma.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"ISCOME 2015 Conference: \"The Golden Bridge: Communication and Patient Safety\"","conference_place":"Montecatini Terme","conference_date":"15-16 giugno 2015"},{"id":132005,"last_updated":"2015-02-23 16:50:00","id_people":311157,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Le tecnologie linguistico-computazionali nella misura della leggibilit\u00e0 di testi giuridici","year":2014,"authors_people":"Brunato D., Venturi G.","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina"],"authors_cnr_id":[""],"authors_cnr_institute":["048","048"],"authors":["Brunato, D.","Venturi, G."],"abstract":"Il presente contributo illustra una innovativa metodologia per il calcolo della leggibilit\u00e0 di un testo giuridico basata su strumenti di Trattamento Automatico del Linguaggio ed espressamente rivolta alla sua semplificazione. 
Inserendoci nel pi\u00f9 ampio filone di ricerche che affronta il tema dell'accessibilit\u00e0 della lingua del diritto, discutiamo con esempi tratti da testi reali, il caso specifico della prosa burocratico-amministrativa dal momento che l'accessibilit\u00e0 a tali documenti costituisce un elemento chiave della comunicazione istituzioni-cittadini. A nostra conoscenza, tale studio rappresenta il primo tentativo volto a mostrare come tecnologie linguistico-computazionali allo stato dell'arte per la lingua italiana incomincino ad essere mature per costituire non solo un ausilio per definire automaticamente la leggibilit\u00e0 di testi giuridici ma anche una guida per una loro stesura semplificata. Tali funzionalit\u00e0 saranno illustrate grazie a READ-IT, il primo e al momento unico strumento di valutazione della leggibilit\u00e0 oggi esistente per la lingua italiana basato su strumenti di Trattamento Automatico del Linguaggio.","keywords":[""],"pages":"111-142","url":"https:\/\/publications.cnr.it\/doc\/311157","volume":"XXIII","doi":"","editors_people":"","editors":[""],"published":"Informatica e diritto","publisher":"Edizioni Scientifiche Italiane (Firenze, Italia)","issn":"0390-0975","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131881,"last_updated":"2023-11-06 19:33:16","id_people":285640,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Assessing document and sentence readability in less resourced languages and across textual genres","year":2014,"authors_people":"Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In this paper, we tackle three underresearched issues of the automatic readability assessment literature, namely the evaluation of 
text readability in less resourced languages, with respect to sentences (as opposed to documents) as well as across textual genres. Different solutions to these issues have been tested by using and refining READ-IT, the first advanced readability assessment tool for Italian, which combines traditional raw text features with lexical, morpho-syntactic and syntactic information. In READ-IT readability assessment is carried out with respect to both documents and sentences, with the latter constituting an important novelty of the proposed approach: READ-IT shows a high accuracy in the document classification task and promising results in the sentence classification scenario. By comparing the results of two versions of READ-IT, adopting a classification- versus ranking-based approach, we also show that readability assessment is strongly influenced by textual genre; for this reason a genre-oriented notion of readability is needed. With classification-based approaches, reliable results can only be achieved with genre-specific models: Since this is far from being a workable solution, especially for less resourced languages, a new ranking method for readability assessment is proposed, based on the notion of distance.","keywords":["readability assessment","less resourced languages","multi-level linguistic annotation","textual genres"],"pages":"163-193","url":"http:\/\/www.ingentaconnect.com\/content\/jbp\/itl\/2014\/00000165\/00000002\/art00005","volume":"165","doi":"10.1075\/itl.165.2.03del","editors_people":"","editors":[""],"published":"ITL. 
Internationaler technischer Literaturanzeiger (Online)","publisher":"Peeters Publishers (Leuven, Belgio)","issn":"1783-1490","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":123736,"last_updated":"2015-02-17 13:54:19","id_people":310637,"institutes":["ILC","ITTIG","IGSG"],"type":"edited_volume","type_order":3,"type_people":"book","title":"Proceedings of the Fourth Workshop on Semantic Processing of Legal Texts","year":2014,"authors_people":"Francesconi E., Montemagni S., Peters W., Venturi G., Wyner A.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Francesconi, Enrico"],"authors_cnr_id":["5595","10498"],"authors_cnr_institute":["048","048","104"],"authors":["Francesconi, E.","Montemagni, S.","Peters, W.","Venturi, G.","Wyner, A."],"abstract":"","keywords":[""],"pages":"33","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2014\/workshops\/LREC2014Workshop-SPLeT%20Proceedings.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"PARIGI: ELRA (Parigi, FRA)","issn":"","isbn":"978-2-9517408-8-4","conference_name":"","conference_place":"","conference_date":""},{"id":131990,"last_updated":"2023-11-06 19:33:27","id_people":294078,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Tecnologie del linguaggio e monitoraggio dell'evoluzione delle abilit\u00e0 di scrittura nella scuola secondaria di primo grado","year":2014,"authors_people":"Barbagli A., Lucisano P., Dell'Orletta F., Montemagni S., Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Barbagli, A.","Lucisano, P.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"L'ultimo decennio ha visto l'affermarsi a livello internazionale dell'uso di tecnologie del linguaggio per lo studio dei processi di apprendimento. 
Questo contributo, che si colloca all'interno di una ricerca pi\u00f9 ampia di pedagogia sperimentale, riporta i primi e promettenti risultati di uno studio finalizzato al monitoraggio dell'evoluzione del processo di apprendimento della lingua italiana condotto a partire dalle produzioni scritte degli studenti con strumenti di annotazione linguistica automatica e di estrazione di conoscenza.","keywords":[""],"pages":"23-27","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2014\/12\/Tecnologie-del-linguaggio-per-la-scuola.pdf","volume":"","doi":"10.12871\/CLICIT201415","editors_people":"Roberto Basili, Alessandro Lenci, Bernardo Magnini","editors":["Basili, R.","Lenci, A.","Magnini, B."],"published":"Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)","publisher":"Pisa University Press srl (Pisa, ITA)","issn":"","isbn":"978-8-86741-472-7","conference_name":"First Italian Conference on Computational Linguistics (CLiC-it 2014)","conference_place":"Pisa","conference_date":"9-11 dicembre 2014"},{"id":131935,"last_updated":"2023-11-06 19:33:19","id_people":288050,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Computational Analysis of Historical Documents: An Application to Italian War Bulletins in World War I and II","year":2014,"authors_people":"Boschetti F., Cimino A., Dell'Orletta F., Lebani G.E., Passaro L., Picchi P., Venturi G., Montemagni S., Lenci A.","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Cimino, Andrea","Montemagni, Simonetta","Picchi, Paolo","Boschetti, Federico"],"authors_cnr_id":["5595","12761","14630"],"authors_cnr_institute":[""],"authors":["Boschetti, F.","Cimino, A.","Dell'Orletta, F.","Lebani, G. 
E.","Passaro, L.","Picchi, P.","Venturi, G.","Montemagni, S.","Lenci, A."],"abstract":"World War (WW) I and II represent crucial landmarks in the history on mankind: They have affected the destiny of whole generations and their consequences are still alive throughout Europe. In this paper we present an ongoing project to carry out a computational analysis of Italian war bulletins in WWI and WWII, by applying state-of-the-art tools for NLP and Information Extraction. The annotated texts and extracted information will be explored with a dedicated Web interface, allowing for multidimensional access and exploration of historical events through space and time.","keywords":["World War I"],"pages":"70-75","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2014\/workshops\/LREC2014Workshop-LRT4HDA%20Proceedings.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"Proceedings of workshop on Language resources and technologies for processing and linking historical documents and archives-Deploying Linked Open Data in Cultural Heritage-LREC 2014, 26 May, Reykjavik, Iceland","publisher":"European language resources association (ELRA) (Paris, FRA)","issn":"","isbn":"","conference_name":"LREC 2014","conference_place":"Reykjavik","conference_date":"26 May"},{"id":131989,"last_updated":"2023-11-06 19:33:21","id_people":294073,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Defining an annotation scheme with a view to automatic text simplification","year":2014,"authors_people":"Brunato D., Dell'Orletta F., Venturi G., Montemagni S.","authors_cnr":["Venturi, Giulia","Brunato, Dominique Pierina","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048","048"],"authors":["Brunato, D.","Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"This paper presents the preliminary steps of ongoing research in the field of automatic text 
simplification. In line with current approaches, we propose here a new annotation scheme specifically conceived to identify the typologies of changes an original sentence undergoes when it is manually simplified. Such a scheme has been tested on a parallel corpus available for Italian, which we have first aligned at sentence level and then annotated with simplification rules.","keywords":[""],"pages":"87-92","url":"http:\/\/www.italianlp.it\/wp-content\/uploads\/2014\/12\/Text-simplification.pdf","volume":"","doi":"10.12871\/CLICIT2014118","editors_people":"Roberto Basili, Alessandro Lenci, Bernardo Magnini","editors":["Basili, R.","Lenci, A.","Magnini, B."],"published":"Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)","publisher":"Pisa University Press srl (Pisa, ITA)","issn":"","isbn":"978-8-86741-472-7","conference_name":"First Italian Conference on Computational Linguistics (CLiC-it 2014)","conference_place":"Pisa","conference_date":"9-11 dicembre 2014"},{"id":131883,"last_updated":"2023-11-06 19:33:26","id_people":285670,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"T2K: a System for Automatically Extracting and Organizing Knowledge from Texts","year":2014,"authors_people":"Felice Dell'Orletta, Giulia Venturi, Andrea Cimino, Simonetta Montemagni","authors_cnr":["Venturi, Giulia","Cimino, Andrea","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048","048"],"authors":["Dell'Orletta, F.","Venturi, G.","Cimino, A.","Montemagni, S."],"abstract":"In this paper, we present T2K, a suite of tools for automatically extracting domain-specific knowledge from collections of Italian and English texts. 
T2K (Text-To-Knowledge v2) relies on a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine learning which are dynamically integrated to provide an accurate and incremental representation of the content of vast repositories of unstructured documents. Extracted knowledge ranges from domain-specific entities and named entities to the relations connecting them and can be used for indexing document collections with respect to different information types. T2K also includes \"linguistic profiling\" functionalities aimed at supporting the user in constructing the acquisition corpus, e.g. in selecting texts belonging to the same genre or characterized by the same degree of specialization or in monitoring the \"added value\" of newly inserted documents. T2K is a web application which can be accessed from any browser through a personal account which has been tested in a wide range of domains.","keywords":["Natural Language Processing","Information Extraction","Knowledge Management"],"pages":"2062-2070","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2014\/pdf\/590_Paper.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-2-9517408-8-4","conference_name":"International Conference on Language Resources and Evaluation (LREC)","conference_place":"Reykjavik","conference_date":"26-31 maggio 2014"},{"id":131991,"last_updated":"2023-11-06 19:33:17","id_people":294084,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Assessing the readability of sentences: which corpora and features?","year":2014,"authors_people":"Dell'Orletta F., Wieling M., Cimino A., Venturi G., Montemagni S.","authors_cnr":["Venturi, Giulia","Cimino, Andrea","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048","048"],"authors":["Dell'Orletta, F.","Wieling, M.","Cimino, A.","Venturi, 
G.","Montemagni, S."],"abstract":"The paper investigates the problem of sentence readability assessment, which is modelled as a classification task, with a specific view to text simplification. In particular, it addresses two open issues connected with it, i.e. the corpora to be used for training, and the identification of the most effective features to determine sentence readability. An existing readability assessment tool developed for Italian was specialized at the level of training corpus and learning algorithm. A maximum entropy-based feature selection and ranking algorithm (grafting) was used to identify to the most relevant features: it turned out that assessing the readability of sentences is a complex task, requiring a high number of features, mainly syntactic ones.","keywords":[""],"pages":"163-173","url":"http:\/\/acl2014.org\/acl2014\/W14-18\/pdf\/W14-1820.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"Proceedings of 9th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2014)","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-941643-03-7","conference_name":"9th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2014)","conference_place":"Baltimore, Maryland, USA","conference_date":"26 giugno 2014"},{"id":123731,"last_updated":"2015-02-17 13:31:15","id_people":310539,"institutes":["ILC","ITTIG","IGSG"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Investigating the relationship between neuroscience and law: a case study on a corpus of Italian case law texts","year":2014,"authors_people":"M.T.Sagri, D. Tiscornia, S. Montemagni, G. Venturi,","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Sagri, Maria Teresa","Tiscornia, Daniela"],"authors_cnr_id":["5595","11040","20276"],"authors_cnr_institute":["048","048","104","104"],"authors":["Sagri, M. 
T.","Tiscornia, D.","Montemagni, S.","Venturi, G."],"abstract":"","keywords":["Neuroscience linguistic and lexico-semantic analysis"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/310539","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Language and Law in Social Practice 3rd International Conference","conference_place":"Florence","conference_date":"14-15-16-17 May 2014"},{"id":132004,"last_updated":"2023-11-06 19:33:33","id_people":310619,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Linguistically-driven selection of correct arcs for dependency parsing","year":2013,"authors_people":"Dell'Orletta F.; Venturi G.; Montemagni S.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"LISCA is an unsupervised algorithm aimed at assigning a quality score to each arc generated by a dependency parser in order to produce a decreasing ranking of arcs from correct to incorrect ones. LISCA exploits statistics about a set of linguistically-motivated and dependency-based features extracted from a large corpus of automatically parsed sentences and uses them to assign a quality score to each arc of a parsed sentence belonging to the same domain of the automatically parsed corpus. 
LISCA has been successfully tested on two datasets belonging to two different domains and in all experiments it turned out to outperform different baselines, thus showing to be able to reliably detect correct arcs also representing domain-specific peculiarities.","keywords":["Correct arcs","Dependency parsing"],"pages":"125-136","url":"http:\/\/cys.cic.ipn.mx\/ojs\/index.php\/CyS\/article\/view\/1517","volume":"17","doi":"","editors_people":"","editors":[""],"published":"Computaci\u00f3n y Sistemas","publisher":"","issn":"1405-5546","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132022,"last_updated":"2015-02-23 10:20:29","id_people":316376,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Evalita 2011: the Frame Labeling over Italian Texts Task","year":2013,"authors_people":"Basili R., Lenci A., De Cao, D., Moschitti A., Venturi G.","authors_cnr":["Venturi, Giulia"],"authors_cnr_id":[""],"authors_cnr_institute":["048"],"authors":["Basili, R.","Lenci, A.","De Cao, D.","Moschitti, A.","Venturi, G."],"abstract":"The Frame Labeling over Italian Texts (FLaIT) task held within the EvalIta 2011 challenge is here described. It focuses on the automatic annotation of free texts according to frame semantics. Systems were asked to label all semantic frames and their arguments, as evoked by predicate words occurring in plain text sentences. 
Proposed systems are based on a variety of learning techniques and achieve very good results, over 80% of accuracy, in most subtasks.","keywords":["NLP System Evaluation","Shallow Semantic Parsing","Frame Semantics"],"pages":"195-204","url":"https:\/\/publications.cnr.it\/doc\/316376","volume":"7689","doi":"","editors_people":"Bernardo Magnini, Francesco Cutugno, Mauro Falcone, Emanuele Pianta","editors":["Magnini, B.","Cutugno, F.","Falcone, M.","Pianta, E."],"published":"Evaluation of Natural Language and Speech Tools for Italian","publisher":"Springer (Berlin Heidelberg, DEU)","issn":"","isbn":"978-3-642-35827-2","conference_name":"","conference_place":"","conference_date":""},{"id":123661,"last_updated":"2023-11-06 19:33:30","id_people":266373,"institutes":["ILC","ITTIG","IGSG"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Domain Adaptation for Dependency Parsing at EVALITA 2011","year":2013,"authors_people":"F. Dell'Orletta and S. Marchi and S. Montemagni and G. Venturi and T. Agnoloni and E. Francesconi","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Marchi, Simone","Francesconi, Enrico","Agnoloni, Tommaso","Dell'Orletta, Felice"],"authors_cnr_id":["5595","10442","10498","11403","14329"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Marchi, S.","Montemagni, S.","Venturi, G.","Agnoloni, T.","Francesconi, E."],"abstract":"The domain adaptation task was aimed at investigating techniques for adapting state-of-the-art dependency parsing systems to new domains. Both the language dealt with, i.e. Italian, and the target domain, namely the legal domain, represent two main novelties of the task organised at Evalita 2011 with respect to previous domain adaptation initiatives. In this paper, we define the task and describe how the datasets were created from different resources. 
In addition, we characterize the different approaches of the participating systems, report the test results, and provide a first analysis of these results.","keywords":["Dependency Parsing","Domain Adaptation","Self-training","Active Learning","Legal-NLP"],"pages":"58-69","url":"https:\/\/publications.cnr.it\/doc\/266373","volume":"7689","doi":"","editors_people":"Bernardo Magnini, Francesco Cutugno, Mauro Falcone, Emanuele Pianta","editors":["Magnini, B.","Cutugno, F.","Falcone, M.","Pianta, E."],"published":"Evaluation of NLP and Speech Tools for Italian","publisher":"Springer (Berlin Heidelberg, DEU)","issn":"","isbn":"978-3-642-35827-2","conference_name":"","conference_place":"","conference_date":""},{"id":131882,"last_updated":"2015-05-09 19:55:27","id_people":285645,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Semantic annotation of Italian legal texts: a FrameNet-based approach","year":2013,"authors_people":"Giulia Venturi","authors_cnr":["Venturi, Giulia"],"authors_cnr_id":[""],"authors_cnr_institute":["048"],"authors":["Venturi, G."],"abstract":"The FrameNet approach to text semantic annotation can be a reliable model to make explicit the linguistic information and the semantic content of legal texts. This hypothesis is discussed and empirically demonstrated through an experiment of annotation of a corpus of Italian legal texts. This study is aimed at showing how FrameNet is particularly appropriate in order to provide new perspectives for legal language studies and for legal knowledge representation tasks. 
Moreover, by relying on the output of an automatic dependency parser, the FrameNet-based annotation methodology presented here is meant to be successfully used in automatic semantic processing tasks of legal texts.","keywords":["Legal Language","Semantic Annotation","Legal Ontologies","Natural Language Processing"],"pages":"51-84","url":"https:\/\/publications.cnr.it\/doc\/285645","volume":"58","doi":"10.1075\/bct.58","editors_people":"Mirjam Fried and Kiki Nikiforidou","editors":["Fried, M.","Nikiforidou, K."],"published":"Advances in Frame Semantics","publisher":"John Benjamins Publishing Company (Amsterdam\/Philadelphia, USA)","issn":"","isbn":"9789027202772","conference_name":"","conference_place":"","conference_date":""},{"id":131888,"last_updated":"2023-11-06 19:33:31","id_people":285772,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Linguistic Profiling based on General-purpose Features and Native Language Identification","year":2013,"authors_people":"Andrea Cimino, Felice Dell'Orletta, Giulia Venturi and Simonetta Montemagni","authors_cnr":["Venturi, Giulia","Cimino, Andrea","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048","048"],"authors":["Cimino, A.","Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"In this paper, we describe our approach to native language identification and discuss the results we submitted as participants to the First NLI Shared Task. 
By resorting to a wide set of general-purpose features qualifying the lexical and grammatical structure of a text, rather than to ad hoc features specifically selected for the NLI task, we achieved encouraging results, which show that the proposed approach is general-purpose and portable across different tasks, domains and languages.","keywords":["Native Language Identification","Linguistic Profiling"],"pages":"207-215","url":"http:\/\/www.aclweb.org\/anthology\/W13-1727","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-937284-47-3","conference_name":"8th workshop on \"Innovative Use of NLP for Building Educational Applications\"","conference_place":"Atlanta (Georgia)","conference_date":"13 giugno 2013"},{"id":131886,"last_updated":"2023-11-06 19:33:32","id_people":278421,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Linguistic Profiling of Texts Across Textual Genre and Readability Level. 
An exploratory Study on Italian Fictional Prose","year":2013,"authors_people":"Dell'Orletta F and Montemagni S and VENTURI G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"","keywords":[""],"pages":"189-197","url":"https:\/\/publications.cnr.it\/doc\/278421","volume":"","doi":"","editors_people":"","editors":[""],"published":"Proceedings of Recent Advances in Natural Language Processing (RANLP 2013)","publisher":"","issn":"","isbn":"","conference_name":"Recent Advances in Natural Language Processing (RANLP 2013)","conference_place":"Hissar, Bulgaria","conference_date":"7-13 settembre"},{"id":131889,"last_updated":"2023-11-06 19:33:36","id_people":285773,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Unsupervised Linguistically-Driven Reliable Dependency Parses Detection and Self-Training for Adaptation to the Biomedical Domain","year":2013,"authors_people":"Felice Dell'Orletta, Giulia Venturi, Simonetta Montemagni","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"In this paper, a new self-training method for domain adaptation is illustrated, where the selection of reliable parses is carried out by an unsupervised linguistically-driven algorithm, ULISSE. 
The method has been tested on biomedical texts with results showing a significant improvement with respect to considered baselines, which demonstrates its ability to capture both reliability of parses and domain-specificity of linguistic constructions.","keywords":["Self-training","Domain Adaptation","Biomedical Texts"],"pages":"45-53","url":"http:\/\/www.aclweb.org\/anthology\/W13-1906","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-937284-55-8","conference_name":"12th workshop on \"Biomedical Natural Language Processing\" (BioNLP)","conference_place":"Sofia (Bulgaria)","conference_date":"8-9 agosto 2013"},{"id":132091,"last_updated":"2015-12-11 13:16:13","id_people":340389,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Investigating legal language peculiarities across different types of Italian legal texts: an NLP-based approach","year":2013,"authors_people":"Giulia Venturi","authors_cnr":["Venturi, Giulia"],"authors_cnr_id":[""],"authors_cnr_institute":["048"],"authors":["Venturi, G."],"abstract":"In this paper, the author carried out the linguistic profiling of a corpus of different types of Italian legal texts exemplifying different sub-varieties of Italian legal language by relying on a wide range of different linguistic features (lexical, morpho-syntactic and syntactic) automatically extracted from the output of a multi-level automatic linguistic analysis of texts. The devised comparative approach allowed investigating the linguistic variation i) between the considered corpus of legal texts and a corpus of newspaper articles representative of Italian ordinary language and ii) among the considered types of legal texts (legislative acts, administrative acts, the Italian Constitution and legal cases). 
Achieved results can provide the starting point to identify areas of lexical, morpho-syntactic and\/or syntactic complexity within a legal text in order to assess its readability as well to perform a number of different computational forensic linguistics tasks.","keywords":["Legal language analysis","linguistic profiling","legal genres"],"pages":"1-19","url":"http:\/\/ler.letras.up.pt\/uploads\/ficheiros\/13624.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-989-8648-14-3","conference_name":"3rd European Conference of the International Association of Forensic Linguists","conference_place":"Porto","conference_date":"15-18 ottobre 2012"},{"id":123730,"last_updated":"2015-02-17 13:27:32","id_people":310522,"institutes":["ILC","ITTIG","IGSG"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Exploring the use of neuroscience in the Italian courtrooms: the linguistic and lexico-semantic analysis of a corpus of Italian case law texts","year":2013,"authors_people":"M.T.Sagri, G.Venturi,","authors_cnr":["Venturi, Giulia","Sagri, Maria Teresa"],"authors_cnr_id":["11040"],"authors_cnr_institute":["048","104"],"authors":["Sagri, M. 
T.","Venturi, G."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/310522","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"International Seminar of Neuroethics","conference_place":"Roma","conference_date":"27\/11\/2013"},{"id":132003,"last_updated":"2023-11-06 19:33:39","id_people":310580,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Lessico settoriale e lessico comune dell'estrazione di terminologia specialistica da corpora di dominio","year":2012,"authors_people":"Bonin F., Dell'Orletta F., Montemagni S., Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Bonin, F.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"","keywords":[""],"pages":"207-220","url":"https:\/\/publications.cnr.it\/doc\/310580","volume":"","doi":"","editors_people":"","editors":[""],"published":"Lessico e Lessicologia. 
Atti del XLIV congresso internazionale di studi della societ\u00e0 di linguistica italiana","publisher":"Bulzoni Editore (Roma, ITA)","issn":"","isbn":"978-88-7870-655-2","conference_name":"XLIV congresso internazionale di studi della societ\u00e0 di linguistica italiana","conference_place":"Viterbo","conference_date":"27-29 settembre 2010"},{"id":131746,"last_updated":"2023-11-06 19:33:41","id_people":219489,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"The SPLeT-2012 Shared Task on Dependency Parsing of Legal Texts","year":2012,"authors_people":"Dell'Orletta, Felice [1]; Marchi, Simone [1]; Montemagni, Simonetta [1]; Plank, Barbara [2]; Venturi, Giulia [3]","authors_cnr":["Montemagni, Simonetta","Marchi, Simone","Dell'Orletta, Felice"],"authors_cnr_id":["5595","10442","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Dell'Orletta, F.","Marchi, S.","Montemagni, S.","Plank, B.","Venturi, G."],"abstract":"The 4th Workshop on \"Semantic Processing of Legal Texts\" (SPLeT-2012) presents the first multilingual shared task on Dependency Parsing of Legal Texts. In this paper, we define the general task and its internal organization into sub-tasks, describe the datasets and the domain-specific linguistic peculiarities characterizing them. 
We finally report the results achieved by the participating systems, describe the underlying approaches and provide a first analysis of the final test results.","keywords":["Dependency Parsing","Domain Adaptation","Legal Text Processing"],"pages":"","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2012\/workshops\/27.LREC%202012%20Workshop%20Proceedings%20SPLeT.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Fourth Workshop on Semantic Processing of Legal Texts (SPLeT 2012)-First Shared Task on Dependency Parsing of Legal Texts (SPLeT 2012)","conference_place":"Istanbul","conference_date":"27 Maggio 2012"},{"id":131745,"last_updated":"2023-11-06 19:33:37","id_people":219483,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Domain Adaptation for Dependency Parsing at Evalita 2011","year":2012,"authors_people":"Dell'Orletta, Felice [1]; Marchi, Simone [1]; Montemagni, Simonetta [1]; Venturi, Giulia [2]; Agnoloni, Tommaso [3]; Francesconi, Enrico [3]","authors_cnr":["Agnoloni, Tommaso","Montemagni, Simonetta","Marchi, Simone","Francesconi, Enrico","Dell'Orletta, Felice"],"authors_cnr_id":["5595","10442","10498","14329"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Marchi, S.","Montemagni, S.","Venturi, G.","Agnoloni, T.","Francesconi, E."],"abstract":"The domain adaptation task was aimed at investigating techniques for adapting state-of-the-art dependency parsing systems to new domains. Both the language dealt with, i.e. Italian, and the target domain, namely the legal domain, represent two main novelties of the task organised at Evalita 2011. In this paper, we define the task and describe how the datasets were created from different resources. 
In addition, we characterize the different approaches of the participating systems, report the test results, and provide a first analysis of these results.","keywords":["Dependency Parsing","Domain Adaptation","Legal Text Processing"],"pages":"1-7","url":"http:\/\/www.evalita.it\/sites\/evalita.fbk.eu\/files\/working_notes2011\/Domain_Adaptation\/","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Evaluation of NLP and Speech Tools for Italian (EVALITA 2011): Domain Adaptation track","conference_place":"Roma","conference_date":"24-25 Gennaio 2012"},{"id":131885,"last_updated":"2023-11-06 19:33:38","id_people":278420,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Genre-oriented Readability Assessment: a Case Study","year":2012,"authors_people":"Dell'Orletta F and Montemagni S and VENTURI G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"","keywords":[""],"pages":"91-98","url":"https:\/\/publications.cnr.it\/doc\/278420","volume":"","doi":"","editors_people":"","editors":[""],"published":"Proceedings of Workshop on \"Speech and Language Processing Tools in Education\" (SLP-TED)","publisher":"","issn":"","isbn":"978-1-62748-389-6","conference_name":"Workshop on \"Speech and Language Processing Tools in Education\" (SLP-TED)","conference_place":"Mumbai, India","conference_date":"15 December, 2012"},{"id":131878,"last_updated":"2014-10-24 00:38:01","id_people":285544,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Enriching the ISST-TANL Corpus with Semantic Frames","year":2012,"authors_people":"Lenci, Alessandro; Montemagni, Simonetta; Venturi, Giulia; Cutrulla, Maria Rosaria","authors_cnr":["Venturi, 
Giulia","Montemagni, Simonetta"],"authors_cnr_id":["5595"],"authors_cnr_institute":["048","048"],"authors":["Lenci, A.","Montemagni, S.","Venturi, G.","Cutrulla, M. R."],"abstract":"The paper describes the design and the results of a manual annotation methodology devoted to enrich the ISST-TANL Corpus with Semantic Frames information. The main issues encountered in applying the English FrameNet annotation criteria to a corpus of Italian language are discussed together with the choice of anchoring the semantic annotation layer to the underlying dependency syntactic structure. We also describe an experiment to measure inter-annotator agreement and a first case study to extend and specialise FrameNet annotation to a corpus of legislative texts.","keywords":["Semantic annotation","FrameNet","Multi-layer annotated corpus"],"pages":"3719-3726","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2012\/pdf\/986_Paper.pdf","volume":"","doi":"","editors_people":"Nicoletta Calzolari (Conference Chair) and Khalid Choukri and Thierry Declerck and Mehmet U\u011fur Do\u011fan and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis","editors":["Calzolari, N.","Choukri, K.","Declerck, T.","Do\u011fan, M. 
U.","Maegaard, B.","Mariani, J.","Moreno, A.","Odijk, J.","Piperidis, S."],"published":"Proceedings of the Eight International Conference on Language Resources and Evaluation (LREC'12)","publisher":"European language resources association (ELRA) (Paris, FRA)","issn":"","isbn":"978-2-9517408-7-7","conference_name":"Eight International Conference on Language Resources and Evaluation (LREC'12)","conference_place":"Istanbul, Turkey","conference_date":"23-25 May 2012"},{"id":131669,"last_updated":"2016-03-18 14:44:40","id_people":205232,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"The BioLexicon: a large-scale terminological resource for biomedical text mining","year":2011,"authors_people":"Paul Thompson, John McNaught, Simonetta Montemagni, Nicoletta Calzolari, Riccardo del Gratta, Vivian Lee, Simone Marchi, Monica Monachini, Piotr Pezik, Valeria Quochi, CJ Rupp, Yutaka Sasaki, Giulia Venturi, Dietrich Rebholz-Schuhmann, Sophia Ananiadou","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Monachini, Monica","Marchi, Simone","Quochi, Valeria","Del Gratta, Riccardo","Zamorani, Nicoletta"],"authors_cnr_id":["5595","8945","10442","11893","11933","26123"],"authors_cnr_institute":["048","048","048","048","048","048","048"],"authors":["Thompson, P.","McNaught, J.","Montemagni, S.","Calzolari, N.","Del Gratta, R.","Lee, V.","Marchi, S.","Monachini, M.","Pezik, P.","Quochi, V.","Rupp, C.","Sasaki, Y.","Venturi, G.","Rebholz Schuhmann, D.","Ananiadou, S."],"abstract":"Background Due to the rapidly expanding body of biomedical literature, biologists require increasingly sophisticated and efficient systems to help them to search for relevant information. Such systems should account for the multiple written variants used to represent biomedical concepts, and allow the user to search for specific pieces of knowledge (or events) involving these concepts, e.g., protein-protein interactions. 
Such functionality requires access to detailed information about words used in the biomedical literature. Existing databases and ontologies often have a specific focus and are oriented towards human use. Consequently, biological knowledge is dispersed amongst many resources, which often do not attempt to account for the large and frequently changing set of variants that appear in the literature. Additionally, such resources typically do not provide information about how terms relate to each other in texts to describe events. Results This article provides an overview of the design, construction and evaluation of a large-scale lexical and conceptual resource for the biomedical domain, the BioLexicon. The resource can be exploited by text mining tools at several levels, e.g., part-of-speech tagging, recognition of biomedical entities, and the extraction of events in which they are involved. As such, the BioLexicon must account for real usage of words in biomedical texts. In particular, the BioLexicon gathers together different types of terms from several existing data resources into a single, unified repository, and augments them with new term variants automatically extracted from biomedical literature. Extraction of events is facilitated through the inclusion of biologically pertinent verbs (around which events are typically organized) together with information about typical patterns of grammatical and semantic behaviour, which are acquired from domain-specific texts. In order to foster interoperability, the BioLexicon is modelled using the Lexical Markup Framework, an ISO standard. Conclusions The BioLexicon contains over 2.2 M lexical entries and over 1.8 M terminological variants, as well as over 3.3 M semantic relations, including over 2 M synonymy relations. Its exploitation can benefit both application developers and users. 
We demonstrate some such benefits by describing integration of the resource into a number of different tools, and evaluating improvements in performance that this can bring.","keywords":["Text Mining","Information Extraction","Computational Lexicon"],"pages":"1-29","url":"http:\/\/www.biomedcentral.com\/1471-2105\/12\/397","volume":"12","doi":"10.1186\/1471-2105-12-397","editors_people":"","editors":[""],"published":"BMC bioinformatics","publisher":"BioMed Central ([London], Regno Unito)","issn":"1471-2105","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132039,"last_updated":"2015-02-27 10:07:52","id_people":320343,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Semantic annotation of Italian legal texts: a FrameNet-based approach","year":2011,"authors_people":"Venturi G.","authors_cnr":["Venturi, Giulia"],"authors_cnr_id":[""],"authors_cnr_institute":["048"],"authors":["Venturi, G."],"abstract":"","keywords":[""],"pages":"46-79","url":"https:\/\/publications.cnr.it\/doc\/320343","volume":"3","doi":"10.1075\/cf.3.1.02ven","editors_people":"","editors":[""],"published":"Constructions and frames (Print)","publisher":"Benjamins (Amsterdam, Paesi Bassi)","issn":"1876-1933","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132015,"last_updated":"2023-11-06 19:33:43","id_people":138775,"institutes":["ILC","IRISS"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Tecnologie linguistico-computazionali per il monitoraggio della competenza linguistica italiana degli alunni stranieri nella scuola primaria e secondaria","year":2011,"authors_people":"Dell'Orletta Felice; Montemagni Simonetta; Vecchi Eva Maria; Venturi Giulia","authors_cnr":["Vecchi, Eva Maria","Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["071","071","048","048"],"authors":["Dell'Orletta, 
F.","Montemagni, S.","Vecchi, E. M.","Venturi, G."],"abstract":"La possibilit\u00e0 di disporre di tecnologie avanzate e innovative che permettano di monitorare la competenza linguistica degli alunni stranieri e, al contempo, valutare l'adeguatezza dei materiali didattici a loro offerti pu\u00f2 essere di supporto all'insegnante nell'orientare la propria azione formativa, rendendo cos\u00ec il processo di integrazione linguistico-culturale meno faticoso e traumatico. In tale ottica, questo studio, realizzato col supporto di una piattaforma ormai consolidata di metodi e strumenti per il trattamento automatico dell'italiano, costituisce il primo tentativo condotto in relazione alla lingua italiana, per mettere a punto una metodologia di monitoraggio linguistico rivolta specificamente agli studenti apprendenti la lingua italiana come L2 ed alle loro produzioni scritte.","keywords":["Trattamento Automatico del Linguaggio","Stranieri","Lingua italiana"],"pages":"319-336","url":"https:\/\/publications.cnr.it\/doc\/138775","volume":"","doi":"","editors_people":"Bruno Giovanni Carlo; Caruso Immacolata; Sanna Manuela; Vellecco Immacolata","editors":["Bruno, G. 
C.","Caruso, I.","Sanna, M.","Vellecco, I."],"published":"Percorsi Migranti","publisher":"Mc Graw-Hill (Milano, ITA)","issn":"","isbn":"978-88-386-7296-5","conference_name":"","conference_place":"","conference_date":""},{"id":131676,"last_updated":"2023-11-06 19:33:42","id_people":205510,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"READ-IT: assessing readability of Italian texts with a view to text simplification","year":2011,"authors_people":"Felice Dell'Orletta, Simonetta Montemagni, Giulia Venturi","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In this paper, we propose a new approach to readability assessment with a specific view to the task of text simplification: the intended audience includes people with low literacy skills and\/or with mild cognitive impairment. READ-IT represents the first advanced readability assessment tool for what concerns Italian, which combines traditional raw text features with lexical, morpho-syntactic and syntactic information. In READ-IT readability assessment is carried out with respect to both documents and sentences where the latter represents an important novelty of the proposed approach creating the prerequisites for aligning the readability assessment step with the text simplification process. 
READ-IT shows a high accuracy in the document classification task and promising results in the sentence classification scenario.","keywords":["Readability Assessment","Text Simplification"],"pages":"73-83","url":"http:\/\/dl.acm.org\/citation.cfm?id=2140511","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-937284-14-5","conference_name":"SLPAT '11 Proceedings of the Second Workshop on Speech and Language Processing for Assistive Technologies","conference_place":"Edimburgo, UK","conference_date":"30 Luglio 2011"},{"id":131675,"last_updated":"2023-11-06 19:33:46","id_people":205505,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"ULISSE: an unsupervised algorithm for detecting reliable dependency parses","year":2011,"authors_people":"Felice Dell'Orletta, Giulia Venturi and Simonetta Montemagni","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"In this paper we present ULISSE, an unsupervised linguistically--driven algorithm to select reliable parses from the output of a dependency parser. Different experiments were devised to show that the algorithm is robust enough to deal with the output of different parsers and with different languages, as well as to be used across different domains. 
In all cases, ULISSE appears to outperform the baseline algorithms.","keywords":["Dependency Parsing","Selection of Reliable Parses","Unsupervised Algorithm"],"pages":"115-124","url":"http:\/\/dl.acm.org\/citation.cfm?id=2018950","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-1-932432-92-3","conference_name":"CoNLL '11 Proceedings of the Fifteenth Conference on Computational Natural Language Learning","conference_place":"Portland, Oregon, USA","conference_date":"23-24 Giugno 2011"},{"id":131884,"last_updated":"2023-11-06 19:33:53","id_people":278419,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Singling out Legal Knowledge from World Knowledge","year":2010,"authors_people":"Bonin F and Dell'Orletta F and VENTURI G. and Montemagni S","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":[""],"authors":["Bonin, F.","Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"","keywords":[""],"pages":"217-229","url":"https:\/\/publications.cnr.it\/doc\/278419","volume":"","doi":"","editors_people":"","editors":[""],"published":"Informatica e diritto","publisher":"Edizioni Scientifiche Italiane (Firenze, Italia)","issn":"0390-0975","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":130912,"last_updated":"2014-10-27 18:26:08","id_people":30893,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Legal Language and Legal Knowledge Management Applications","year":2010,"authors_people":"Giulia Venturi","authors_cnr":["Venturi, Giulia"],"authors_cnr_id":[""],"authors_cnr_institute":["048"],"authors":["Venturi, G."],"abstract":"This work is an investigation into the peculiarities of legal language with respect to ordinary language. 
Based on the idea that a shallow parsing approach can help to provide enough detailed linguistic information, this work presents the results obtained by shallow parsing (i.e. chunking) corpora of Italian and English legal texts and comparing them with corpora of ordinary language. In particular, this paper puts the emphasis of how understanding the syntactic and lexical characteristics of this specialised language has practical importance in the development of domain-specific Knowledge Management applications.","keywords":["Parsing Legal Texts","Natural Language Processing","Legal Language","Knowledge Management Applications"],"pages":"3-26","url":"https:\/\/publications.cnr.it\/doc\/30893","volume":"6036","doi":"","editors_people":"Francesconi E., Montemagni S., Peters W. and Tiscornia D.","editors":["Francesconi, E.","Montemagni, S.","Peters, W.","Tiscornia, D."],"published":"Semantic Processing of Legal Texts. Where the Language of Law Meets the Law of Language","publisher":"Springer-Verlag (Berlin Heidelberg, DEU)","issn":"","isbn":"3-642-12836-X","conference_name":"","conference_place":"","conference_date":""},{"id":131238,"last_updated":"2023-11-06 19:33:47","id_people":84796,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"A Contrastive Approach to Multi-word Extraction from Domain-specific Corpora","year":2010,"authors_people":"Bonin F.; Dell'Orletta F.; Montemagni S.; Venturi G.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Bonin, F.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"In this paper we present a novel approach to multi-word terminology extraction combining a well-known automatic term recognition approach, the C-NC value method, with a contrastive ranking technique, aimed at refining obtained results either by filtering noise due to common words or 
by discerning between semantically different types of terms within heterogeneous terminologies. The proposed methodology has been tested in two case studies carried out in the History of Art and Legal domains with promising results.","keywords":["Terminology Extraction","Domain-specific Corpora","Multi-word Expression"],"pages":"3222-3229","url":"https:\/\/publications.cnr.it\/doc\/84796","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"2-9517408-6-7","conference_name":"Seventh International Conference on Language Resources and Evaluation","conference_place":"Valletta, Malta","conference_date":"19-21 maggio 2010"},{"id":131244,"last_updated":"2023-11-06 19:33:50","id_people":84802,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Contrastive filtering of domain specific multi-word terms from different types of corpora","year":2010,"authors_people":"Bonin F.; Dell'Orletta F.; Venturi G.; Montemagni S.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Dell'Orletta, Felice"],"authors_cnr_id":["5595","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Bonin, F.","Dell'Orletta, F.","Venturi, G.","Montemagni, S."],"abstract":"In this paper we tackle the challenging task of Multi-word term (MWT) extraction from different types of specialized corpora. Contrastive filtering of previously extracted MWTs results in a considerable increment of acquired domain-specific terms.","keywords":["multi-word terms extraction","corpora"],"pages":"76-79","url":"https:\/\/publications.cnr.it\/doc\/84802","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-7-900268-00-6","conference_name":"The 23rd International Conference on Computational Linguistics (COLING 2010). 
Multiword Expressions: from Theory to Applications (MWE 2010)","conference_place":"Beijing, China","conference_date":"28 agosto 2010"},{"id":131317,"last_updated":"2023-11-06 19:33:52","id_people":112966,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Lessico settoriale e lessico comune nell'estrazione di terminologia specialistica da corpora di dominio","year":2010,"authors_people":"Bonin F.; Dell'Orletta F.; Montemagni S.; Venturi G.","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Montemagni, Simonetta"],"authors_cnr_id":["5595"],"authors_cnr_institute":[""],"authors":["Bonin, F.","Dell'Orletta, F.","Montemagni, S.","Venturi, G."],"abstract":"","keywords":["Automatic Term Extraction"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/112966","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"XLIV Congresso Internazionale di Studi della Societ\u00e0 di Linguistica Italiana","conference_place":"Viterbo, Universit\u00e0 degli Stud","conference_date":""},{"id":131639,"last_updated":"2012-03-29 15:58:24","id_people":173723,"institutes":["ILC","IRISS"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Tecnologie linguistico-computazionali per il monitoraggio delle competenze linguistiche di apprendenti l'italiano come L2","year":2010,"authors_people":"Dell'Orletta F.; Montemagni S.; Vecchi E. M.; Venturi G.","authors_cnr":["Venturi, Giulia","Vecchi, Eva Maria","Venturi, Giulia","Montemagni, Simonetta"],"authors_cnr_id":["5595"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Montemagni, S.","Vecchi, E. 
M.","Venturi, G."],"abstract":"","keywords":["Natural Language Processing, Educational Linguistics, Language Learning"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/173723","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Congresso \"IT. L2: italiano lingua seconda nell'universit\u00e0, nella scuola e sul territorio. Esperienze didattiche e ricerche\" Universit\u00e0 del Piemonte Orientale \"Amedeo Avogadro\", Facolt\u00e0 di Lettere e Filosofia","conference_place":"Vercelli","conference_date":"2010"},{"id":123415,"last_updated":"2012-03-28 13:17:59","id_people":173012,"institutes":["ILC","ITTIG","IGSG"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"A two-level Knowledge approach to support multilingual legislative drafting","year":2009,"authors_people":"Agnoloni T.; Bacci L.; Francesconi E.; Peters W.; Montemagni S.; Venturi G.","authors_cnr":["Bacci, Lorenzo","Agnoloni, Tommaso","Venturi, Giulia","Montemagni, Simonetta","Francesconi, Enrico"],"authors_cnr_id":["5595","10498"],"authors_cnr_institute":[""],"authors":["Agnoloni, T.","Bacci, L.","Francesconi, E.","Peters, W.","Montemagni, S.","Venturi, G."],"abstract":"","keywords":["DALOS project","Ontological-linguistic"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/173012","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131377,"last_updated":"2015-02-16 10:56:35","id_people":136465,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Ontology learning from Italian legal texts","year":2009,"authors_people":"Lenci A.; Montemagni S.; Pirrelli V.; Giulia V.","authors_cnr":["Pirrelli, Vito","Montemagni, Simonetta"],"authors_cnr_id":["326","5595"],"authors_cnr_institute":["048","048"],"authors":["Lenci, A.","Montemagni, S.","Pirrelli, 
V.","Venturi, G."],"abstract":"The paper reports on the methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. Evaluated results, however preliminary, show the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.","keywords":["Ontology Learning","document management","legal knowledge extraction"],"pages":"75-94","url":"https:\/\/publications.cnr.it\/doc\/136465","volume":"188","doi":"10.3233\/978-1-58603-942-4-75","editors_people":"Joost Breuker; Pompeu Casanovas; Michel C.A. Klein; Enrico Francesconi","editors":["Breuker, J.","Casanovas, P.","Klein, M. C. A.","Francesconi, E."],"published":"Law, Ontologies and the Semantic Web-Channelling the Legal Information Flood","publisher":"","issn":"","isbn":"978-1-58603-942-4","conference_name":"","conference_place":"","conference_date":""},{"id":123422,"last_updated":"2012-03-29 15:40:44","id_people":173712,"institutes":["ILC","ITTIG","IGSG"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Towards a FrameNet Resource for the Legal Domain","year":2009,"authors_people":"Venturi G.; Lenci A.; Montemagni S.; Vecchi E. M.; Sagri M. T.; Tiscornia D.; Agnoloni T.","authors_cnr":["Sagri, Maria Teresa","Agnoloni, Tommaso","Venturi, Giulia","Montemagni, Simonetta","Tiscornia, Daniela"],"authors_cnr_id":["5595","20276"],"authors_cnr_institute":[""],"authors":["Venturi, G.","Lenci, A.","Montemagni, S.","Vecchi, E. M.","Sagri, M. 
T.","Tiscornia, D.","Agnoloni, T."],"abstract":"","keywords":["Frame Semantics","Legal Ontologies","Knowledge Representation","Corpus Annotation"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/173712","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"3rd Workshop on Legal Ontologies and Artificial Intelligence Techniques joint with 2nd Workshop on Semantic Processing of Legal text","conference_place":"Barcelona, Spain","conference_date":"2009"},{"id":131187,"last_updated":"2023-07-09 16:06:59","id_people":84736,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Bootstrapping a Verb Lexicon for Biomedical Information Extraction","year":2009,"authors_people":"Venturi G.; Montemagni S.; Marchi S.; Sasaki Y.; Thompson P.; McNaught J.; Ananiadou S.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta","Marchi, Simone"],"authors_cnr_id":["5595","10442"],"authors_cnr_institute":[""],"authors":["Venturi, G.","Montemagni, S.","Marchi, S.","Sasaki, Y.","Thompson, P.","McNaught, J.","Ananiadou, S."],"abstract":"The extraction of information from texts requires resources that contain both syntactic and semantic properties of lexical units. As the use of language in specialized domains, such as biology, can be very different to the general domain, there is a need for domain-specific resources to ensure that the information extracted is as accurate as possible. We are building a large-scale lexical resource for the biology domain, providing information about predicate-argument structure that has been bootstrapped from a biomedical corpus on the subject of E. Coli. The lexicon is currently focussed on verbs, and includes both automatically-extracted syntactic subcategorization frames, as well as semantic event frames that are based on annotation by domain experts. 
In addition, the lexicon contains manually-added explicit links between semantic and syntactic slots in corresponding frames. To our knowledge, this lexicon currently represents a unique resource within the biomedical domain.","keywords":["domain-specific lexical resources","Biological Language Processing","syntax-semantic linking"],"pages":"137-148","url":"https:\/\/publications.cnr.it\/doc\/84736","volume":"","doi":"10.1007\/978-3-642-00382-0_11","editors_people":"","editors":[""],"published":"","publisher":"Springer-Verlag (Berlin Heidelberg, DEU)","issn":"","isbn":"9783642003813","conference_name":"10th International Conference on Intelligent Text Processing and Computational Linguistics","conference_place":"Mexico City, Mexico","conference_date":"1-7\/03\/2009"},{"id":122044,"last_updated":"2017-03-02 16:29:45","id_people":130118,"institutes":["ILC","ITTIG","IGSG"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"NLP\u2013based Metadata Extraction for Legal Text Consolidation","year":2009,"authors_people":"Spinosa P., Giardiello G., Cherubini M., Marchi S., Venturi G., Montemagni S.","authors_cnr":["Giardiello, Gerardo","Venturi, Giulia","Montemagni, Simonetta","Marchi, Simone","Cherubini, Manola","Spinosa, Pierluigi"],"authors_cnr_id":["5595","10442","11042","19619"],"authors_cnr_institute":[""],"authors":["Spinosa, P.","Giardiello, G.","Cherubini, M.","Marchi, S.","Venturi, G.","Montemagni, S."],"abstract":"","keywords":["Natural Language Processing","textual amendments","XML representation","metadata extraction","consolidation of legal text"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/130118","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Twelfth International Conference on Artificial Intelligence and Law (ICAIL 2009)","conference_place":"Barcelona","conference_date":"June 8-12, 2009"},{"id":131309,"last_updated":"2023-11-06 
19:33:54","id_people":112956,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Bootstrapping a Verb Lexicon for Biomedical Information Extraction","year":2009,"authors_people":"Venturi, Giulia; Montemagni, Simonetta; Marchi, Simone; Sasaki, Yutaka; Thompson, Paul; McNaught, John; Ananiadou, Sophia","authors_cnr":["Montemagni, Simonetta","Marchi, Simone","Dell'Orletta, Felice"],"authors_cnr_id":["5595","10442","14329"],"authors_cnr_institute":["048","048","048"],"authors":["Venturi, G.","Montemagni, S.","Marchi, S.","Sasaki, Y.","Thompson, P.","McNaught, J.","Ananiadou, S."],"abstract":"The extraction of information from texts requires resources that contain both syntactic and semantic properties of lexical units. As the use of language in specialized domains, such as biology, can be very different to the general domain, there is a need for domain-specific resources to ensure that the information extracted is as accurate as possible. We are building a large-scale lexical resource for the biology domain, providing information about predicate-argument structure that has been bootstrapped from a biomedical corpus on the subject of E. Coli. The lexicon is currently focussed on verbs, and includes both automatically-extracted syntactic subcategorization frames, as well as semantic event frames that are based on annotation by domain experts. In addition, the lexicon contains manually-added explicit links between semantic and syntactic slots in corresponding frames. 
To our knowledge, this lexicon currently represents a unique resource within the biomedical domain.","keywords":["domain-specific lexical resources","lexical acquisition","syntax-semantics linking","Information Extraction","Biological Language Processing"],"pages":"137-148","url":"https:\/\/publications.cnr.it\/doc\/112956","volume":"5449","doi":"","editors_people":"Alexander Gelbukh","editors":["Gelbukh, A."],"published":"Proceedings of the 10th International Conference on Intelligent Text Processing and Computational Linguistics (CICLing 2009)","publisher":"Springer (Berlin, Germania)","issn":"0302-9743","isbn":"978-3-642-00381-3","conference_name":"International Conference on Intelligent Text Processing and Computational Linguistics (CICLing 2009)","conference_place":"Mexico City, Mexico","conference_date":"March 1-7, 2009"},{"id":130998,"last_updated":"2023-11-06 19:34:03","id_people":64541,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio","year":2008,"authors_people":"Dell'Orletta F.; Lenci A.; Marchi S.; Montemagni S.; Pirrelli V.; Venturi G.","authors_cnr":["Dell'Orletta, Felice","Pirrelli, Vito","Montemagni, Simonetta","Marchi, Simone"],"authors_cnr_id":["326","5595","10442"],"authors_cnr_institute":["048","048","048","048"],"authors":["Dell'Orletta, F.","Lenci, A.","Marchi, S.","Montemagni, S.","Pirrelli, V.","Venturi, G."],"abstract":"The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. 
Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.","keywords":["Natural Language Processing","Machine Learning","Knowledge extraction from texts","Ontology learning","Legal ontologies"],"pages":"197-218","url":"https:\/\/publications.cnr.it\/doc\/64541","volume":"26","doi":"","editors_people":"","editors":[""],"published":"Aida Informazioni (Online)","publisher":"Aida (Roma, Italia)","issn":"1594-2201","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131160,"last_updated":"2023-11-06 19:34:04","id_people":84707,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio","year":2008,"authors_people":"Dell'Orletta Felice; Lenci Alessando; Marchi Simone; Montemagni Simonetta; Pirrelli Vito; Venturi Giulia","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Pirrelli, Vito","Montemagni, Simonetta","Marchi, Simone"],"authors_cnr_id":["326","5595","10442"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Lenci, A.","Marchi, S.","Montemagni, S.","Pirrelli, V.","Venturi, G."],"abstract":"The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. 
Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.","keywords":["Natural Language Processing","Machine Learning","Knowledge extraction from texts","Ontology learning","Legal ontologies"],"pages":"197-218","url":"http:\/\/www.assiterm91.it\/wp-content\/uploads\/2010\/11\/Convegno-2008.pdf","volume":"Anno 26, numero 1-2","doi":"","editors_people":"","editors":[""],"published":"Terminologia analisi testuale e documentazione nella citt\u00e0 digitale","publisher":"Aida (Roma, Italia)","issn":"1594-2201","isbn":"","conference_name":"Atti del Convegno Nazionale Ass. I. Term","conference_place":"Arcavacata di Rende (CS)","conference_date":"5-7\/06\/2008"},{"id":131151,"last_updated":"2023-11-06 19:34:01","id_people":84698,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Acquiring Legal Ontologies from Domain-specific Texts","year":2008,"authors_people":"Dell'Orletta F.; Lenci A.; Montemagni S.; Marchi S.; Pirrelli V.; Venturi G.","authors_cnr":["Dell'Orletta, Felice","Venturi, Giulia","Pirrelli, Vito","Montemagni, Simonetta","Marchi, Simone"],"authors_cnr_id":["326","5595","10442"],"authors_cnr_institute":[""],"authors":["Dell'Orletta, F.","Lenci, A.","Montemagni, S.","Marchi, S.","Pirrelli, V.","Venturi, G."],"abstract":"The paper reports on methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts in the environmental domain. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. 
Evaluated results, however preliminary, are very encouraging, showing the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.","keywords":["Ontology learning","Document management","knowledge extraction from texts","Natural Language Processing"],"pages":"98-101","url":"https:\/\/publications.cnr.it\/doc\/84698","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"LangTech 2008","conference_place":"Roma","conference_date":"28-29\/02\/2008"},{"id":131157,"last_updated":"2014-10-28 11:05:02","id_people":84704,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Building a Bio-Event Annotated Corpus for the Acquisition of Semantic Frames from Biomedical Corpora","year":2008,"authors_people":"Thompson P.; Cotter P.; Ananiadou S.; McNaught J.; Montemagni S.; Trabucco A.; Venturi G.","authors_cnr":["Venturi, Giulia","Trabucco, Andrea","Montemagni, Simonetta"],"authors_cnr_id":["5595"],"authors_cnr_institute":["048","","048"],"authors":["Thompson, P.","Cotter, P.","Ananiadou, S.","McNaught, J.","Montemagni, S.","Trabucco, A.","Venturi, G."],"abstract":"","keywords":["Corpus (creation","annotation","etc.)","Text mining","Semantics","Event Extraction"],"pages":"2159-2166","url":"https:\/\/publications.cnr.it\/doc\/84704","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"2-9517408-4-0","conference_name":"LREC 2008, Sixth International Conference on Language Resources and Evaluation","conference_place":"Marrakech, Morocco","conference_date":"28-30 maggio 2008"},{"id":131158,"last_updated":"2014-10-28 16:39:30","id_people":84705,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Categorising Modality in Biomedical Texts","year":2008,"authors_people":"Thompson P.; Venturi 
G.; McNaught J.; Montemagni S.; Ananiadou S.","authors_cnr":["Venturi, Giulia","Montemagni, Simonetta"],"authors_cnr_id":["5595"],"authors_cnr_institute":["048","048"],"authors":["Thompson, P.","Venturi, G.","McNaught, J.","Montemagni, S.","Ananiadou, S."],"abstract":"The accurate recognition of modal information is vital for the correct interpretation of statements. In this paper, we report on the collection of a list of words and phrases that express modal information in biomedical texts, and propose a categorisation scheme according to the type of information conveyed. We have performed a small pilot study through the annotation of 202 MEDLINE abstracts according to our proposed scheme. Our initial results suggest that modality in biomedical statements can be predicted fairly reliably through the presence of particular lexical items, together with a small amount of contextual information.","keywords":["Biomedical texts","Modality"],"pages":"27-34","url":"https:\/\/publications.cnr.it\/doc\/84705","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"2-9517408-4-0","conference_name":"LREC 2008, Sixth International Conference on Language Resources and Evaluation: Workshop 'Building and Evaluating Resources for Biomedical Text Mining'","conference_place":"Marrakech, Marocco","conference_date":"26 maggio 2008"},{"id":123403,"last_updated":"2012-04-19 15:48:46","id_people":171352,"institutes":["ILC","ITTIG","IGSG"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Building an ontological support for multilingual legislative drafting","year":2007,"authors_people":"Agnoloni T., Bacci L., Francesconi E., Spinosa P., Tiscornia D., Montemagni S., Venturi G.","authors_cnr":["Bacci, Lorenzo","Agnoloni, Tommaso","Venturi, Giulia","Montemagni, Simonetta","Francesconi, Enrico","Spinosa, Pierluigi","Tiscornia, 
Daniela"],"authors_cnr_id":["5595","10498","19619","20276"],"authors_cnr_institute":["104"],"authors":["Agnoloni, T.","Bacci, L.","Francesconi, E.","Spinosa, P.","Tiscornia, D.","Montemagni, S.","Venturi, G."],"abstract":"","keywords":[""],"pages":"9-18","url":"https:\/\/publications.cnr.it\/doc\/171352","volume":"","doi":"","editors_people":"Lodder Ar; Mommers L.","editors":["Ar, L.","Mommers, L."],"published":"Legal Knowledge and information Systems","publisher":"","issn":"","isbn":"","conference_name":"International Conference on Legal Knowledge and Information Systems (JURIX 2007)","conference_place":"Leiden","conference_date":"2007"},{"id":131147,"last_updated":"2014-10-28 17:09:37","id_people":84693,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"NLP-based ontology learning from legal texts. A case study","year":2007,"authors_people":"Lenci A., Montemagni S., Pirrelli V., Venturi G.","authors_cnr":["Venturi, Giulia","Pirrelli, Vito","Montemagni, Simonetta"],"authors_cnr_id":["326","5595"],"authors_cnr_institute":["048","048","048"],"authors":["Lenci, A.","Montemagni, S.","Pirrelli, V.","Venturi, G."],"abstract":"The paper reports on the methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts in the environmental domain. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. 
Evaluated results, however preliminary, are very encouraging, showing the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.","keywords":[""],"pages":"113-129","url":"https:\/\/publications.cnr.it\/doc\/84693","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"II Workshop on Legal Ontologies and Artificial Intelligence Techniques (LOAIT'07)","conference_place":"Stanford","conference_date":"4 giugno 2007"},{"id":131570,"last_updated":"2016-07-20 15:08:53","id_people":157440,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"Report on Ontology learning tool and testing","year":2007,"authors_people":"Montemagni S., Marchi S., Venturi G., Bartolini R., Bertagna F., Ruffolo P., Peters W., Tiscornia D.","authors_cnr":["Bertagna, Francesca","Ruffolo, Paolo","Venturi, Giulia","Montemagni, Simonetta","Bartolini, Roberto","Marchi, Simone"],"authors_cnr_id":["5595","10441","10442"],"authors_cnr_institute":[""],"authors":["Montemagni, S.","Marchi, S.","Venturi, G.","Bartolini, R.","Bertagna, F.","Ruffolo, P.","Peters, W.","Tiscornia, D."],"abstract":"This deliverable documents the work done within the DALOS EU project for what concerns the definition and implementation of methodologies and techniques to bootstrap terminological and ontological knowledge from domain corpora. 
Starting from a corpus of legacy legislative texts in different languages, linguistic technologies combined with statistical techniques have been used to extract significant terms as well as to structure them in conceptual structures for the different languages dealt with within the project, namely Italian, English, Spanish and Dutch.","keywords":["Ontology Learning","Term Extraction","Natural Language Processing","Conceptual Indexing"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/157440","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131551,"last_updated":"2009-06-16 00:00:00","id_people":157421,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"Bio-Event Linguistic Annotation Tool. User Manual","year":2007,"authors_people":"Montemagni S., Trabucco A., Venturi G.","authors_cnr":["Venturi, Giulia","Trabucco, Andrea","Montemagni, Simonetta"],"authors_cnr_id":["5595"],"authors_cnr_institute":[""],"authors":["Montemagni, S.","Trabucco, A.","Venturi, G."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/157421","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131552,"last_updated":"2009-06-16 00:00:00","id_people":157422,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"Event annotation of domain corpora","year":2007,"authors_people":"Montemagni S., Trabucco A., Venturi G., Thompson P., Cotter P., Ananiadou S., McNaught J., Kim J., Rebholz D., Pezik P.","authors_cnr":["Venturi, Giulia","Trabucco, Andrea","Montemagni, Simonetta"],"authors_cnr_id":["5595"],"authors_cnr_institute":[""],"authors":["Montemagni, S.","Trabucco, A.","Venturi, G.","Thompson, P.","Cotter, P.","Ananiadou, 
S.","McNaught, J.","Kim, J.","Rebholz, D.","Pezik, P."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/157422","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""}]