[{"id":354900,"last_updated":"2023-02-20 09:11:47","id_people":478212,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Words and the Company they Keep: Digital corpora and infrastructures for the foreign language classroom","year":2023,"authors_people":"Francesca Frontini","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":[""],"authors":["Frontini, F."],"abstract":"We give an overview of corpora & language technologies and their use in foreign language teaching.","keywords":["corpora","didattica L2","tecnologie del linguaggio"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/478212","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Didattica della lingua, della cultura e cittadinanza attiva: sfide educative contemporanee-Seminari LEND Modena","conference_place":"","conference_date":"07\/02\/2023"},{"id":343119,"last_updated":"2022-10-20 16:17:15","id_people":472288,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Language Matters. The European Research Infrastructure CLARIN, Today\u00a0and\u00a0Tomorrow","year":2022,"authors_people":"Franciska de Jong Dieter Van Uytvanck Francesca Frontini Antal van den Bosch Darja Fi\u0161er Andreas Witt","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":[""],"authors":["De Jong, F.","Van Uytvanck, D.","Frontini, F.","Van Den Bosch, A.","Fi\u0161er, D.","Witt, A."],"abstract":"CLARIN stands for \"Common Language Resources and Technology Infrastructure\". 
In 2012 CLARIN ERIC was established as a legal entity with the mission to create and maintain a digital infrastructure to support the sharing, use, and sustainability of language data (in written, spoken, or multimodal form) available through repositories from all over Europe, in support of research in the humanities and social sciences and beyond. Since 2016 CLARIN has had the status of Landmark research infrastructure and currently it provides easy and sustainable access to digital language data and also offers advanced tools to discover, explore, exploit, annotate, analyse, or combine such datasets, wherever they are located. This is enabled through a networked federation of centres: language data repositories, service centres, and knowledge centres with single sign-on access for all members of the academic community in all participating countries. In addition, CLARIN offers open access facilities for other interested communities of use, both inside and outside of academia. Tools and data from different centres are interoperable, so that data collections can be combined and tools from different sources can be chained to perform operations at different levels of complexity. The strategic agenda adopted by CLARIN and the activities undertaken are rooted in a strong commitment to the Open Science paradigm and the FAIR data principles. 
This also enables CLARIN to express its added value for the European Research Area and to act as a key driver of innovation and contributor to the increasing number of industry programmes running on data-driven processes and the digitalization of society at large.","keywords":["research infrastructure","language resources","language technology","open science","service interoperability","innovation","SSH"],"pages":"31-58","url":"https:\/\/www.degruyter.com\/document\/doi\/10.1515\/9783110767377-002\/html","volume":"1","doi":"10.1515\/9783110767377-002","editors_people":"Darja Fi\u0161er, Andreas Witt","editors":["Fi\u0161er, D.","Witt, A."],"published":"CLARIN: The Infrastructure for Language Resources","publisher":"Walter De Gruyter Inc (Boston\/Berlin\/Munich, USA)","issn":"","isbn":"978-3-11-076737-7","conference_name":"","conference_place":"","conference_date":""},{"id":340477,"last_updated":"2023-07-07 11:25:29","id_people":469112,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Italian Language Resources. From CLARIN-IT to the VLO and Back: Sketching a Methodology for Monitoring LRs Visibility","year":2022,"authors_people":"Dario Del Fante, Francesca Frontini, Monica Monachini, Valeria Quochi","authors_cnr":["Del Fante, Dario","Monachini, Monica","Quochi, Valeria","Frontini, Francesca"],"authors_cnr_id":["8945","11893","15911"],"authors_cnr_institute":[""],"authors":["Del Fante, D.","Frontini, F.","Monachini, M.","Quochi, V."],"abstract":"This paper sketches a user-oriented, qualitative methodology for both (i) monitoring the existence and availability of language resources relevant for a given CLARIN national community and language and (ii) assessing the offering potential of CLARIN, in terms of Language Resources provided to national consortia. 
From the user perspective, the methodology has been applied to investigate the visibility of language resources available for Italian within the CLARIN central services, in particular the Virtual Language Observatory. As a proof-of-concept, the methodology has been tested on the resources available through the CLARIN-IT data centres, but, ideally, it could be applied by any national data centre aiming to assess the existence of LRs in CLARIN for any given languages and check their accessibility for the interested users. It is thus argued that such an assessment might be a useful instrument in the hands of national coordinators and centre managers for (i) bringing to the fore both strengths and critical issues about their data providing community and (ii) for planning targeted actions to improve and increase both visibility and accessibility of their LRs.","keywords":["Virtual Language Observatory","CLARIN-IT","CLARIN-ERIC","Qualitative Assessment Methodology","User Involvement"],"pages":"10-22","url":"https:\/\/ecp.ep.liu.se\/index.php\/clarin\/article\/view\/413\/371","volume":"","doi":"10.3384\/9789179294441","editors_people":"Monachini, Monica and Eskevich, Maria","editors":["Monachini","Monica","Eskevich","Maria"],"published":"Selected Papers from the CLARIN Annual Conference 2021","publisher":"","issn":"","isbn":"978-91-7929-444-1","conference_name":"","conference_place":"","conference_date":""},{"id":343148,"last_updated":"2022-10-21 15:19:30","id_people":472294,"institutes":["ILC","IGSG"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Making Italian Parliamentary Records Machine-Actionable: the Construction of the ParlaMint-IT corpus","year":2022,"authors_people":"Tommaso Agnoloni, Roberto Bartolini, Francesca Frontini, Simonetta Montemagni, Carlo Marchetti, Valeria Quochi, Manuela Ruisi, Giulia Venturi","authors_cnr":["Montemagni, Simonetta","Bartolini, Roberto","Agnoloni, Tommaso","Quochi, Valeria","Frontini, 
Francesca","Venturi, Giulia"],"authors_cnr_id":["5595","10441","11403","11893","15911","17692"],"authors_cnr_institute":[""],"authors":["Agnoloni, T.","Bartolini, R.","Frontini, F.","Montemagni, S.","Marchetti, C.","Quochi, V.","Ruisi, M.","Venturi, G."],"abstract":"This paper describes the process of acquisition, cleaning, interpretation, coding and linguistic annotation of a collection of parliamentary debates from the Senate of the Italian Republic covering the COVID-19 pandemic emergency period and a former period for reference and comparison according to the CLARIN ParlaMint prescriptions. The corpus contains 1199 sessions and 79,373 speeches for a total of about 31 million words, and was encoded according to the ParlaCLARIN TEI XML format. It includes extensive metadata about the speakers, sessions, political parties and parliamentary groups. As required by the ParlaMint initiative, the corpus was also linguistically annotated for sentences, tokens, POS tags, lemmas and dependency syntax according to the universal dependencies guidelines. Named entity annotation and classification is also included. All linguistic annotation was performed automatically using state-of-the-art NLP technology with no manual revision. The Italian dataset is freely available as part of the larger ParlaMint 2.1 corpus deposited and archived in CLARIN repository together with all other national corpora. 
It is also available for direct analysis and inspection via various CLARIN services and has already been used both for research and educational purposes.","keywords":["parliamentary debates","CLARIN ParlaMint","corpus creation","corpus annotation"],"pages":"117-124","url":"https:\/\/aclanthology.org\/2022.parlaclarin-1.17\/","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"","conference_name":"Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference","conference_place":"Marseille, France","conference_date":"20\/06\/2022"},{"id":340372,"last_updated":"2022-10-18 23:27:09","id_people":468964,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"CLARIN-IT: An Overview on the Italian Clarin Consortium After Six Years of Activity","year":2022,"authors_people":"Dario Del Fante, Francesca Frontini, Monica Monachini, Valeria Quochi","authors_cnr":["Del Fante, Dario","Monachini, Monica","Quochi, Valeria","Frontini, Francesca"],"authors_cnr_id":["8945","11893","15911"],"authors_cnr_institute":[""],"authors":["Del Fante, D.","Frontini, F.","Monachini, M.","Quochi, V."],"abstract":"This paper offers an overview of the Italian CLARIN consortium after six years since its establishment. The members, the centres and the repositories and the most important collections are described. Lastly, in order to showcase the visibility and the accessibility of Language Resources provided by CLARIN-IT from a user-perspective, we show how Italian resources are findable within CLARIN ERI","keywords":["Language Resources","Data Repositories and Archives","Research Infrastructures","CLARIN"],"pages":"8","url":"http:\/\/ceur-ws.org\/Vol-3160\/short21.pdf","volume":"","doi":"","editors_people":"Giorgio Maria Di Nunzio, Beatrice Portelli, Domenico Redavid, Gianmaria Silvello","editors":["Di Nunzio, G. 
M.","Portelli, B.","Redavid, D.","Silvello, G."],"published":"Proceedings of the 18th Italian Research Conference on Digital Libraries","publisher":"CEUR-WS. org (Aachen, DEU)","issn":"1613-0073","isbn":"","conference_name":"Italian Research Conference on Digital Libraries","conference_place":"Universit\u00e0 degli Studi di Padova","conference_date":"24\/02\/2022"},{"id":343150,"last_updated":"2022-10-21 13:07:20","id_people":472292,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Language Technologies for the Creation of Multilingual Terminologies. Lessons Learned from the SSHOC Project","year":2022,"authors_people":"Federica Gamba, Francesca Frontini, Daan Broeder, Monica Monachini","authors_cnr":["Monachini, Monica","Frontini, Francesca"],"authors_cnr_id":["8945","15911"],"authors_cnr_institute":[""],"authors":["Gamba, F.","Frontini, F.","Broeder, D.","Monachini, M."],"abstract":"This paper is framed in the context of the SSHOC project and aims at exploring how Language Technologies can help in promoting and facilitating multilingualism in the Social Sciences and Humanities (SSH). Although most SSH researchers produce culturally and societally relevant work in their local languages, metadata and vocabularies used in the SSH domain to describe and index research data are currently mostly in English. We thus investigate Natural Language Processing and Machine Translation approaches in view of providing resources and tools to foster multilingual access and discovery to SSH content across different languages. As case studies, we create and deliver as freely, openly available data a set of multilingual metadata concepts and an automatically extracted multilingual Data Stewardship terminology. The two case studies allow as well to evaluate performances of state-of-the-art tools and to derive a set of recommendations as to how best apply them. 
Although not adapted to the specific domain, the employed tools prove to be a valid asset to translation tasks. Nonetheless, validation of results by domain experts proficient in the language is an unavoidable phase of the whole workflow.","keywords":["Multilingual terminologies","data curation","language resource infrastructures"],"pages":"154-163","url":"https:\/\/aclanthology.org\/2022.lrec-1.17","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"","conference_name":"13th Conference on Language Resources and Evaluation (LREC 2022)","conference_place":"Marseille, France","conference_date":"22\/06\/2022-24\/06\/2022"},{"id":340875,"last_updated":"2022-07-29 15:11:40","id_people":469567,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Esth\u00e9tique de la voix dans les livres audio en langue fran\u00e7aise","year":2022,"authors_people":"Fabrice Hirsch, Francesca Frontini, Ivana Didirkov\u00e1 and J\u00e1n Drengubiak","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":[""],"authors":["Hirsch, F.","Frontini, F.","Didirkov\u00e1, I.","Drengubiak, J."],"abstract":"Aesthetics of voice in French-language audio books. This research aims at studying listeners' preferences in audiobooks' voices. Samples of 8 male and 7 female voices were extracted from different audiobooks and analyzed. A survey has been carried out to obtain 69 listeners' points of view by answering questions on vocal features. Results show that the participants' choices depend on the literary genre. Indeed, male voices are preferred for science-fiction novels and female voices for juvenile literature and contemporary novels. Nevertheless, other literary genres that were tested do not match with a specific voice. 
On the other hand, essays are expected to be read with a slower speech rate, whereas listeners prefer faster speech rates in erotic novels.","keywords":["audiobooks","voice esthetics","speech"],"pages":"","url":"https:\/\/doi.org\/10.1051\/shsconf\/202213808004","volume":"","doi":"10.1051\/shsconf\/202213808004","editors_people":"","editors":[""],"published":"138","publisher":"","issn":"","isbn":"","conference_name":"8e Congr\u00e8s Mondial de Linguistique Fran\u00e7aise","conference_place":"Universit\u00e9 d'Orl\u00e9ans, France","conference_date":"04-08\/07\/2022"},{"id":343202,"last_updated":"2022-11-10 11:00:10","id_people":472421,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"D3. 8 Lexical-semantic analytics for NLP","year":2022,"authors_people":"Federico Martelli, Marco Maru, Cesare Campagnano, Roberto Navigli, Paola Velardi, Rafael-J Ure\u00f1a-Ruiz, Francesca Frontini, Valeria Quochi, Jelena Kallas, Kristina Koppel, Margit Langemets, Jesse de Does, Rob Tempelaars, Carole Tiberius, Rute Costa, Ana Salgado, Simon Krek, Jaka \u010cibej, Kaja Dobrovoljc, Polona Gantar, Tina Munda","authors_cnr":["Quochi, Valeria","Frontini, Francesca"],"authors_cnr_id":["11893","15911"],"authors_cnr_institute":[""],"authors":["Martelli, F.","Maru, M.","Campagnano, C.","Navigli, R.","Velardi, P.","Ure\u00f1a Ruiz, R.","Frontini, F.","Quochi, V.","Kallas, J.","Koppel, K.","Langemets, M.","De Does, J.","Tempelaars, R.","Tiberius, C.","Costa, R.","Salgado, A.","Krek, S.","\u010cibej, J.","Dobrovoljc, K.","Gantar, P.","Munda, T."],"abstract":"The present document illustrates the work carried out in task 3.3 (work package 3) focused on lexical-semantic analytics for Natural Language Processing (NLP). This task aims at computing analytics for lexical-semantic information such as words, senses and domains in the available resources, investigating their role in NLP applications. 
Specifically, this task concentrates on three research directions, namely i) sense clustering, in which grouping senses based on their semantic similarity improves the performance of NLP tasks such as Word Sense Disambiguation (WSD), ii) domain labeling of text, in which the lexicographic resources made available by the ELEXIS project for research purposes allow better performances to be achieved, and finally iii) analysing the diachronic distribution of senses, for which a software package is made available. In this deliverable, we illustrate the research activities aimed at achieving the aforementioned goals and put forward suggestions for future works. Importantly, we stress the crucial role played by high-quality lexical-semantic resources when investigating such linguistic aspects and their impact on NLP applications. To this end, as an additional contribution, we address the paucity of manually annotated data in the lexical-semantic research field and introduce the ELEXIS parallel sense-annotated dataset, a novel entirely manually annotated resource available in 10 European languages and featuring 5 annotation layers.","keywords":["research infrastructures","lexicography","lexical resources","word-sense disambiguation","WSD","sense-annotated language data","multilinguality"],"pages":"67","url":"https:\/\/elex.is\/wp-content\/uploads\/ELEXIS_D3_8_Lexical-Semantic_Analytics_for_NLP_final_report.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132494,"last_updated":"2022-05-05 11:42:29","id_people":463877,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"D5. 3 Overview of Online Tutorials and Instruction Manuals","year":2022,"authors_people":"Toma Tasovac, Carole Tiberius, Claudia Bamberg, Andrea Bellandi, Thomas Burch, Rute Costa, Matej \u010eur\u010do, Francesca Frontini, Julia Hennemann, Kris Heylen, Milo\u0161 
Jakub\u00ed\u010dek, Fahad Khan, Anne Klee, Iztok Kosem, Vojt\u011bch Kov\u00e1\u0159, Ond\u0159ej Matu\u0161ka, John McCrae, Monica Monachini, Karlheinz M\u00f6rth, Tina Munda, Valeria Quochi, Andra\u017e Repar, Christophe Roche, Ana Salgado, Henrike Sievers, Tam\u00e1s V\u00e1radi, Sandra Weyand, Anna Woldrich, Susanne Zhanial","authors_cnr":["Monachini, Monica","Quochi, Valeria","Frontini, Francesca","Bellandi, Andrea","Khan, Anas Fahad Aslam"],"authors_cnr_id":["8945","11893","15911","17627","18790"],"authors_cnr_institute":[""],"authors":["Tasovac, T.","Tiberius, C.","Bamberg, C.","Bellandi, A.","Burch, T.","Costa, R.","\u010eur\u010do, M.","Frontini, F.","Hennemann, J.","Heylen, K.","Jakub\u00ed\u010dek, M.","Khan, F.","Klee, A.","Kosem, I.","Kov\u00e1\u0159, V.","Matu\u0161ka, O.","McCrae, J.","Monachini, M.","M\u00f6rth, K.","Munda, T.","Quochi, V.","Repar, A.","Roche, C.","Salgado, A.","Sievers, H.","V\u00e1radi, T.","Weyand, S.","Woldrich, A.","Zhanial, S."],"abstract":"The ELEXIS Curriculum is an integrated set of training materials which contextualizes ELEXIS tools and services inside a broader, systematic pedagogic narrative. This means that the goal of the ELEXIS Curriculum is not simply to inform users about the functionalities of particular tools and services developed within the project, but to show how such tools and services are a) embedded in both lexicographic theory and practice; and b) representative of and contributing to the development of digital skills among lexicographers. The scope and rationale of the curriculum are described in more detail in the Deliverable D5.2 Guidelines for Producing ELEXIS Tutorials and Instruction Manuals. 
The goal of this deliverable, as stated in the project DOW, is to provide \"a clear, structured overview of tutorials and instruction manuals developed within the project.\"","keywords":["ELEXIS","lexicography","training materials"],"pages":"31","url":"https:\/\/elex.is\/wp-content\/uploads\/ELEXIS_D5_3_Overview-of-Online-Tutorials-and-Instruction-Manuals.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132483,"last_updated":"2022-07-29 11:50:57","id_people":463506,"institutes":["ILC"],"type":"misc","type_order":12,"type_people":"other","title":"CLARIN Tools and Resources for Lexicographic Work","year":2022,"authors_people":"Francesca Frontini Andrea Bellandi Valeria Quochi Monica Monachini Karlheinz M\u00f6rth Susanne Zhanial Matej \u010eur\u010do Anna Woldrich","authors_cnr":["Quochi, Valeria","Monachini, Monica","Frontini, Francesca","Bellandi, Andrea"],"authors_cnr_id":["8945","15911","17627"],"authors_cnr_institute":[""],"authors":["Frontini, F.","Bellandi, A.","Quochi, V.","Monachini, M.","M\u00f6rth, K.","Zhanial, S.","\u010eur\u010do, M.","Woldrich, A."],"abstract":"This course introduces lexicographers to the CLARIN Research Infrastructure and highlights language resources and tools useful for lexicographic practices. The course consists of two parts. In Part 1, you will learn about CLARIN, its technical and knowledge infrastructure, and about how to deposit and find lexical resources in CLARIN. 
In Part 2, you will become acquainted with CLARIN tools that can be used to create lexical resources.","keywords":["CLARIN","lexicography"],"pages":"","url":"https:\/\/elexis.humanistika.org\/id\/UnwYPq70Dewbn7XDEjsMM","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":343122,"last_updated":"2022-10-20 17:20:56","id_people":472295,"institutes":["ILC"],"type":"misc","type_order":12,"type_people":"other","title":"Parallel sense-annotated corpus ELEXIS-WSD 1. 0","year":2022,"authors_people":"Federico Martelli, Roberto Navigli, Simon Krek, Jelena Kallas, Polona Gantar, Svetla Koeva, Sanni Nimb, Bolette Sandford Pedersen, Sussi Olsen, Margit Langemets, Kristina Koppel, Tiiu \u00dcksik, Kaja Dobrovoljc, Rafael Ure\u00f1a-Ruiz, Jos\u00e9-Luis Sancho-S\u00e1nchez, Veronika Lipp, Tam\u00e1s V\u00e1radi, Andr\u00e1s Gy\u0151rffy, Simon L\u00e1szl\u00f3, Valeria Quochi, Monica Monachini, Francesca Frontini, Carole Tiberius, Rob Tempelaars, Rute Costa, Ana Salgado, Jaka \u010cibej, Tina Munda","authors_cnr":["Monachini, Monica","Quochi, Valeria","Frontini, Francesca"],"authors_cnr_id":["8945","11893","15911"],"authors_cnr_institute":[""],"authors":["Martelli, F.","Navigli, R.","Krek, S.","Kallas, J.","Gantar, P.","Koeva, S.","Nimb, S.","Pedersen, B. S.","Olsen, S.","Langemets, M.","Koppel, K.","\u00dcksik, T.","Dobrovoljc, K.","Ure\u00f1a Ruiz, R.","Sancho S\u00e1nchez, J.","Lipp, V.","V\u00e1radi, T.","Gy\u0151rffy, A.","L\u00e1szl\u00f3, S.","Quochi, V.","Monachini, M.","Frontini, F.","Tiberius, C.","Tempelaars, R.","Costa, R.","Salgado, A.","\u010cibej, J.","Munda, T."],"abstract":"ELEXIS-WSD is a parallel sense-annotated corpus in which content words (nouns, adjectives, verbs, and adverbs) have been assigned senses. 
Version 1.0 contains sentences for 10 languages: Bulgarian, Danish, English, Spanish, Estonian, Hungarian, Italian, Dutch, Portuguese, and Slovene. The corpus was compiled by automatically extracting a set of sentences from WikiMatrix (Schwenk et al., 2019), a large open-access collection of parallel sentences derived from Wikipedia, using an automatic approach based on multilingual sentence embeddings. The sentences were manually validated according to specific formal, lexical and semantic criteria (e.g. by removing incorrect punctuation, morphological errors, notes in square brackets and etymological information typically provided in Wikipedia pages). To obtain a satisfying semantic coverage, we filtered out sentences with less than 5 words and less than 2 polysemous words. Subsequently, in order to obtain datasets in the other nine target languages, for each selected sentence in English, the corresponding WikiMatrix translation into each of the other languages was retrieved. If no translation was available, the English sentence was translated manually. 
The resulting corpus is comprised of 2,024 sentences for each language.","keywords":["Word Sense Disambiguation","corpus parallelo","disambiguazione automatica del senso","annotazione semantica multilingue"],"pages":"","url":"http:\/\/hdl.handle.net\/11356\/1674","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132453,"last_updated":"2021-07-06 17:00:47","id_people":455049,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"An Internationally Fair Mediated Digital Discourse Corpus: Improving Knowledge on Reuse","year":2021,"authors_people":"Rachel Panckhurst Francesca Frontini","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":[""],"authors":["Panckhurst, R.","Frontini, F."],"abstract":"In this paper, the authors present a French Mediated Digital Discourse corpus, (88milSMS http:\/\/88milsms.huma-num.fr https:\/\/hdl.handle.net\/11403\/comere\/ cmr-88milsms). Efforts were undertaken over the years to ensure its publication according to the best practices and standards of the community, thus guaranteeing compliance with FAIR principles and CLARIN recommendations with pertinent scientific and pedagogical reuse. 
Since knowledge on how resources are reused is sometimes difficult to obtain, ways of improving this are also envisaged.","keywords":["Reuse","FAIR","SMS","corpus"],"pages":"185-193","url":"https:\/\/ecp.ep.liu.se\/index.php\/clarin\/article\/view\/20","volume":"180","doi":"10.3384\/ecp18020","editors_people":"","editors":[""],"published":"Link\u00f6ping electronic conference proceedings (Online)","publisher":"Link\u00f6ping University Electronic Press (Link\u00f6ping, Svezia)","issn":"1650-3740","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132446,"last_updated":"2021-05-12 09:45:40","id_people":453809,"institutes":["ILC"],"type":"edited_volume","type_order":3,"type_people":"book","title":"Humanit\u00e9s num\u00e9riques spatialis\u00e9es","year":2021,"authors_people":"Carmen Brando Francesca Frontini Dominic Moreau Mathieu Roche \u00c9ric Masson","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":[""],"authors":["Brando, C.","Frontini, F.","Moreau, D.","Roche, M.","Masson, \u00c9."],"abstract":"This special issue provides an introduction to the contributions presented in this thematic issue dedicated to the spatial humanities. Three main themes are addressed: (1) the processing of spatial information in textual corpora resulting from work in the human and social sciences, mainly in literary studies; (2) problems of acquisition, spatialisation and dissemination of geographical data of the past and from cultural heritage, thus, here, more connected with research in history; (3) spatial information and its processing and uses in archaeology. 
For each of these topics, we present the founding initiatives with historiographical elements, a brief status quaestionis and a synthesis of the contributions.","keywords":["spatial digital humanities","archaeology","history","history of the digital humanities","geographic information system","cartography","spatial analysis","textual analysis"],"pages":"","url":"https:\/\/journals.openedition.org\/revuehn\/689","volume":"3","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132448,"last_updated":"2021-05-12 09:44:23","id_people":453821,"institutes":["ILC"],"type":"editorial","type_order":4,"type_people":"article","title":"Introduction. Humanit\u00e9s num\u00e9riques et analyses spatiales: enjeux et perspectives","year":2021,"authors_people":"Carmen Brando Francesca Frontini Dominic Moreau Mathieu Roche \u00c9ric Masson","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":[""],"authors":["Brando, C.","Frontini, F.","Moreau, D.","Roche, M.","Masson, \u00c9."],"abstract":"","keywords":["spatial digital humanities","archaeology","history","history of the digital humanities","geographic information system","cartography","spatial analysis","textual analysis"],"pages":"","url":"https:\/\/journals.openedition.org\/revuehn\/2038","volume":"3","doi":"","editors_people":"","editors":[""],"published":"Humanit\u00e9s num\u00e9riques (Online)","publisher":"Humanistica (Bruxelles, Belgio)","issn":"2736-2337","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132472,"last_updated":"2022-02-16 15:49:26","id_people":461705,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Designing the ELEXIS Parallel Sense-Annotated Dataset in 10 European Languages","year":2021,"authors_people":"Martelli, Federico and Navigli, Roberto and Krek, Simon and Tiberius, 
Carole and Kallas, Jelena and Gantar, Polona and Koeva, Svetla and Nimb, Sanni and Pedersen, Bolette Sandford and Olsen, Sussi and Langemets, Margit and Koppel, Kristina and \u00dcksik, Tiiu and Dobrovoljc, Kaja and Ure\u00f1a-Ruiz, Rafael-J. and Sancho-S\u00e1nchez, Jos\u00e9-Luis and Lipp, Veronika and Varadi, Tamas and Gy\u0151rffy, Andr\u00e1s and L\u00e1szl\u00f3, Simon and Quochi, Valeria and Monachini, Monica and Frontini, Francesca and Tempelaars, Rob and Costa, Rute and Salgado, Ana and \u010cibej, Jaka and Munda, Tina","authors_cnr":["Monachini, Monica","Quochi, Valeria","Frontini, Francesca"],"authors_cnr_id":["8945","11893","15911"],"authors_cnr_institute":[""],"authors":["Martelli, F.","Navigli, R.","Krek, S.","Tiberius, C.","Kallas, J.","Gantar, P.","Koeva, S.","Nimb, S.","Pedersen, B. S.","Olsen, S.","Langemets, M.","Koppel, K.","\u00dcksik, T.","Dobrovoljc, K.","Ure\u00f1a Ruiz, R.","Sancho S\u00e1nchez, J.","Lipp, V.","Varadi, T.","Gy\u0151rffy, A.","L\u00e1szl\u00f3, S.","Quochi, V.","Monachini, M.","Frontini, F.","Tempelaars, R.","Costa, R.","Salgado, A.","\u010cibej, J.","Munda, T."],"abstract":"Over the course of the last few years, lexicography has witnessed the burgeoning of increasingly reliable automatic approaches supporting the creation of lexicographic resources such as dictionaries, lexical knowledge bases and annotated datasets. In fact, recent achievements in the field of Natural Language Processing and particularly in Word Sense Disambiguation have widely demonstrated their effectiveness not only for the creation of lexicographic resources, but also for enabling a deeper analysis of lexical-semantic data both within and across languages. Nevertheless, we argue that the potential derived from the connections between the two fields is far from exhausted. In this work, we address a serious limitation affecting both lexicography and Word Sense Disambiguation, i.e. 
the lack of high-quality sense-annotated data and describe our efforts aimed at constructing a novel entirely manually annotated parallel dataset in 10 European languages. For the purposes of the present paper, we concentrate on the annotation of morpho-syntactic features. Finally, unlike many of the currently available sense-annotated datasets, we will annotate semantically by using senses derived from high-quality lexicographic repositories.","keywords":["Digital lexicography","Natural Language Processing","Computational Linguistics","Corpus Linguistics","Word Sense Disambiguation"],"pages":"377-396","url":"https:\/\/static-curis.ku.dk\/portal\/files\/279888836\/eLex_2021_22_pp377_395.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"Proceedings of the eLex 2021 conference","publisher":"","issn":"","isbn":"","conference_name":"eLex 2021","conference_place":"","conference_date":"05\/07\/2021-07\/07\/2021"},{"id":132454,"last_updated":"2021-07-09 13:36:50","id_people":455136,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"SSHOC'ing drama in the cloud","year":2021,"authors_people":"Maria Eskevich Francesca Frontini","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":[""],"authors":["Eskevich, M.","Frontini, F."],"abstract":"At LIBER 2021 Online Conference, CLARIN and SSHOC presented a webinar showcasing how SSH researchers can benefit from the resources and services offered by SSH research infrastructures in order to produce and exploit highly encoded historical textual data. 
After the webinar, the participants were able to successfully guide and advise SSH researchers (with a particular focus on literature studies) in their choice amongst existing resources and tools, based on their research question.","keywords":["CLARIN","infrastrutture","scienze umane e sociali"],"pages":"","url":"https:\/\/zenodo.org\/record\/5082522#.YOgETBMzb0s","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"LIBER annual conference","conference_place":"virtual event","conference_date":"08\/07\/2021"},{"id":132424,"last_updated":"2021-02-03 13:14:44","id_people":443609,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Di cosa parliamo quando parliamo di FAIR?","year":2021,"authors_people":"Francesca Frontini Anas Fahad Khan","authors_cnr":["Frontini, Francesca","Khan, Anas Fahad"],"authors_cnr_id":["15911","18790"],"authors_cnr_institute":[""],"authors":["Frontini, F.","Khan, A. F."],"abstract":"Nel 2016 un consorzio di scienziati afferenti a diverse istituzioni e discipline enuncia i principi FAIR; in questi quattro anni l'importanza e la portata del programma FAIR \u00e8 divenuta sempre pi\u00f9 evidente. L'adesione a tali principi nelle discipline umanistiche sembra farsi largo, ma non senza difficolt\u00e0 e interrogativi. Questo lavoro propone una riflessione sulle implicazioni della proposta FAIR per la gestione dei dati scientifici, confrontandola con la sua effettiva ricezione nella comunit\u00e0 delle DH in Italia e in Europa.","keywords":["Principi FAIR","Open Data","dati della ricerca","politiche della ricerca","EOSC"],"pages":"19-24","url":"https:\/\/aiucd2021.labcd.unipi.it\/en\/book-of-abstracts-conference\/","volume":"","doi":"","editors_people":"Angelo Mario Del Grosso, Federico Boschetti, Enrica Salvatori","editors":["Del Grosso, A. 
M.","Boschetti, F.","Salvatori, E."],"published":"AIUCD 2021-DH per la societ\u00e0: e-guaglianza, partecipazione, diritti e valori nell'era digitale. Raccolta degli abstract estesi della 10a conferenza nazionale, Pisa, 2021","publisher":"","issn":"","isbn":"9788894253559","conference_name":"AIUCD 2021-DH per la societ\u00e0: e-guaglianza, partecipazione, diritti e valori nell'era digitale","conference_place":"","conference_date":"19-22\/01\/2021"},{"id":132481,"last_updated":"2022-02-01 17:16:47","id_people":463461,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"D3.9 Report on Ontology and Vocabulary Collection and Publication","year":2021,"authors_people":"Francesca Frontini Federica Gamba Monica Monachini; Daan Broeder Kea Tijdens Irena Vipavc Brvar","authors_cnr":["Gamba, Federica","Monachini, Monica","Frontini, Francesca"],"authors_cnr_id":["8945","15911"],"authors_cnr_institute":[""],"authors":["Frontini, F.","Gamba, F.","Monachini, M."],"abstract":"This deliverable pertains to SSHOC Task 3.1 which was responsible for investigating and providing resources and tools to support the multilingual aspects of the future pan-EU SSH infrastructure. Making data and services accessible and usable in SSH is very much also a matter of providing relevant translations, translation of metadata concepts, multilingual vocabularies, terminology extraction across languages, multilingual databases. The deliverable offers a detailed report on the gathering and translation of relevant SSH metadata, ontologies and vocabularies for the use-cases indicated in the task's topics: multilingual metadata concepts and vocabularies, the multilingual occupation ontology, with cross-country female occupational titles. In accordance with SSHOC and the EOSC FAIR recommendations and requirements, the metadata vocabularies and ontologies have been published via several different formats and facilities. Section 1. 
The introduction sets the landscape and describes the need of multilingual vocabularies both for classification and discovery in the context of a cloud-based infrastructure that will offer access to research data and related services adapted to the needs of the SSH community. Section 2. \"Multilingual metadata\" investigates the possibility to use and test Natural Language Processing (NLP) approaches and Machine Translation (MT) to make the metadata more accessible using national languages other than English. A selected case study was the recommended metadata set of the CLARIN Concept Registry (CCR): the whole set of metadata and definitions were translated into French, Greek, and Italian. The section describes the machine-translation and evaluation process, also comparing different technologies. Section 3. \"Multilingual vocabularies and ontologies\" introduces two other typical case-studies. The first one addresses one of the pressing needs in social sciences research. Many surveys, indeed, ask respondents to specify their occupation and the occupational ontology is used for the survey questions. For many languages the occupational titles for males and females are not identical. In section 3.1 the enrichment of the occupational ontology with lists for male and female titles, is described for many languages, namely for Dutch, German, Slovenian and French. The second case study focuses on the automatic extraction of terminology from texts: a list of domain-specific terms was automatically extracted from a corpus of Data Curation and Stewardship, validated by domain experts, automatically translated into multiple languages (Dutch, French, German, Greek, Italian, Slovenian) and linked to other existing terminologies. Section 4. describes the SKOS-ification and publication process of the results, together with the challenges posed by multilinguality. Section 5. 
offers an overview of the exploitation and sustainability of the results and how these are made available to the community. Finally the Conclusions provide some reflections on Machine Translation approaches adopted for translating the vocabularies into multiple languages, the advantages in terms of time saving and some first recommendations to the community.","keywords":["Terminologies","Infrastructures","Social Sciences and Humanities","Data Curation","Data Stewardship","vocabularies","Translations","Metadata"],"pages":"","url":"https:\/\/doi.org\/10.5281\/zenodo.5913485","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132447,"last_updated":"2021-05-12 09:45:04","id_people":453820,"institutes":["ILC"],"type":"misc","type_order":12,"type_people":"other","title":"Guide d'annotation manuelle d'entit\u00e9s nomm\u00e9es dans des corpus litt\u00e9raires","year":2021,"authors_people":"Motasem Alrahabi, Carmen Brando, Francesca Frontini, Arthur Provenier, Romain Jalabert, Margarite Bordry, Camille Koskas, James Gawley","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":[""],"authors":["Alrahabi, M.","Brando, C.","Frontini, F.","Provenier, A.","Jalabert, R.","Bordry, M.","Koskas, C.","Gawley, J."],"abstract":"Guide d'annotation manuelle d'entit\u00e9s nomm\u00e9es dans des corpus litt\u00e9raires Campagne d'annotation OBVIL 2019-2021","keywords":["NER"],"pages":"","url":"https:\/\/hal.archives-ouvertes.fr\/hal-03156278","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132489,"last_updated":"2022-05-05 11:44:08","id_people":463861,"institutes":["ILC"],"type":"misc","type_order":12,"type_people":"other","title":"Linguistically annotated multilingual comparable corpora of parliamentary 
debates ParlaMint.ana 2.1","year":2021,"authors_people":"Toma\u017e Erjavec, Maciej Ogrodniczuk, Petya Osenova, Nikola Ljube\u0161i\u0107, Kiril Simov, Vladislava Grigorova, Micha\u0142 Rudolf, Andrej Pan\u010dur, Maty\u00e1\u0161 Kopp, Starka\u00f0ur Barkarson, Stein\u00feor Steingr\u00edmsson, Henk van der Pol, Griet Depoorter, Jesse de Does, Bart Jongejan, Dorte Haltrup Hansen, Costanza Navarretta, Mar\u00eda Calzada P\u00e9rez, Luciana D. de Macedo, Ruben van Heusden, Maarten Marx, \u00c7a\u011fr\u0131 \u00c7\u00f6ltekin, Matthew Coole, Tommaso Agnoloni, Francesca Frontini, Simonetta Montemagni, Valeria Quochi, Giulia Venturi, Manuela Ruisi, Carlo Marchetti, Roberto Battistoni, Mikl\u00f3s Seb\u0151k, Orsolya Ring, Roberts Dar\u0123is, Andrius Utka, Mindaugas Petkevi\u010dius, Monika Briedien\u0117, Tomas Krilavi\u010dius, Vaidas Morkevi\u010dius, Roberto Bartolini, Andrea Cimino, Sascha Diwersy, Giancarlo Luxardo, Paul Rayson","authors_cnr":["Cimino, Andrea","Montemagni, Simonetta","Bartolini, Roberto","Agnoloni, Tommaso","Quochi, Valeria","Frontini, Francesca","Venturi, Giulia"],"authors_cnr_id":["5595","10441","11403","11893","15911","17692"],"authors_cnr_institute":[""],"authors":["Erjavec, T.","Ogrodniczuk, M.","Osenova, P.","Ljube\u0161i\u0107, N.","Simov, K.","Grigorova, V.","Rudolf, M.","Pan\u010dur, A.","Kopp, M.","Barkarson, S.","Steingr\u00edmsson, S.","Van Der Pol, H.","Depoorter, G.","De Does, J.","Jongejan, B.","Haltrup Hansen, D.","Navarretta, C.","Calzada P\u00e9rez, M.","De Macedo, L. 
D.","Van Heusden, R.","Marx, M.","\u00c7\u00f6ltekin, \u00c7.","Coole, M.","Agnoloni, T.","Frontini, F.","Montemagni, S.","Quochi, V.","Venturi, G.","Ruisi, M.","Marchetti, C.","Battistoni, R.","Seb\u0151k, M.","Ring, O.","Dar\u0123is, R.","Utka, A.","Petkevi\u010dius, M.","Briedien\u0117, M.","Krilavi\u010dius, T.","Morkevi\u010dius, V.","Diwersy, S.","Luxardo, G.","Rayson, P."],"abstract":"ParlaMint 2.1 is a multilingual set of 17 comparable corpora containing parliamentary debates mostly starting in 2015 and extending to mid-2020, with each corpus being about 20 million words in size. The sessions in the corpora are marked as belonging to the COVID-19 period (from November 1st 2019), or being \"reference\" (before that date). The corpora have extensive metadata, including aspects of the parliament; the speakers (name, gender, MP status, party affiliation, party coalition\/opposition); are structured into time-stamped terms, sessions and meetings; with speeches being marked by the speaker and their role (e.g. chair, regular speaker). The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. Note that some corpora have further information, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The corpora are encoded according to the Parla-CLARIN TEI recommendation (https:\/\/clarin-eric.github.io\/parla-clarin\/), but have been validated against the compatible, but much stricter ParlaMint schemas. This entry contains the linguistically marked-up version of the corpus, while the text version is available at http:\/\/hdl.handle.net\/11356\/1432. The ParlaMint.ana linguistic annotation includes tokenization, sentence segmentation, lemmatisation, Universal Dependencies part-of-speech, morphological features, and syntactic dependencies, and the 4-class CoNLL-2003 named entities. 
Some corpora also have further linguistic annotations, such as PoS tagging or named entities according to language-specific schemes, with their corpus TEI headers giving further details on the annotation vocabularies and tools.","keywords":["dibattiti parlamentari","covid-19","ParlaCLARIN","parlamenti","discorso politico","CLARIN","linguistic annotation","pos-tagging","ner","linguistic dependency annotation","UD"],"pages":"","url":"http:\/\/hdl.handle.net\/11356\/1431","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132490,"last_updated":"2022-05-05 11:44:28","id_people":463865,"institutes":["ILC"],"type":"misc","type_order":12,"type_people":"other","title":"Multilingual comparable corpora of parliamentary debates ParlaMint 2.1","year":2021,"authors_people":"Toma\u017e Erjavec, Maciej Ogrodniczuk, Petya Osenova, Nikola Ljube\u0161i\u0107, Kiril Simov, Vladislava Grigorova, Micha\u0142 Rudolf, Andrej Pan\u010dur, Maty\u00e1\u0161 Kopp, Starka\u00f0ur Barkarson, Stein\u00feor Steingr\u00edmsson, Henk van der Pol, Griet Depoorter, Jesse de Does, Bart Jongejan, Dorte Haltrup Hansen, Costanza Navarretta, Mar\u00eda Calzada P\u00e9rez, Luciana D. de Macedo, Ruben van Heusden, Maarten Marx, \u00c7a\u011fr\u0131 
\u00c7\u00f6ltekin, Matthew Coole, Tommaso Agnoloni, Francesca Frontini, Simonetta Montemagni, Valeria Quochi, Giulia Venturi, Manuela Ruisi, Carlo Marchetti, Roberto Battistoni, Mikl\u00f3s Seb\u0151k, Orsolya Ring, Roberts Dar\u0123is, Andrius Utka, Mindaugas Petkevi\u010dius, Monika Briedien\u0117, Tomas Krilavi\u010dius, Vaidas Morkevi\u010dius, Roberto Bartolini, Andrea Cimino, Sascha Diwersy, Giancarlo Luxardo, Paul Rayson","authors_cnr":["Montemagni, Simonetta","Agnoloni, Tommaso","Quochi, Valeria","Frontini, Francesca","Venturi, Giulia"],"authors_cnr_id":["5595","11403","11893","15911","17692"],"authors_cnr_institute":[""],"authors":["Erjavec, T.","Ogrodniczuk, M.","Osenova, P.","Ljube\u0161i\u0107, N.","Simov, K.","Grigorova, V.","Rudolf, M.","Pan\u010dur, A.","Kopp, M.","Barkarson, S.","Steingr\u00edmsson, S.","Van Der Pol, H.","Depoorter, G.","De Does, J.","Jongejan, B.","Haltrup Hansen, D.","Navarretta, C.","Calzada P\u00e9rez, M.","De Macedo, L. D.","Van Heusden, R.","Marx, M.","\u00c7\u00f6ltekin, \u00c7.","Coole, M.","Agnoloni, T.","Frontini, F.","Montemagni, S.","Quochi, V.","Venturi, G.","Ruisi, M.","Marchetti, C.","Battistoni, R.","Seb\u0151k, M.","Ring, O.","Dar\u0123is, R.","Utka, A.","Petkevi\u010dius, M.","Briedien\u0117, M.","Krilavi\u010dius, T.","Morkevi\u010dius, V.","Diwersy, S.","Luxardo, G.","Rayson, P."],"abstract":"ParlaMint 2.1 is a multilingual set of 17 comparable corpora containing parliamentary debates mostly starting in 2015 and extending to mid-2020, with each corpus being about 20 million words in size. The sessions in the corpora are marked as belonging to the COVID-19 period (after November 1st 2019), or being \"reference\" (before that date). The corpora have extensive metadata, including aspects of the parliament; the speakers (name, gender, MP status, party affiliation, party coalition\/opposition); are structured into time-stamped terms, sessions and meetings; with speeches being marked by the speaker and their role (e.g. 
chair, regular speaker). The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. Note that some corpora have further information, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The corpora are encoded according to the Parla-CLARIN TEI recommendation (https:\/\/clarin-eric.github.io\/parla-clarin\/), but have been validated against the compatible, but much stricter ParlaMint schemas. This entry contains the ParlaMint TEI-encoded corpora with the derived plain text version of the corpus along with TSV metadata on the speeches. Also included is the 2.0 release of the data and scripts available at the GitHub repository of the ParlaMint project. Note that there also exists the linguistically marked-up version of the corpus, which is available at http:\/\/hdl.handle.net\/11356\/1431.","keywords":["dibattiti parlamentari","covid-19","discorso politico","CLARIN","parlamenti","ParlaCLARIN"],"pages":"","url":"http:\/\/hdl.handle.net\/11356\/1432","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132480,"last_updated":"2022-02-02 16:07:43","id_people":463503,"institutes":["ILC"],"type":"misc","type_order":12,"type_people":"other","title":"SSHOC Multilingual Data Stewardship Terminology","year":2021,"authors_people":"Francesca Frontini Federica Gamba Monica Monachini Daan Broeder","authors_cnr":["Gamba, Federica","Monachini, Monica","Frontini, Francesca"],"authors_cnr_id":["8945","15911"],"authors_cnr_institute":[""],"authors":["Frontini, F.","Gamba, F.","Monachini, M.","Broeder, D."],"abstract":"The SSHOC Multilingual Data Stewardship Terminology is a multilingual terminology that collects terms specific to the domain of Data Stewardship, as well as their definitions. 
A list of domain-specific terms was automatically extracted from a corpus pertaining to the domain of Data Stewardship and Curation, validated by domain experts, assigned a definition, and linked to other existing terminologies (Loterre Open Science Thesaurus, terms4FAIRskills, Linked Open Vocabularies, ISO terms and definitions). Each term-definition pair was then automatically translated into multiple languages (Dutch, French, German, Greek, Italian, Slovenian) by employing Deep-L. The Multilingual Data Stewardship Terminology thus consists of 210 concepts available in Dutch, French, German, Greek, Italian, Slovenian. This resource was created within the frame of the SSHOC (Social Sciences and Humanities Open Cloud) project (H2020-INFRAEOSC-2018-2-823782). It is the result of the work of Task 3.1.2 \"extraction of terminology from technical documentation about standards and interoperability\", as described in D3.9, carried out jointly by ILC-CNR and CLARIN ERIC.","keywords":["terminology","data stewardship"],"pages":"","url":"http:\/\/hdl.handle.net\/20.500.11752\/ILC-567","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132482,"last_updated":"2022-02-02 16:06:29","id_people":463504,"institutes":["ILC"],"type":"misc","type_order":12,"type_people":"other","title":"SSHOC Multilingual Metadata","year":2021,"authors_people":"Francesca Frontini Federica Gamba Monica Monachini Daan Broeder","authors_cnr":["Gamba, Federica","Monachini, Monica","Frontini, Francesca"],"authors_cnr_id":["8945","15911"],"authors_cnr_institute":[""],"authors":["Frontini, F.","Gamba, F.","Monachini, M.","Broeder, D."],"abstract":"SSHOC Multilingual Metadata is based on the metadata set of the CLARIN Concept Registry (CCR). 
The CCR 232 approved metadata concepts, as well as their definitions, were automatically translated into several languages (Dutch, French, Greek, Italian) thanks to the support of Machine Translation tools, and eventually validated by native speakers who were also expert of the domain. This resource was created within the frame of the SSHOC (Social Sciences and Humanities Open Cloud) project (H2020-INFRAEOSC-2018-2-823782). It is the result of the work of Task 3.1.3 \"creating Multilingual metadata and taxonomies for discovery\", as described in D3.9, carried out jointly by ILC-CNR and CLARIN ERIC.","keywords":["metadata","terminology"],"pages":"","url":"http:\/\/hdl.handle.net\/20.500.11752\/ILC-568","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132398,"last_updated":"2020-11-28 12:19:39","id_people":437563,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Dans les coulisses des infrastructures europ\u00e9ennes en SHS. R\u00f4le et opportunit\u00e9s pour les acteurs de la recherche (ing\u00e9nieurs et chercheurs)","year":2020,"authors_people":"Frontini, Francesca","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":[""],"authors":["Frontini, F."],"abstract":"La composante technologique prend une dimension de jour en jour plus importante en LLASHS. Les projets de recherche sont de plus en plus nombreux \u00e0 mobiliser de gros volumes de donn\u00e9es exigeant des services adapt\u00e9s garants de formes de m\u00e9thodologies augment\u00e9es (exploitation, interop\u00e9rabilit\u00e9, accessibilit\u00e9, archivage). 
Afin de partager les savoirs et de garantir l'interop\u00e9rabilit\u00e9 et la pr\u00e9servation \u00e0 long terme de ces ressources et services, de grandes infrastructures informatiques se mettent en place aux niveaux national et international. Dans cette pr\u00e9sentation, vous allez d\u00e9couvrir le panorama, en la mati\u00e8re, des e-infrastructures et des grands projets europ\u00e9ens \u00e0 caract\u00e8re infrastructurel, avec un accent particulier sur les technologies utilis\u00e9es, les principaux services offerts, et les aspects les plus int\u00e9ressants en termes de synergie entre approches et disciplines diff\u00e9rentes. La pr\u00e9sentation portera sur des ERICs (European Research Infrastructure Consortium) \u00e9tablis, comme CLARIN et DARIAH, et sur des projets r\u00e9cents ou en cours de d\u00e9veloppement, comme PARTHENOS, SSHOC, ELEXIS et TRIPLE. Concernant les aspects techniques, on abordera les questions li\u00e9es au d\u00e9p\u00f4t, au stockage, \u00e0 l'identification (single sign-on), aux formats et choix des m\u00e9tadonn\u00e9es et de mod\u00e9lisation formelle, \u00e0 la recherche f\u00e9d\u00e9r\u00e9e des sources. Nous soulignerons en particulier l'interaction de ces projets avec les infrastructures nationales, notamment Huma-Num, ainsi qu'avec la r\u00e9cemment constitu\u00e9e European Open Science Cloud (EOSC). La pr\u00e9sentation aura une vis\u00e9e pratique, avec l'objectif de fournir des indications concr\u00e8tes aux acteurs de la recherche (chercheurs, ing\u00e9nieurs...) 
qui souhaitent participer \u00e0 ces initiatives et aux groupes de travail qui les animent, ou plus largement favoriser l'acc\u00e8s des chercheurs fran\u00e7ais aux nombreux services et opportunit\u00e9s offerts.","keywords":["Infrastrutture di ricerca","Scienze umane e sociali"],"pages":"","url":"https:\/\/ja-mate2020.sciencesconf.org\/data\/pages\/Resume_Frontini_Nov.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Journ\u00e9es annuelles du r\u00e9seau Mate-shs (JA2020)","conference_place":"Montpellier","conference_date":"10\/11\/2020"},{"id":132263,"last_updated":"2018-08-13 16:54:50","id_people":387178,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"One Language to rule them all: modelling Morphological Patterns in a Large Scale Italian Lexicon with SWRL","year":2018,"authors_people":"Khan Fahad, Bellandi Andrea, Frontini Francesca, Monachini Monica","authors_cnr":["Khan, Anas Fahad","Monachini, Monica","Bellandi, Andrea"],"authors_cnr_id":["8945","17627"],"authors_cnr_institute":[""],"authors":["Khan, F.","Bellandi, A.","Frontini, F.","Monachini, M."],"abstract":"We present an application of Semantic Web Technologies to computational lexicography. More precisely we describe the publication of the morphological layer of the Italian Parole Simple Clips lexicon (PSC-M) as linked open data. The novelty of our work is in the use of the Semantic Web Rule Language (SWRL) to encode morphological patterns, thereby allowing the automatic derivation of the inflectional variants of the entries in the lexicon. 
By doing so we make these patterns available in a form that is human readable and that therefore gives a comprehensive morphological description of a large number of Italian words.","keywords":["Morphology","Linked Open Data","Italian Lexicon","SWRL","SQVRL"],"pages":"4385-4389","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2018\/pdf\/844.pdf","volume":"","doi":"","editors_people":"Nicoletta Calzolari (Conference chair), Khalid Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Koiti Hasida, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, H\u00e9l\u00e8ne Mazo, Asuncion Moreno, Jan Odijk, Stelios Piperidis, Takenobu Tokunaga","editors":["Calzolari, N."],"published":"Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"979-10-95546-00-9","conference_name":"Eleventh International Conference on Language Resources and Evaluation (LREC 2018)","conference_place":"Miyazaki, Japan","conference_date":"7-12\/05\/2018"},{"id":132183,"last_updated":"2021-04-27 16:34:59","id_people":368363,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Semantic Search Engine for Data Management and Sustainable Development: Marine Planning Service Platform","year":2017,"authors_people":"Giuseppe M. R. Manzella, Roberto Bartolini, Franco Bustaffa, Paolo D'Angelo, Maurizio De Mattei, Francesca Frontini, Maurizio Maltese, Daniele Medone, Monica Monachini, Antonio Novellino, Andrea Spada","authors_cnr":["Monachini, Monica","Bartolini, Roberto","Frontini, Francesca"],"authors_cnr_id":["8945","10441","15911"],"authors_cnr_institute":[""],"authors":["Manzella, G. M. 
R.","Bartolini, R.","Bustaffa, F.","D'Angelo, P.","De Mattei, M.","Frontini, F.","Maltese, M.","Medone, D.","Monachini, M.","Novellino, A.","Spada, A."],"abstract":"This chapter presents a computer platform supporting a Marine Information and Knowledge System based on a repository that gathers, classify and structures marine scientific literature and data, guaranteeing their accessibility by means of standard protocols. This requires the access to quality controlled data and to information that is provided in grey literature and\/or in relevant scientific literature. There exist efforts to develop search engines to find author's contributions to scientific literature or publications. This implies the use of persistent identifiers. However very few efforts are dedicated to link publications to data that was used, or cited in them or that can be of importance for the published studies. Full-text technologies are often unsuccessful since they assume the presence of specific keywords in the text; to fix this problem,it is suggested to use different semantic technologies for retrieving the text and data and thus getting much more complying results.","keywords":["Marine Information and Knowledge System"],"pages":"127-154","url":"http:\/\/www.igi-global.com\/chapter\/semantic-search-engine-for-data-management-and-sustainable-development\/166839#","volume":"Volume 7","doi":"10.4018\/978-1-5225-0700-0.ch006","editors_people":"Paolo Diviacco, Adam Leadbetter & Helen Glaves","editors":["Diviacco, P.","Leadbetter, A.","Glaves, H."],"published":"Oceanographic and Marine Cross-Domain Data Management for Sustainable Development","publisher":"IGI Global (Hershey, USA)","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132130,"last_updated":"2016-08-29 17:06:05","id_people":357604,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Annotation of Toponyms in TEI Digital Literary Editions and Linking 
to the Web of Data","year":2016,"authors_people":"Francesca Frontini Carmen Brando Marine Riguet Cl\u00e9mence Jacquot Vincent Jolivet","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":["048"],"authors":["Frontini, F.","Brando, C.","Riguet, M.","Jacquot, C.","Jolivet, V."],"abstract":"This paper aims to discuss the challenges and benefits of the annotation of place names in literary texts and literary criticism. We shall first highlight the problems of encoding spatial information in digital editions using the TEI format by means of two manual annotation experiments and the discussion of various cases. This will lead to the question of how to use existing semantic web resources to complement and en-rich toponym mark-up, in particular to provide mentions with precise geo-referencing. Finally the automatic annotation of a large corpus will show the potential of visualizing places from texts, by illustrating an analysis of the evolution of literary life from the spatial and geographical point of view.","keywords":["digital literary studies toponyms semantic web geographic databases maps and visualizations"],"pages":"49-75","url":"http:\/\/dx.doi.org\/10.14195\/2182-8830_4-2_3","volume":"4","doi":"10.14195\/2182-8830_4-2_3","editors_people":"","editors":[""],"published":"MATLIT: Materialidades da Literatura","publisher":"","issn":"2182-8830","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132128,"last_updated":"2016-08-29 17:10:14","id_people":357602,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"REDEN: Named Entity Linking in Digital Literary Editions Using Linked Data Sets","year":2016,"authors_people":"Carmen Brando Francesca Frontini Jean-Gabriel Ganascia","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":["048"],"authors":["Frontini, F.","Brando, C.","Ganascia, J. 
G."],"abstract":"This paper proposes a graph-based Named Entity Linking (NEL) algorithm named REDEN for the disambiguation of authors' names in French literary criticism texts and scientific essays from the 19th and early 20th centuries. The algorithm is described and evaluated according to the two phases of NEL as reported in current state of the art, namely, candidate retrieval and candidate selection. REDEN leverages knowledge from different Linked Data sources in order to select candidates for each author mention, subsequently crawls data from other Linked Data sets using equivalence links (e.g., owl:sameAs), and, finally, fuses graphs of homologous individuals into a non-redundant graph well-suited for graph centrality calculation; the resulting graph is used for choosing the best referent. The REDEN algorithm is distributed in open-source and follows current standards in digital editions (TEI) and semantic Web (RDF). Its integration into an editorial workflow of digital editions in Digital humanities and cultural heritage projects is entirely plausible. 
Experiments are conducted along with the corresponding error analysis in order to test our approach and to help us to study the weaknesses and strengths of our algorithm, thereby to further improvements of REDEN.","keywords":["Named Entity Linking","graph centrality","linked data","data fusion","digital humanities"],"pages":"60-80","url":"https:\/\/csimq-journals.rtu.lv\/article\/view\/csimq.2016-7.04","volume":"7","doi":"10.7250\/csimq.2016-7.04","editors_people":"","editors":[""],"published":"Complex Systems Informatics and Modeling Quarterly","publisher":"","issn":"2255-9922","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132133,"last_updated":"2017-06-22 08:54:11","id_people":359144,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"A semantic engine for grey literature retrieval in the oceanography domain","year":2016,"authors_people":"Sara Goggi, Gabriella Pardelli, Roberto Bartolini, Francesca Frontini, Monica Monachini, Giuseppe Manzella, Maurizio De Mattei and Franco Bustaffa","authors_cnr":["Monachini, Monica","Goggi, Sara","Bartolini, Roberto","Frontini, Francesca","Pardelli, Gabriella"],"authors_cnr_id":["8945","10172","10441","15911","16333"],"authors_cnr_institute":[""],"authors":["Goggi, S.","Pardelli, G.","Bartolini, R.","Frontini, F.","Monachini, M.","Manzella, G.","De Mattei, M.","Bustaffa, F."],"abstract":"Here we present the final results of the MAPS (Marine Planning and Service Platform) project, an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. 
The system takes as input non-textual data (measurements) and text - both published papers and documentation - and it provides an advanced search facility thanks to the rich set of metadata and, above all, to the possibility of a refined and domain targeted key-word indexing of texts using Natural Language Processing (NLP) techniques. The paper describes the system in its details providing also evidence of evaluation.","keywords":["Information Extraction","Search Engine","Operative Oceanography"],"pages":"155-161","url":"http:\/\/www.greynet.org\/thegreyjournal\/currentissue.html","volume":"12","doi":"","editors_people":"","editors":[""],"published":"The Grey journal (Print)","publisher":"TextRelease (Amsterdam, Paesi Bassi)","issn":"1574-1796","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132194,"last_updated":"2017-06-27 10:22:27","id_people":373630,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"CLARIN, l'infrastruttura europea delle risorse linguistiche per le scienze umane e sociali e il suo network italiano CLARIN-IT","year":2016,"authors_people":"Monica Monachini, Francesca Frontini","authors_cnr":["Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":[""],"authors":["Monachini, M.","Frontini, F."],"abstract":"Il 1\u00b0 ottobre 2015 il MIUR firma l'adesione dell'Italia a CLARIN-ERIC, l'infrastruttura di ricerca che offre risorse e tecnologie linguistiche dedicate al settore delle scienze del linguaggio e delle scienze umane e sociali. Questo articolo intende fornire alla comunit\u00e0 italiana una ampia panoramica di CLARIN, la sua missione, i suoi pilastri, i servizi, la sua organizzazione tecnica ed amministrativa e la struttura di governance, sia a livello europeo che locale. 
Viene introdotto il network italiano, con il primo centro nazionale ILC4CLARIN, ospitato ed in via di sviluppo presso l'ILC-CNR, le funzionalit\u00e0, le risorse ed i servizi offerti; viene presentato infine il primo nucleo del consorzio nazionale CLARIN-IT, illustrando i criteri di costituzione, le attivit\u00e0 previste e le prospettive future.","keywords":["Infrastrutture di ricerca","Tecnologie linguistiche","Network italiano CLARIN-IT"],"pages":"1-30","url":"http:\/\/www.ai-lc.it\/IJCoL\/v2n2\/1-monachini_and_frontini.pdf","volume":"Vol. 2","doi":"","editors_people":"","editors":[""],"published":"Italian Journal of Computational Linguistics","publisher":"aAccademia University Press, Torino (Italia)","issn":"2499-4553","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132131,"last_updated":"2020-11-18 09:30:31","id_people":357638,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"GeoDomainWordNet: Linking the Geonames Ontology to WordNet","year":2016,"authors_people":"Francesca Frontini Riccardo Del Gratta Monica Monachini","authors_cnr":["Monachini, Monica","Del Gratta, Riccardo","Frontini, Francesca"],"authors_cnr_id":["8945","11933","15911"],"authors_cnr_institute":[""],"authors":["Frontini, F.","Del Gratta, R.","Monachini, M."],"abstract":"This paper illustrates the transformation of GeoNames' ontology concepts, with their English labels and glosses, into a GeoDomain WordNet-like resource in English, its translation into Italian, and its linking to the existing generic WordNets of both languages. 
The paper describes the criteria used for the linking of domain synsets to each other and to the generic ones and presents the published resource in RDF according to the w3c and lemon schema.","keywords":["GeoNames","WordNet","Language resources","Lexicons","Linguistic linked data","lemon","RDF"],"pages":"229-242","url":"http:\/\/link.springer.com\/chapter\/10.1007\/978-3-319-43808-5_18","volume":"9561","doi":"10.1007\/978-3-319-43808-5","editors_people":"Zygmunt Vetulani, Hans Uszkoreit, Marek Kubis","editors":["Vetulani, Z.","Uszkoreit, H.","Kubis, M."],"published":"Human Language Technology. Challenges for Computer Science and Linguistics","publisher":"","issn":"","isbn":"978-3-319-43808-5","conference_name":"","conference_place":"","conference_date":""},{"id":132114,"last_updated":"2016-08-08 14:20:42","id_people":355434,"institutes":["ILC"],"type":"edited_volume","type_order":3,"type_people":"book","title":"Language and Ontology (LangOnto2) & Terminology and Knowledge Structures (TermiKS)","year":2016,"authors_people":"Fahad Khan, \u0160pela Vintar, Pilar Le\u00f3n Ara\u00faz, Pamela Faber, Francesca Frontini, Artemis Parvizi, Larisa Gr\u010di\u0107-Simeunovi\u0107, Christina Unger","authors_cnr":["Khan, Anas Fahad","Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":["048","048"],"authors":["Khan, F.","Vintar, \u0160.","Le\u00f3n Ara\u00faz, P.","Faber, P.","Frontini, F.","Parvizi, A.","Gr\u010di\u0107 Simeunovi\u0107, L.","Unger, C."],"abstract":"This joint workshop brings together two different but closely related strands of research. On the one hand it looks at the overlap between ontologies and computational linguistics and on the other it explores the relationship between knowledge modelling and terminologies. 
In particular the workshop aims to create a forum for discussion in which the different relationships and commonalities between these two areas can be explored in detail, as well as presenting cutting edge research in each of the two individual areas. A significant amount of human knowledge can be found in texts. It is not surprising that languages such as OWL, which allow us to formally represent this knowledge, have become more and more popular both in linguistics and in automated language processing. For instance ontologies are now of core interest to many NLP fields including Machine Translation, Question Answering, Text Summarization, Information Retrieval, and Word Sense Disambiguation. At a more abstract level, however, ontologies can also help us to model and reason about phenomena in natural language semantics. In addition, ontologies and taxonomies can also be used in the organisation and formalisation of linguistically relevant categories such as those used in tagsets for corpus annotation. Notably also, the fact that formal ontologies are being increasingly accessed by users with limited to no background in formal logic has led to a growing interest in developing accessible front ends that allow for easy querying and summarisation of ontologies. It has also led to work in developing natural language interfaces for authoring ontologies and evaluating their design. Additionally in recent years there has been a renewed interest in the linguistic aspects of accessing, extracting, representing, modelling and transferring knowledge. Numerous tools for the automatic extraction of terms, term variants, knowledge-rich contexts, definitions, semantic relations and taxonomies from specialized corpora have been developed for a number of languages, and new theoretical approaches have emerged as potential frameworks for the study of specialized communication. However, the building of adequate knowledge models for practitioners (e.g. 
experts, researchers, translators, teachers etc.), on the one hand, and NLP applications (including cross-language, cross-domain, cross-device, multi-modal, multi-platform applications), on the other hand, still remains a challenge. The papers included in the workshop range across a wide variety of different areas and reflect the strong inter-disciplinary approach, which characterises both areas of research. In addition we are very happy to include two invited talks in the program presented by authorities in their respective fields: Pamela Faber from the field of terminology, and John McCrae, an expert on linguistic linked data and the interface between NLP and ontologies.","keywords":["lexicons","ontologies"],"pages":"","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2016\/index.html","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131656,"last_updated":"2016-03-04 14:03:37","id_people":348461,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Linked Data Quality for Domain-Specific Named-Entity Linking","year":2016,"authors_people":"Carmen Brando Natalie Abadie Francesca Frontini","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":["048"],"authors":["Brando, C.","Abadie, N.","Frontini, F."],"abstract":"We present ongoing research whose goal is to assess quality of Linked Data for its usage in domain-specific Named-entity Linking (NEL). NEL is the task of assigning appropriate referents, typically a Uniform Resource Identifier (URI), to mentions of entities (e.g. persons or places) identified in textual documents. Nowadays, many of these approaches strongly rely on Linked Data as knowledge base. 
However, the scope of the chosen data sets can have an important influence on the performances of NEL as texts often concern specific domains of knowledge. In this paper, we describe LD quality aspects which should be considered for improving NEL in domain-specific contexts, then propose quality metrics and compute them for both French DBpedia and the French National Library (BnF) data sets thereby to discuss the opportunity of using these data sets for the linking of authors in old French Literary digital editions. Our ultimate goal is to improve a Natural Language Processing (NLP) pipeline for the automatic annotation of these texts.","keywords":["Linked Data","Quality","Named Entity Linking"],"pages":"13-24","url":"https:\/\/publications.cnr.it\/doc\/348461","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Atelier-Qualit\u00e9 des Donn\u00e9es du Web (QLOD'16) Joint \u00e0 la 16\u00e8me \u00e9dition de la conf\u00e9rence internationale francophone EGC 2016","conference_place":"Reims","conference_date":"19\/01\/2016"},{"id":132113,"last_updated":"2017-03-17 09:44:40","id_people":355425,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"LREC as a Graph: People and Resources in a Network","year":2016,"authors_people":"Del Gratta R., Frontini F., Monachini M., Pardelli G., Russo I., Bartolini R., Khan F., Soria C., Calzolari N.","authors_cnr":["Khan, Anas Fahad","Monachini, Monica","Soria, Claudia","Bartolini, Roberto","Del Gratta, Riccardo","Frontini, Francesca","Pardelli, Gabriella","Zamorani, Nicoletta"],"authors_cnr_id":["8945","9887","10441","11933","15911","16333","26123"],"authors_cnr_institute":["048","048","048","048","048","048","048","048"],"authors":["Del Gratta, R.","Frontini, F.","Monachini, M.","Pardelli, G.","Russo, I.","Bartolini, R.","Khan, F.","Soria, C.","Calzolari, N."],"abstract":"This proposal describes a new way 
to visualise resources in the LREMap, a community-built repository of language resource descriptions and uses. The LREMap is represented as a force-directed graph, where resources, papers and authors are nodes. The analysis of the visual representation of the underlying graph is used to study how the community gathers around LRs and how LRs are used in research.","keywords":["Language Resources","Resources Documentation","Data Visualisation"],"pages":"2529-2532","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2016\/index.html","volume":"","doi":"","editors_people":"Nicoletta Calzolari (Conference Chair), Khalid Choukri, Thierry Declerck, Sara Goggi, Marko Grobelnik, Bente Maegaard, Joseph Mariani, H\u00e9l\u00e8ne Mazo, Asunci\u00f3n Moreno, Jan Odijk, Stelios Piperidis","editors":["Calzolari, N.","Choukri, K.","Declerck, T.","Goggi, S.","Grobelnik, M.","Maegaard, B.","Mariani, J.","Mazo, H.","Moreno, A.","Odijk, J.","Piperidis, S."],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"978-2-9517408-9-1","conference_name":"Tenth International Conference on Language Resources and Evaluation (LREC 2016)","conference_place":"Portoroz, Slovenia","conference_date":"23-28 may"},{"id":132105,"last_updated":"2017-06-22 08:53:52","id_people":350374,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"A semantic engine for grey literature retrieval in the oceanography domain","year":2016,"authors_people":"Sara Goggi, Gabriella Pardelli, Roberto Bartolini, Francesca Frontini, Monica Monachini, Giuseppe Manzella, Maurizio De Mattei and Franco Bustaffa","authors_cnr":["Monachini, Monica","Goggi, Sara","Bartolini, Roberto","Frontini, Francesca","Pardelli, Gabriella"],"authors_cnr_id":["8945","10172","10441","15911","16333"],"authors_cnr_institute":["048","048","048","048","048"],"authors":["Goggi, S.","Pardelli, G.","Bartolini, R.","Frontini, F.","Monachini, M.","Manzella, 
G.","De Mattei, M.","Bustaffa, F."],"abstract":"Here we present the final results of the MAPS (Marine Planning and Service Platform) project, an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. The system takes as input non-textual data (measurements) and text - both published papers and documentation - and it provides an advanced search facility thanks to the rich set of metadata and, above all, to the possibility of a refined and domain targeted key-word indexing of texts using Natural Language Processing (NLP) techniques. The paper describes the system in its details providing also evidence of evaluation.","keywords":["Information Extraction","Search Engine","Operative Oceanography"],"pages":"104-111","url":"https:\/\/publications.cnr.it\/doc\/350374","volume":"17","doi":"","editors_people":"Dominic Farace, Jerry Frantzen","editors":["Farace, D.","Frantzen, J."],"published":"","publisher":"","issn":"","isbn":"978-90-77484-27-2","conference_name":"Seventeenth International Conference on Grey Literature. 
A New Wave of Textual and Non-Textual Grey Literature","conference_place":"Amsterdam","conference_date":"December 1st-2nd 2015"},{"id":132115,"last_updated":"2018-08-23 10:41:03","id_people":355436,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Al Qamus al Muhit, a Medieval Arabic Lexicon in LMF","year":2016,"authors_people":"Nahli O., Frontini F., Monachini M., Khan F., Zarghili A., Khalfi M.","authors_cnr":["Nahli, Ouafae","Khan, Anas Fahad","Monachini, Monica","Frontini, Francesca"],"authors_cnr_id":["8945","15911"],"authors_cnr_institute":["048","048","048","048"],"authors":["Nahli, O.","Frontini, F.","Monachini, M.","Khan, F.","Zarghili, A.","Khalfi, M."],"abstract":"This paper describes the conversion into LMF, a standard lexicographic digital format of 'al-q\u0101m\u016bs al-mu\u1e25\u012b\u1e6d, a Medieval Arabic lexicon. The lexicon is first described, then all the steps required for the conversion are illustrated. The work will produce a useful lexicographic resource for Arabic NLP, but is also interesting per se, to study the implications of adapting the LMF model to the Arabic language. Some reflections are offered as to the status of roots with respect to previously suggested representations. In particular, roots are, in our opinion, not to be treated as lexical entries, but modeled as lexical metadata for classifying and identifying lexical entries. 
In this manner, each root connects all entries that are derived from it.","keywords":["Arabic Lexicon","LMF","Al Qamus al Muhit"],"pages":"943-950","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2016\/index.html","volume":"","doi":"","editors_people":"Nicoletta Calzolari (Conference Chair), Khalid Choukri, Thierry Declerck, Sara Goggi, Marko Grobelnik, Bente Maegaard, Joseph Mariani, H\u00e9l\u00e8ne Mazo, Asunci\u00f3n Moreno, Jan Odijk, Stelios Piperidis","editors":["Calzolari, N.","Choukri, K.","Declerck, T.","Goggi, S.","Grobelnik, M.","Maegaard, B.","Mariani, J.","Mazo, H.","Moreno, A.","Odijk, J.","Piperidis, S."],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"978-2-9517408-9-1","conference_name":"Tenth International Conference on Language Resources and Evaluation (LREC 2016)","conference_place":"Portoroz, Slovenia","conference_date":"23-28 may"},{"id":132129,"last_updated":"2016-08-29 17:08:41","id_people":357603,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"REDEN ONLINE: Disambiguation, Linking and Visualisation of References in TEI Digital Editions","year":2016,"authors_people":"Francesca Frontini Carmen Brando Jean-Gabriel Ganascia","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":["048"],"authors":["Frontini, F.","Brando, C.","Ganascia, J. 
G."],"abstract":"","keywords":["entity linking","visualization","literary criticism","TEI"],"pages":"","url":"http:\/\/dh2016.adho.org\/abstracts\/362","volume":"","doi":"","editors_people":"","editors":[""],"published":"Digital Humanities 2016: Conference Abstracts","publisher":"","issn":"","isbn":"","conference_name":"Digital Humanities 2016","conference_place":"Jagiellonian University & Pedagogical University, Krak\u00f3w","conference_date":"11-16\/07\/2016"},{"id":132119,"last_updated":"2016-08-08 14:01:44","id_people":355476,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Marine Planning and Service Platform: Specific Ontology Based semantic Search Engine Serving Data Management and Sustainable Development","year":2016,"authors_people":"Manzella Giuseppe M.R. , Bartolini Roberto, Bustaffa Franco, D'Angelo Paolo, De Mattei Maurizio, Frontini Francesca , Maltese Maurizio, Medone Daniele, Monachini Monica, Novellino Antonio, Spada Andrea","authors_cnr":["Monachini, Monica","Bartolini, Roberto","Frontini, Francesca"],"authors_cnr_id":["8945","10441","15911"],"authors_cnr_institute":["048","048","048"],"authors":["Manzella, G. M. R.","Bartolini, R.","Bustaffa, F.","D'Angelo, P.","De Mattei, M.","Frontini, F.","Maltese, M.","Medone, D.","Monachini, M.","Novellino, A.","Spada, A."],"abstract":"The MAPS (Marine Planning and Service Platform) project is aiming at building a computer platform supporting a Marine Information and Knowledge System. One of the main objective of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. In oceanography the cost related to data collection is very high and the new paradigm is based on the concept to collect once and re-use many times (for re-analysis, marine environment assessment, studies on trends, etc). 
This concept requires the access to quality controlled data and to information that is provided in reports (grey literature) and\/or in relevant scientific literature. Hence, creation of new technology is needed by integrating several disciplines such as data management, information systems, knowledge management...","keywords":["Marine Information","Knowledge System"],"pages":"2","url":"http:\/\/meetingorganizer.copernicus.org\/EGU2016\/orals\/20144","volume":"18","doi":"","editors_people":"","editors":[""],"published":"Geophysical research abstracts (Online)","publisher":"Copernicus GmbH (Katlenburg-Lindau, Germania)","issn":"1607-7962","isbn":"","conference_name":"European Geosciences Union General Assembly (EGU 2016)","conference_place":"Vienna, Austria","conference_date":"17-22 aprile 2016"},{"id":131654,"last_updated":"2017-07-24 18:06:44","id_people":368272,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"CLARIN-IT: servizi per la comunit\u00e0 italiana delle scienze umane e sociali","year":2016,"authors_people":"Monachini Monica, Alessandro Enea, Francesca Frontini","authors_cnr":["Bottai, Lorenzo","Enea, Alessandro","Monachini, Monica","Frontini, Francesca"],"authors_cnr_id":["8533","8945","15911"],"authors_cnr_institute":[""],"authors":["Monachini, M.","Enea, A.","Frontini, F."],"abstract":"CLARIN-IT -The Italian Common Language Resources and Technology Infrastructure: Monica Monachini - CLARIN Italian National Coordinator Alessandro Enea - Responsible of ILCforCLARIN & contact person for IDEM Francesca Frontini - Standing Committee for CLARIN Technical Centres (SCCTC) ILC-CNR National Representative","keywords":["CLARIN-IT","The Italian Common Language Resources and Technology 
Infrastructure"],"pages":"","url":"http:\/\/www.clarin-it.it\/en\/content\/clarin-it-idem-day-2016","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"CLARIN-IT @ IDEM Day 2016","conference_place":"Roma [Universit\u00e0 degli Studi di Roma Tre]","conference_date":"6-8 giugno 2016"},{"id":131901,"last_updated":"2015-12-11 14:42:39","id_people":287051,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Converting the PAROLE SIMPLE CLIPS Lexicon into RDF with lemon","year":2015,"authors_people":"Del Gratta Riccardo, Francesca Frontini, Fahad Khan, Monica Monachini","authors_cnr":["Del Gratta, Riccardo","Frontini, Francesca","Khan, Anas Fahad","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048","048","048"],"authors":["Del Gratta, R.","Frontini, F.","Khan, F.","Monachini, M."],"abstract":"This paper describes the publication and linking of (parts of) PAROLE SIMPLE CLIPS (PSC), a large scale Italian lexicon, to the Semantic Web and the Linked Data cloud using the lemon model. The main challenge of the conversion is discussed, namely the reconciliation between the PSC semantic structure which contains richly encoded semantic information, following the qualia structure of the Generative Lexicon theory and the lemon view of lexical sense as a reified pairing of a lexical item and a concept in an ontology. The result is two datasets: one consists of a list of lemon lexical entries with their lexical properties, relations and senses; the other consists of a list of OWL individuals representing the referents for the lexical senses. 
These OWL individuals are linked to each other by a set of semantic relations and mapped onto the SIMPLE OWL ontology of higher level semantic types.","keywords":["lemon","linked data","generative lexicon","RDF","OWL","lexical resource"],"pages":"387-392","url":"http:\/\/www.semantic-web-journal.net\/content\/converting-parole-simple-clips-lexicon-rdf-lemon-0","volume":"6","doi":"10.3233\/SW-140168","editors_people":"","editors":[""],"published":"Semantic web (Print)","publisher":"IOS Press (Amsterdam, Paesi Bassi)","issn":"1570-0844","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132081,"last_updated":"2015-12-10 13:34:18","id_people":334894,"institutes":["ILC"],"type":"journal_article","type_order":0,"type_people":"article","title":"Marine Planning and Service Platform (MAPS) An Advanced Research Engine for Grey Literature in Marine Science","year":2015,"authors_people":"Sara Goggi, Monica Monachini, Francesca Frontini, Roberto Bartolini, Gabriella Pardelli, Maurizio De Mattei, Franco Bustaffa, and Giuseppe Manzella","authors_cnr":["Monachini, Monica","Goggi, Sara","Bartolini, Roberto","Frontini, Francesca","Pardelli, Gabriella"],"authors_cnr_id":["8945","10172","10441","15911","16333"],"authors_cnr_institute":["048","048","048","048","048"],"authors":["Goggi, S.","Monachini, M.","Frontini, F.","Bartolini, R.","Pardelli, G.","De Mattei, M.","Bustaffa, F.","Manzella, G."],"abstract":"The MAPS (Marine Planning and Service Platform) project is a development of the Marine project (Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013) aiming at building a computer platform for supporting a Marine Information and Knowledge System, as part of the data management activities. One of the main objective of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. 
We will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data. The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs\/problems and their answers.","keywords":["Marine Science Search Engine Source Data Oceanography"],"pages":"171-178","url":"https:\/\/publications.cnr.it\/doc\/334894","volume":"11","doi":"","editors_people":"","editors":[""],"published":"The Grey journal (Print)","publisher":"TextRelease (Amsterdam, Paesi Bassi)","issn":"1574-1796","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131660,"last_updated":"2015-11-09 16:25:03","id_people":334082,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"Disambiguation of Named Entities in Cultural Heritage Texts Using Linked Data Sets","year":2015,"authors_people":"Carmen Brando, Francesca Frontini, Jean-Gabriel Ganascia","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":["048"],"authors":["Brando, C.","Frontini, F.","Ganascia, J."],"abstract":"This paper proposes a graph-based algorithm baptized REDEN for the disambiguation of authors' names in French literary criticism texts and scientific essays from the 19th century. It leverages knowledge from different Linked Data sources in order to select candidates for each author mention, then performs fusion of DBpedia and BnF individuals into a single graph, and finally decides the best referent using the notion of graph centrality. 
Some experiments are conducted in order to identify the best size of disambiguation context and to assess the influence on centrality of specific relations represented as edges. This work will help scholars to trace the impact of authors' ideas across different works and time periods.","keywords":["Named-entity disambiguation Centrality Linked data Data fusion Digital humanities"],"pages":"505-514","url":"http:\/\/link.springer.com\/chapter\/10.1007%2F978-3-319-23201-0_51","volume":"539","doi":"10.1007\/978-3-319-23201-0_51","editors_people":"Tadeusz Morzy, Patrick Valduriez, Ladjel Bellatreche","editors":["Morzy, T.","Valduriez, P.","Bellatreche, L."],"published":"New Trends in Databases and Information Systems","publisher":"","issn":"","isbn":"978-3-319-23200-3","conference_name":"","conference_place":"","conference_date":""},{"id":132098,"last_updated":"2016-01-13 15:53:32","id_people":344351,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Linked data for toponym linking in French literary texts","year":2015,"authors_people":"Carmen Brando, Francesca Frontini, Jean-Gabriel Ganascia","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":["048"],"authors":["Brando, C.","Frontini, F.","Ganascia, J."],"abstract":"The present article discusses first experiments in toponym linking of Modern French digital editions aiming to provide an external referent to Linked Data sources. We have so far focused on testing two knowledge bases - French DBpedia and Geonames - for recall. Results highlight quality issues in these data sets for usage in NLP-tasks in domain-specific heritage texts.","keywords":["Named-Entity Linking Linked Data Digital Humanities"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/344351","volume":"","doi":"10.1145\/2837689.2837699","editors_people":"Ross S. Purves, Christopher B. Jones","editors":["Purves, R. S.","Jones, C. 
B."],"published":"GIR '15 Proceedings of the 9th Workshop on Geographic Information Retrieval","publisher":"Association for Computing Machinery (New York, N. Y, Stati Uniti d'America)","issn":"1933-7825","isbn":"978-1-4503-3937-7","conference_name":"GIR'15 9th Workshop on Geographic Information Retrieval","conference_place":"Paris","conference_date":"26-27th November, 2015"},{"id":132093,"last_updated":"2021-04-08 08:30:49","id_people":342213,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Visualising Italian Language Resources: a Snapshot","year":2015,"authors_people":"Riccardo Del Gratta, Francesca Frontini, Monica Monachini, Gabriella Pardelli, Irene Russo, Roberto Bartolini, Sara Goggi, Fahad Khan, Valeria Quochi, Claudia Soria, Nicoletta Calzolari","authors_cnr":["Del Gratta, Riccardo","Russo, Irene","Khan, Anas Fahad","Monachini, Monica","Soria, Claudia","Goggi, Sara","Bartolini, Roberto","Quochi, Valeria","Frontini, Francesca","Pardelli, Gabriella","Zamorani, Nicoletta"],"authors_cnr_id":["8945","9887","10172","10441","11893","15911","16333","26123"],"authors_cnr_institute":[""],"authors":["Del Gratta, R.","Frontini, F.","Monachini, M.","Pardelli, G.","Russo, I.","Bartolini, R.","Goggi, S.","Khan, F.","Quochi, V.","Soria, C.","Calzolari, N."],"abstract":"This paper aims to provide a first snapshot of Italian Language Resources (LRs) and their uses by the community, as documented by the papers presented at two different conferences, LREC2014 and CLiC-it 2014. The data of the former were drawn from the LOD version of the LRE Map, while those of the latter come from manually analyzing the proceedings. 
The results are presented in the form of visual graphs and confirm the initial hypothesis that Italian LRs require concrete actions to enhance their visibility.","keywords":["Italian Language Resources"],"pages":"100-104","url":"https:\/\/books.openedition.org\/aaccademia\/1277?lang=it","volume":"","doi":"","editors_people":"Cristina Bosco, Sara Tonelli, Fabio Massimo Zanzotto","editors":["Bosco, C.","Tonelli, S.","Zanzotto, F. M."],"published":"Proceedings of the Second Italian Conference on Computational Linguistics CLiC-it 2015","publisher":"","issn":"","isbn":"978-88-99200-62-6","conference_name":"Second Italian Conference on Computational Linguistics CLiC-it 2015","conference_place":"Trento","conference_date":"3-4 December 2015"},{"id":132020,"last_updated":"2015-02-27 16:55:43","id_people":307909,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Linguistic Pattern Extraction and Analysis for Classic French Plays","year":2015,"authors_people":"Francesca Frontini, Mohamed Amine Boukhaled, Jean-Gabriel Ganascia","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":[""],"authors_cnr_institute":["048"],"authors":["Frontini, F.","Boukhaled, M. A.","Ganascia, J."],"abstract":"Great authors of fiction and theatre have the capacity of creating memorable characters that take life and become almost as real as living persons to the readers\/audience. The study of characterization, namely of how this is achieved, is a well-researched topic in corpus stylistics: for instance (Mahlberg, 2012) attempts to identify typical lexical patterns for memorable Dickens' characters by extracting those lexical bundles that stand out (namely are overrepresented) in comparison to a general corpus. In other works, authorship attribution methods are applied to the different characters of a play to identify whether the author has been able to provide each of them with a \"distinct\" voice. 
For instance (Vogel & Lynch, 2008) compare individual Shakespeare characters against the whole play or even against all plays of the same author. The purpose of this paper is to propose a methodology for the study of characterization of several characters in French plays of the classical period. The tools developed are meant to support textual analysis by: 1) Verifying the degree of characterization of each character with respect to others. 2) Automatically inducing a list of linguistic features that are significant, representative for that character. Preliminary investigations have been conducted on plays by Moliere, cross-comparing four protagonists from four different plays. The proposed methodology relies on sequential data mining for the extraction of linguistic patterns and on correspondence analysis for comparison of patterns frequencies in each character and for the visual representation of such differences.","keywords":["computational stylometry","theatre","sequential pattern mining"],"pages":"3","url":"http:\/\/lipn.univ-paris13.fr\/~charnois\/conscilaGenres\/resumes\/frontini.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Journ\u00e9e ConSciLa (Confrontations en Sciences du Langage) Grammaire des genres et des styles: quelles approches privil\u00e9gier ?","conference_place":"Paris","conference_date":"16\/01\/2015"},{"id":132068,"last_updated":"2015-07-06 15:00:28","id_people":330648,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Semantic Web based Named Entity Linking for Digital Humanities and Heritage Texts","year":2015,"authors_people":"Francesca Frontini, Carmen Brando, Jean-Gabriel Ganascia","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":[""],"authors_cnr_institute":["048"],"authors":["Frontini, F.","Brando, C.","Ganascia, J."],"abstract":"This paper proposes a graph based methodology for automatically 
disambiguating authors' mentions in a corpus of French literary criticism. Candidate referents are identified and evaluated using a graph based named entity linking algorithm, which exploits a knowledge-base built out of two different resources (DBpedia and the BnF linked data). The algorithm expands previous ones applied for word sense disambiguation and entity linking, with good results. Its novelty resides in the fact that it successfully combines a generic knowledge base such as DBpedia with a domain specific one, thus enabling the efficient annotation of minor authors. This will help specialists to follow mentions of the same author in different works of literary criticism, and thus to investigate their literary appreciation over time.","keywords":["named-entity linking","linked data","digital humanities"],"pages":"77-88","url":"http:\/\/ceur-ws.org\/Vol-1364\/paper9.pdf","volume":"Vol-1364","doi":"","editors_people":"Arnaud Zucker , Isabelle Draelants , Catherine Faron Zucker , Alexandre Monnin","editors":["Zucker, A.","Draelants, I.","Zucker, C. F.","Monnin, A."],"published":"SW4SH 2015 Semantic Web for Scientific Heritage 2015","publisher":"M. 
Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"SW4SH 2015 Semantic Web for Scientific Heritage 2015","conference_place":"Portoroz, Slovenia","conference_date":"June, 1st 2015"},{"id":132071,"last_updated":"2015-06-22 16:14:26","id_people":331797,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Domain-adapted named-entity linker using Linked Data","year":2015,"authors_people":"Francesca Frontini, Carmen Brando, Jean-Gabriel Ganascia","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":["048"],"authors":["Frontini, F.","Brando, C.","Ganascia, J."],"abstract":"We present REDEN, a tool for graph-based Named Entity Linking that allows for the disambiguation of entities using domain-specific Linked Data sources and different configurations (e.g. context size). It takes TEI-annotated texts as input and outputs them enriched with external references (URIs). The possibility of customizing indexes built from various knowledge sources by defining temporal and spatial extents makes REDEN particularly suited to handle domain-specific corpora such as enriched digital editions in the Digital Humanities.","keywords":["named-entity disambiguation","evaluation","linked data","digital humanities"],"pages":"10","url":"http:\/\/ceur-ws.org\/Vol-1386\/named_entity.pdf","volume":"Vol-1386","doi":"","editors_people":"Ruben Izquierdo","editors":["Izquierdo, R."],"published":"Proceedings of the Workshop on NLP Applications: Completing the Puzzle","publisher":"M. 
Jeusfeld c\/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)","issn":"1613-0073","isbn":"","conference_name":"Workshop on NLP Applications: Completing the Puzzle co-located with the 20th International Conference on Applications of Natural Language to Information Systems (NLDB 2015)","conference_place":"Passau, Germany","conference_date":"June 17-19, 2015"},{"id":131997,"last_updated":"2021-04-08 08:39:25","id_people":304304,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Generative Lexicon and polysemy: inducing logical alternations","year":2015,"authors_people":"Francesca Frontini, Valeria Quochi, Monica Monachini","authors_cnr":["Quochi, Valeria","Frontini, Francesca","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":[""],"authors":["Frontini, F.","Quochi, V.","Monachini, M."],"abstract":"The current paper brings together the results of a series of experiments for inducing regular sense alternations, or regular\/ logical polysemy, from a computational lexicon based on the Generative Lexicon theory. 
The results are discussed in light of the potential benefits and uses of the amended algorithm.","keywords":["Polysemy","Generative Lexicon","Logical Alternations"],"pages":"7","url":"https:\/\/publications.cnr.it\/doc\/304304","volume":"","doi":"","editors_people":"Shu-Kai Hsieh and Kyoko Kanzaki (eds.)","editors":["Hsieh, S.","Kanzaki, K."],"published":"","publisher":"MAPLEX2015 Multiple Approaches to Lexicon Conference (Yamagata, JPN)","issn":"","isbn":"","conference_name":"MAPLEX2015 Multiple Approaches to Lexicon Conference","conference_place":"Yamagata, Japan","conference_date":"February 9-10, 2015"},{"id":132049,"last_updated":"2015-04-14 12:31:17","id_people":329370,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Marine Planning and Service Platform (MAPS): An Advanced Research Engine for Grey Literature in Marine Science","year":2015,"authors_people":"Sara Goggi, Monica Monachini, Francesca Frontini, Roberto Bartolini, Gabriella Pardelli, Maurizio De Mattei+, Franco Bustaffa+, Giuseppe Manzella\u00b0","authors_cnr":["Frontini, Francesca","Monachini, Monica","Goggi, Sara","Bartolini, Roberto","Pardelli, Gabriella"],"authors_cnr_id":["8945","10172","10441","16333"],"authors_cnr_institute":["048","048","048","048","048"],"authors":["Goggi, S.","Monachini, M.","Frontini, F.","Bartolini, R.","Pardelli, G.","De Mattei, M.","Bustaffa, F.","Manzella, G."],"abstract":"The MAPS {Marine Planning and Service Platform} project is a development of the Marine project {Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013} aiming at building a computer platform for supporting a Marine Information and Knowledge System, as part of the data management activities. 
One of the main objectives of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. We will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data. The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs\/problems and their answers.","keywords":["Marine Science Search Engine Source Data Oceanography"],"pages":"108-114","url":"http:\/\/www.textrelease.com\/gl16program.html","volume":"16","doi":"","editors_people":"D. Farace and J. Frantzen","editors":["Farace, D.","Frantzen, J."],"published":"Grey Literature Lobby: Engines and Requesters for Change","publisher":"TextRelease (Amsterdam, NLD)","issn":"","isbn":"978-90-77484-23-4","conference_name":"Sixteenth International Conference on Grey Literature Grey Literature Lobby: Engines and Requesters for Change","conference_place":"Library of Congress Washington D. C., USA","conference_date":"December 8-9 2014"},{"id":132052,"last_updated":"2015-09-17 16:53:10","id_people":329646,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Using Ontologies to Model Polysemy in Lexical Resources","year":2015,"authors_people":"Fahad Khan, Francesca Frontini","authors_cnr":["Frontini, Francesca","Khan, Anas Fahad"],"authors_cnr_id":[""],"authors_cnr_institute":["048","048"],"authors":["Khan, F.","Frontini, F."],"abstract":"In this article we look at how the use of ontologies can assist in analysing polysemy in natural languages. 
We develop a model, the Lexical-Sense-Ontology model (LSO), to represent the interaction between a lexicon and ontology, based on lemon. We use the LSO model to show how default rules can be used to represent semi-productivity in polysemy as well as discussing the kinds of ontological information that are useful for studying polysemy.","keywords":["Polysemy","Ontology","Default Logic"],"pages":"","url":"http:\/\/www.aclweb.org\/anthology\/W\/W15\/W15-0404.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"Proceedings of the Workshop on Language and Ontologies","publisher":"","issn":"","isbn":"","conference_name":"Workshop on Language and Ontologies","conference_place":"London","conference_date":"14\/04\/2015"},{"id":132021,"last_updated":"2015-02-20 18:58:32","id_people":315607,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"What makes them different: the extraction of distinctive linguistic patterns for the protagonists of Moli\u00e8re's plays","year":2015,"authors_people":"Francesca Frontini","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":[""],"authors_cnr_institute":["048"],"authors":["Frontini, F."],"abstract":"Quantitative approaches to the study of style in literature are far from a modern novelty. They have however recently gained more and more popularity, not only among computer scientists and corpus linguistics, but also among some influential literary critics. The present panorama of quantitative techniques is very rich, but often confusing, with a plethora of denominations and methodologies often difficult to reconcile; computer scientists classify their work as stylometry or computational stylistics, while linguists may use the label corpus stylistics, and finally critics like Franco Moretti will talk about macro-analysis and distant reading. 
This talk will try first to identify the differences between these trends, distinguishing between corpus based and corpus driven approaches on the methodological side (Quiniou et al 2012), and (following Ramsey 2011) between experimental and hermeneutical approaches. Finally we will present ongoing work conducted at Labex OBVIL on syntactic pattern extraction from theatrical characters. The proposed approach, using correspondence analysis to extract distinctive traits for each character, is imagined rather as an hermeneutical tool, in the sense that it does not seek to demonstrate that two different characters have been endowed with significantly different stylistic traits by the playwright, but it does enable the visualisation of their relative distances and the extraction of those elements that make them distinct.","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/315607","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Cycle des s\u00e9minaires ILES LIMSI","conference_place":"Paris","conference_date":"03\/02\/2015"},{"id":132053,"last_updated":"2015-07-06 14:57:21","id_people":329647,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Analyse et extraction des motifs syntaxiques dans la prose de Robert Challe et de ses apocryphes","year":2015,"authors_people":"Francesca Frontini","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":[""],"authors_cnr_institute":["048"],"authors":["Frontini, F."],"abstract":"Cette contribution pr\u00e9sente une extraction et une analyse des motifs syntaxiques dans la prose de Robert Challe et de ses apocryphes. 
En particulier nous analysons les diff\u00e9rences dans la syntaxe des contes originaux des Illustres Fran\u00e7aises et celle des contes apocryphes.","keywords":["Robert Challe","authorship attribution","stilistica computazionale"],"pages":"","url":"http:\/\/obvil.paris-sorbonne.fr\/sites\/default\/files\/projets\/analyse_motifs_syntaxiques_if_et_apocryphes.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Robert Challe: approches num\u00e9riques des questions d'auctorialit\u00e9","conference_place":"Paris","conference_date":"28\/03\/2015"},{"id":132075,"last_updated":"2015-12-10 14:57:12","id_people":332668,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Mining for characterising patterns in literature using correspondence analysis: an experiment on French novels","year":2015,"authors_people":"Francesca Frontini","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":["048"],"authors":["Frontini, F."],"abstract":"The talk presents and describes a bottom up methodology for the detection of stylistic traits in the syntax of literary texts. 
The extraction of syntactic patterns is performed blindly by a sequential pattern mining algorithm, while the identification of significant and interesting features is performed later by using correspondence analysis and filtering for the most contributive patterns.","keywords":["computational stylistics","French"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/332668","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"G\u00f6ttingen Dialog in Digital Humanities","conference_place":"G\u00f6ttingen","conference_date":"14\/07\/2015"},{"id":132086,"last_updated":"2015-11-12 13:48:16","id_people":336421,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Trattamento automatico del linguaggio per le Digital Humanities. Riconoscimento e disambiguazione di menzioni di autori in testi di critica letteraria","year":2015,"authors_people":"Francesca Frontini","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":["048"],"authors":["Frontini, F."],"abstract":"L'intervento scaturisce da una collaborazione tra ILC-CNR e il Labex OBVIL di Parigi. Lo scopo del progetto \u00e8 quello di adattare ed estendere algoritmi di riconoscimento, classificazione e disambiguazione di entit\u00e0 nominate (in particolare menzioni di autori) nel \"Corpus Critique\", un insieme di testi di critica letteraria francese che il Labex OBVIL sta pubblicando in edizione digitale (formato TEI). Tali algoritmi si basano su approcci TAL supervisionati e non supervisionati e sfruttano massicciamente le basi di conoscenza, sia generiche (DBpedia) che di dominio, disponibili online sotto forma di linked data; lo scopo di tali lavori \u00e8 di produrre risorse testuali annotate per facilitare la ricerca nell'ambito della storia della critica letteraria e della storia delle idee in generale. 
Durante il seminario verranno introdotti i formati e le risorse utilizzate, i criteri e le problematiche di annotazione emersi, e gli algoritmi di riconoscimento e disambiguazione di entit\u00e0 nominate sviluppati. Pi\u00f9 in generale si cercher\u00e0 di mostrare con alcuni casi di utilizzo quali siano i vantaggi di arricchire risorse testuali con questo livello di annotazione, nel pi\u00f9 ampio contesto delle convergenze tra digital humanities e trattamento automatico del linguaggio. Link http:\/\/obvil.paris-sorbonne.fr\/ https:\/\/github.com\/cvbrandoe\/REDEN\/blob\/master\/README.md","keywords":["Named-entity disambiguation Centrality Linked data Data fusion Digital humanities"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/336421","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Seminario di Cultura Digitale","conference_place":"Pisa","conference_date":"04\/11\/2015"},{"id":132092,"last_updated":"2015-12-11 13:17:30","id_people":342185,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"The Syntax of Stage. Studying Linguistic Patterns in Moli\u00e8re","year":2015,"authors_people":"Francesca Frontini Elodie B\u00e9nard","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":["048"],"authors":["Frontini, F.","B\u00e9nard, E."],"abstract":"Theatrical dialogue is a very peculiar type of communication, namely a written text that aims to mimic orality. Great playwrights use dialogue to create iconic human types, that actors then bring to life. Characterisation, comical effects and other plot devices are often achieved through the use of specific linguistic patterns. For this reason theatrical dialogue is an interesting test bed for computer-aided literary analysis and stylometric tools. 
In this talk we shall analyse the application of advanced pattern extraction techniques to the study of Moli\u00e8re's dialogue and characters, where by \"pattern\" we mean sequences of lexical elements and parts of speech. In particular we shall see how different types of extractions may provide experts with different views on the texts and target different aspects of stylistic choice.","keywords":["Computational stylistics","syntactic patterns","Moli\u00e8re"],"pages":"","url":"http:\/\/www.uni-goettingen.de\/de\/525494.html","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"G\u00f6ttinger philologisches Forum","conference_place":"G\u00f6ttingen, Germany","conference_date":"03\/12\/2015"},{"id":132077,"last_updated":"2015-07-27 13:25:51","id_people":332819,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Moliere's Raisonneurs: a quantitative study of distinctive linguistic patterns","year":2015,"authors_people":"Francesca Frontini; Mohamed Amine Boukhaled; Jean Gabriel Ganascia","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":["15911"],"authors_cnr_institute":["048"],"authors":["Frontini, F.","Boukhaled, M. A.","Ganascia, J. 
G."],"abstract":"","keywords":["Computational Stylistics","Correspondence analysis","Corpus linguistics","Moli\u00e8re"],"pages":"114-117","url":"http:\/\/ucrel.lancs.ac.uk\/cl2015\/doc\/CL2015-AbstractBook.pdf","volume":"","doi":"","editors_people":"Federica Formato and Andrew Hardie","editors":["Formato, F.","Hardie, A."],"published":"Corpus Linguistics 2015-Abstract Book","publisher":"","issn":"","isbn":"","conference_name":"Corpus Linguistics 2015","conference_place":"Lancaster","conference_date":"21-24\/07\/2015"},{"id":132094,"last_updated":"2015-12-14 15:36:38","id_people":342221,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"A semantic engine for grey literature retrieval in the oceanography domain","year":2015,"authors_people":"Sara Goggi, Gabriella Pardelli, Roberto Bartolini, Francesca Frontini, Monica Monachini, Giuseppe Manzella, Maurizio De Mattei, Franco Bustaffa","authors_cnr":["Monachini, Monica","Goggi, Sara","Bartolini, Roberto","Frontini, Francesca","Pardelli, Gabriella"],"authors_cnr_id":["8945","10172","10441","15911","16333"],"authors_cnr_institute":["048","048","048","048","048"],"authors":["Goggi, S.","Pardelli, G.","Bartolini, R.","Frontini, F.","Monachini, M.","Manzella, G.","De Mattei, M.","Bustaffa, F."],"abstract":"Here we present the final results of MAPS (Marine Planning and Service Platform), an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. 
In previous publications the general architecture of the system as well as the set of metadata (Common Data Index) used to describe the documents were presented [3]; it was shown how individual oceanographic data-sets could be indexed within the MAPS library by types of measure, measurement tools, geographic areas, and also linked to specific textual documentation. Documentation is described using the current international standards: Title, Authors, Publisher, Language, Date of publication, Body\/Institution, Abstract, etc.; serial publications are described in terms of ISSN, while books are assigned ISBN; content of various types on electronic networks is described by means of doi and url. Each description is linked to the document. Thanks to this, the MAPS library already enables researchers to go from structured oceanographic data to documents describing it. But this was not enough: documents may contain important information that has not been encoded in the metadata. Thus an advanced Search Engine was put in place that uses semantic-conceptual technologies in order to extract key concepts from unstructured text such as technical documents (reports and grey literature) and scientific papers and to make them indexable and searchable by the end user in the same way as the structured data (such as oceanographic observations and metadata) is. More specifically once a document is uploaded in the MAPS library, key domain concepts in documents are extracted via a natural language processing pipeline and used as additional information for its indexing. The key term identification algorithm is based on marine concepts that were pre-defined in a domain ontology, but crucially it also allows for the discovery of new related concepts. So for instance starting from the domain term salinity, related terms such as sea salinity and average sea salinity will also be identified as key terms and used for indexing and searching documents. 
A hybrid search system is then put in place, where users can search the library by metadata or by free text queries. In the latter case, the NLP pipeline performs an analysis of the text of the query, and when key concepts are matched, the relevant documents are presented. The results may be later refined by using other structured information (e.g. date of publication, area, ...). Currently a running system has been put in place, with data from satellites, buoys and sea stations; such data is documented and searchable by its relevant metadata and documentation. Results of quantitative evaluation in terms of information retrieval measures will be presented in the poster; more specifically, given an evaluation set defined by domain experts and composed of pre-defined queries together with documents that answer such queries, it will be shown how the system is highly accurate in retrieving the correct documents from the library. Though this work focuses on oceanography, its results may be easily extended to other domains; more generally, the possibility of enhancing the visibility and accessibility of grey literature via its connection to the data it describes and to an advanced full text indexing are of great relevance for the topic of this conference.","keywords":["Information Extraction","Search Engine","Oceanography"],"pages":"76-77","url":"https:\/\/publications.cnr.it\/doc\/342221","volume":"17","doi":"","editors_people":"Dominic Farace, Jerry Frantzen","editors":["Farace, D.","Frantzen, J."],"published":"GL17 Program Book","publisher":"","issn":"","isbn":"978-90-77484-26-5","conference_name":"Seventeenth International Conference on Grey Literature. 
A New Wave of Textual and Non-Textual Grey Literature","conference_place":"Amsterdam","conference_date":"December 1-2"},{"id":131868,"last_updated":"2015-02-20 15:48:49","id_people":285395,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"The LREMap for Under-Resourced Languages","year":2014,"authors_people":"Riccardo Del Gratta, Francesca Frontini, Fahad Khan, Joseph Mariani, Claudia Soria","authors_cnr":["Frontini, Francesca","Khan, Anas Fahad","Soria, Claudia","Del Gratta, Riccardo"],"authors_cnr_id":["9887","11933"],"authors_cnr_institute":["048","048","048","048"],"authors":["Del Gratta, R.","Frontini, F.","Khan, F.","Mariani, J.","Soria, C."],"abstract":"A complete picture of currently available language resources and technologies for the under-resourced languages of Europe is still lacking. Yet this would help policy makers, researchers and developers enormously in planning a roadmap for providing all languages with the necessary instruments to act as fully equipped languages in the digital era. In this paper we introduce the LRE Map and show its utility for documenting available language resources and technologies for under-resourced languages. 
The importance of the serialization of the LREMap into (L)LOD along with the possibility of its connection to a wider world is also introduced.","keywords":["language resources","less-resourced languages","linguistic linked open data"],"pages":"78-83","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2014\/index.html","volume":"","doi":"","editors_people":"Laurette Pretorius, Claudia Soria, Paola Baroni","editors":["Pretorius, L.","Soria, C.","Baroni, P."],"published":"Proceedings of the Workshop on Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)","publisher":"","issn":"","isbn":"","conference_name":"Workshop on Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)","conference_place":"Reykjavik","conference_date":"26\/05\/2014"},{"id":131985,"last_updated":"2014-12-15 17:12:36","id_people":291452,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Polysemy alternations extraction using the PAROLE SIMPLE CLIPS Italian lexicon","year":2014,"authors_people":"Frontini F., Quochi V., Monachini M.","authors_cnr":["Frontini, Francesca","Monachini, Monica","Quochi, Valeria"],"authors_cnr_id":["8945","11893"],"authors_cnr_institute":["048","048","048"],"authors":["Frontini, F.","Quochi, V.","Monachini, M."],"abstract":"This paper presents the results of an experiment of polysemy alternations induction from a lexicon (Utt and Pad\u00f3, 2011; Frontini et al., 2014), discussing the results and proposing an amendment in the original algorithm.","keywords":["Language Resources and Technologies"],"pages":"175-179","url":"http:\/\/clic.humnet.unipi.it\/proceedings\/Proceedings-CLICit-2014.pdf","volume":"","doi":"10.12871\/CLICIT2014134","editors_people":"Roberto Basili, Alessandro Lenci, Bernardo Magnini","editors":["Basili, R.","Lenci, A.","Magnini, B."],"published":"","publisher":"Pisa University Press srl (Pisa, 
ITA)","issn":"","isbn":"978-88-67-41472-7","conference_name":"Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 & the Fourth International Workshop EVALITA 2014","conference_place":"Pisa","conference_date":"9-11 December 2014, Pisa"},{"id":131896,"last_updated":"2016-03-29 10:00:21","id_people":286984,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Polysemy Index for Nouns: an Experiment on Italian using the PAROLE SIMPLE CLIPS Lexical Database","year":2014,"authors_people":"Frontini Francesca, Valeria Quochi, Sebastian Pad\u00f3, Jason Utt, Monica Monachini","authors_cnr":["Quochi, Valeria","Frontini, Francesca","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048","048"],"authors":["Frontini, F.","Quochi, V.","Pad\u00f3, S.","Utt, J.","Monachini, M."],"abstract":"An experiment is presented to induce a set of polysemous basic type alternations (such as ANIMAL-FOOD, or BUILDING-INSTITUTION) by deriving them from the sense alternations found in an existing lexical resource. The paper builds on previous work and applies those results to the Italian lexicon PAROLE SIMPLE CLIPS. The new results show how the set of frequent type alternations that can be induced from the lexicon is partly different from the set of polysemy relations selected and explicitly applied by lexicographers when building it. The analysis of mismatches shows that frequent type alternations do not always correspond to prototypical polysemy relations, nevertheless the proposed methodology represents a useful tool offered to lexicographers to systematically check for possible gaps in their resource.","keywords":["Polysemy","lexical resources","semantics"],"pages":"2955-2963","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2014\/index.html","volume":"","doi":"","editors_people":"N. Calzolari, K. Choukri, T. Declerck, H. Loftsson, B. Maegaard, J. Mariani, A. Moreno, J. 
Odijk, S. Piperidis","editors":["Calzolari, N.","Choukri, K.","Declerck, T.","Loftsson, H.","Maegaard, B.","Mariani, J.","Moreno, A.","Odijk, J.","Piperidis, S."],"published":"LREC 2014 Ninth International Conference on Language Resources and Evaluation Proceedings","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"978-2-9517408-8-4","conference_name":"9th International Conference on Language Resources and Evaluation, LREC 2014","conference_place":"Reykjavik, Iceland","conference_date":"26-31 may"},{"id":132019,"last_updated":"2015-02-20 17:24:02","id_people":286824,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Using lemon to Model Lexical Semantic \u00a0Shift in Diachronic Lexical Resources","year":2014,"authors_people":"Fahad Khan, Federico Boschetti, Francesca Frontini","authors_cnr":["Frontini, Francesca","Khan, Anas Fahad","Boschetti, Federico"],"authors_cnr_id":["14630"],"authors_cnr_institute":["048","048","048"],"authors":["Khan, F.","Boschetti, F.","Frontini, F."],"abstract":"In this paper we propose a model, called lemonDIA, for representing lexical semantic change using the lemon framework and based on the ontological notion of the perdurant. Namely we extend the notion of sense in lemon by adding a temporal dimension and then define a class of perdurant entities that represents a shift in meaning of a word and which contains different related senses. We start by discussing the general problem of semantic shift and the utility of being able to easily access and represent such information in diachronic lexical resources. 
We then describe our model and illustrate it with examples.","keywords":["lemon","linked data","OWL","ontologies","perdurants","semantic shift"],"pages":"","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2014\/workshops\/LREC2014Workshop-LDL2014%20Proceedings.pdf","volume":"","doi":"","editors_people":"Christian Chiarcos, John Philip McCrae, Petya Osenova, Cristina Vertan","editors":["Chiarcos, C.","McCrae, J. P.","Osenova, P.","Vertan, C."],"published":"Proceedings of the 3rd Workshop on Linked Data in Linguistics (LDL-2014)","publisher":"","issn":"","isbn":"","conference_name":"3rd Workshop on Linked Data in Linguistics: Multilingual Knowledge Resources and Natural Language Processing (LDL2014)","conference_place":"Reykjavik","conference_date":"May 27th, 2014"},{"id":131897,"last_updated":"2014-11-12 17:07:02","id_people":286990,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"The IMAGACT Visual Ontology. an Extendable Multilingual Infrastructure for the Representation of Lexical Encoding of Action","year":2014,"authors_people":"Massimo Moneglia, Susan Brown, Francesca Frontini, Gloria Gagliardi, Fahad Khan, Monica Monachini and Alessandro Panunzi","authors_cnr":["Frontini, Francesca","Khan, Anas Fahad","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048"],"authors":["Moneglia, M.","Brown, S.","Frontini, F.","Gagliardi, G.","Khan, F.","Monachini, M.","Panunzi, A."],"abstract":"Action verbs have many meanings, covering actions in different ontological types. Moreover, each language categorizes action in its own way. One verb can refer to many different actions and one action can be identified by more than one verb. The range of variations within and across languages is largely unknown, causing trouble for natural language processing tasks. 
IMAGACT is a corpus-based ontology of action concepts, derived from English and Italian spontaneous speech corpora, which makes use of the universal language of images to identify the different action types extended by verbs referring to action in English, Italian, Chinese and Spanish. This paper presents the infrastructure and the various linguistic information the user can derive from it. IMAGACT makes explicit the variation of meaning of action verbs within one language and allows comparisons of verb variations within and across languages. Because the action concepts are represented with videos, extension into new languages beyond those presently implemented in IMAGACT is done using competence-based judgments by mother-tongue informants without intense lexicographic work involving underdetermined semantic description","keywords":["Lexicon","Lexical Database","Ontologies"],"pages":"3425-3432","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2014\/index.html","volume":"","doi":"","editors_people":"N. Calzolari, K. Choukri, T. Declerck, H. Loftsson, B. Maegaard, J. Mariani, A. Moreno, J. Odijk, S. 
Piperidis","editors":["Calzolari, N.","Choukri, K.","Declerck, T.","Loftsson, H.","Maegaard, B.","Mariani, J.","Moreno, A.","Odijk, J.","Piperidis, S."],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"978-2-9517408-8-4","conference_name":"9th International Conference on Language Resources and Evaluation, LREC 2014","conference_place":"Reykjavik, Iceland","conference_date":"26-31 may"},{"id":131898,"last_updated":"2014-11-13 15:53:47","id_people":287029,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Presenting a System of Human-Machine Interaction for Performing Map Tasks","year":2014,"authors_people":"Gabriele Pallotti, Francesca Frontini, Fabio Aff\u00e8, Monica Monachini and Stefania Ferrari","authors_cnr":["Frontini, Francesca","Aff\u00e8, Fabio","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048","048"],"authors":["Pallotti, G.","Frontini, F.","Aff\u00e8, F.","Monachini, M.","Ferrari, S."],"abstract":"A system for human machine interaction is presented, that offers second language learners of Italian the possibility of assessing their competence by performing a map task, namely by guiding a virtual follower through a map with written instructions in natural language. The underlying natural language processing algorithm is described, and the map authoring infrastructure is presented.","keywords":["Language learning","human machine interaction","map tasks"],"pages":"3963-3966","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2014\/index.html","volume":"","doi":"","editors_people":"N. Calzolari, K. Choukri, T. Declerck, H. Loftsson, B. Maegaard, J. Mariani, A. Moreno, J. Odijk, S. 
Piperidis","editors":["Calzolari, N.","Choukri, K.","Declerck, T.","Loftsson, H.","Maegaard, B.","Mariani, J.","Moreno, A.","Odijk, J.","Piperidis, S."],"published":"","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"978-2-9517408-8-4","conference_name":"9th International Conference on Language Resources and Evaluation, LREC 2014","conference_place":"Reykjavik, Iceland","conference_date":"2"},{"id":132017,"last_updated":"2015-02-20 17:06:27","id_people":315438,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"La mappa delle opinioni e dei sentimenti estratte dai social media","year":2014,"authors_people":"Francesca Frontini","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":[""],"authors_cnr_institute":["048"],"authors":["Frontini, F."],"abstract":"","keywords":[""],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/315438","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Seminario rivolto agli alunni dell'Istituto Tecnico Economico \"F. Carrara\" di Lucca, organizzato dall'Istituto di Linguistica Computazionale \"A. 
Zampolli\" del CNR di Pisa","conference_place":"Pisa, Area della Ricerca del CNR","conference_date":"31 marzo 2014"},{"id":131987,"last_updated":"2014-12-16 16:51:56","id_people":291816,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Marine Planning and Service Platform (MAPS): An Advanced Research Engine for Grey Literature in Marine Science","year":2014,"authors_people":"Goggi S., Monachini M., Frontini F., Bartolini R., Pardelli G., De Mattei M., Bustaffa F., Manzella G.","authors_cnr":["Frontini, Francesca","Monachini, Monica","Goggi, Sara","Bartolini, Roberto","Pardelli, Gabriella"],"authors_cnr_id":["8945","10172","10441","16333"],"authors_cnr_institute":["048","048","048","048","048"],"authors":["Goggi, S.","Monachini, M.","Frontini, F.","Bartolini, R.","Pardelli, G.","De Mattei, M.","Bustaffa, F.","Manzella, G."],"abstract":"The MAPS (Marine Planning and Service Platform) project is a development of the Marine project (Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013) aiming at building a computer platform for supporting Operative Oceanography in its activities. One of the main objective of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. Community and Requirements. Operative Oceanography is the branch of marine research which deals with the development of integrated systems for examining and modeling the ocean monitoring and forecast. Experts need access to real-time data on the state of the sea such as forecasts on temperatures, streams, tides and the relevant scientific literature. This finds application in many areas, ranging from civilian and military safety to protection of off-shore and coastal infrastructures. The metadata. 
The set of metadata associated with marine data is defined in the CDI (Common Data Index) documented standard. They encode: the types of sizes which have been measured; the measurement tools the platform which has been employed; the geographic area where measures have been taken; the environmental matrix; the descriptive documentation. As concerns the scientific documentation, at the current stage of the CDI standard, a document is shaped around the following metadata: Title, Authors, Version, ISBN\/DOI, Topic, Date of publication, Body\/Institution, Abstract. The search engine. The query system (which is actually under development) has been designed for operating with structured data - the metadata - and raw data - the associated technical and scientific documentation. Full-text technologies are often unsuccessful when applied to this type of queries since they assume the presence of specific keywords in the text; in order to fix this problem, the MAPS project suggests to use different semantic technologies for retrieving the text and data and thus getting much more complying results. In the Poster we will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data. The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs\/problems and their answers.","keywords":["Marine Science Search Engine Source Data Oceanography"],"pages":"93-94","url":"http:\/\/greyguide.isti.cnr.it\/dfdownloadnew.php?ident=GLConference\/GL16\/2014-G01-015&langver=en&scelta=Metadata","volume":"","doi":"","editors_people":"compiled by D. Farace and J. Frantzen","editors":["Farace, C. B. 
D.","Frantzen, J."],"published":"","publisher":"","issn":"","isbn":"978-90-77484-24-1","conference_name":"Sixteenth International Conference on Grey Literature Grey Literature Lobby: Engines and Requesters for Change","conference_place":"Library of Congress Washington D. C., USA","conference_date":"December 8-9, 2014"},{"id":131986,"last_updated":"2014-12-16 12:49:41","id_people":291637,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"A Model for Representing Diachronic Semantic Information in Lexico-Semantic Resources on the Semantic Web","year":2014,"authors_people":"Khan F., Frontini F., Monachini M.","authors_cnr":["Frontini, Francesca","Khan, Anas Fahad","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048","048"],"authors":["Khan, F.","Frontini, F.","Monachini, M."],"abstract":"The Semantic Web offers a way of publishing structured data online that facilitates the interlinking of different datasets stored at different online locations; indeed one of the main aims of the Semantic Web movement is to actively encourage this enrichment of online datasets with information from other resources, in order to avoid the problem of so called 'data islands'. In contrast to conventional hyperlinks however the links between different resources on the Semantic Web can be given semantic types and classified hierarchically. Data published on the Semantic Web is referred to as Linked Data; if, in addition, this data is available with an open license then it can be referred to as Linked Open Data (Heath 2011).","keywords":["Cultural resources","Heritage resources"],"pages":"1-3","url":"http:\/\/www.dh.uni-leipzig.de\/wo\/wp-content\/uploads\/2014\/11\/Fahad-Khan-Francesca-Frontini-and-Monica-Monachini-A-Model-for-Representing.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"Greek and Latin in an age of Open Data. 
Open Philology Project","conference_place":"University of Leipzig, GERMANY","conference_date":"December 1-4, 2014"},{"id":132082,"last_updated":"2015-11-18 09:44:01","id_people":335399,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"MAPS: Architettura del Sistema","year":2014,"authors_people":"M. De Mattei; D. Medone; P. D'Angelo; M. Monachini; R. Bartolini; F. Frontini","authors_cnr":["Monachini, Monica","Bartolini, Roberto","Frontini, Francesca"],"authors_cnr_id":["8945","10441","15911"],"authors_cnr_institute":["048","048","048"],"authors":["De Mattei, M.","Medone, D.","D'Angelo, P.","Monachini, M.","Bartolini, R.","Frontini, F."],"abstract":"PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitivit\u00e0 Bando DLTM Azione 1.2.2 \"Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012. Il presente documento \u00e8 il deliverable \"D3.1 - Architettura del Sistema\" del progetto MAPS (Marine Planning and Service Platform). Il progetto MAPS \u00e8 un'evoluzione del progetto precedente Marine. Tale evoluzione si articola su tre aspetti diversi: - Un meccanismo di federazione dei dati, che consenta di rendere disponibili ai propri utenti non soltanto i dati prodotti internamente da sistema Marine ma anche quelli resi disponibili da altri sistemi similari, soddisfacendo cos\u00ec un pi\u00f9 ampio ambito di esigenze informative. Il deliverable D2.2, Modello della Soluzione specifica in dettaglio queste nuove funzionalit\u00e0. - Un Catalogo dei Documenti che, conservando la documentazione tecnica e scientifica dei prodotti offerti, possa documentare in modo accurato le modalit\u00e0 di misurazione, elaborazione e controllo dei prodotti forniti e quindi i relativi ambiti di applicabilit\u00e0. 
- Un sistema di ricerca capace di selezionare i dati necessari ad uno scopo determinato non soltanto sulla base della loro tipologia, della loro dislocazione territoriale o di altre informazioni simili contenute nei metadati associati come avviene oggi nella maggior parte dei sistemi esistenti, ma anche sulla base delle informazioni contenute nella documentazione tecnica e scientifica. Tali funzionalit\u00e0 sono specificate nel deliverable D1.3 - Modello della Soluzione.","keywords":["Marine Science Search Engine Source Data Oceanography"],"pages":"1-35","url":"https:\/\/publications.cnr.it\/doc\/335399","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132085,"last_updated":"2015-11-18 09:44:09","id_people":335403,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"META: Report di progettazione degli algoritmi individuati","year":2014,"authors_people":"Maurizio De Mattei; Daniele Medone; Maurizio Maltese; Francesca Frontini; Roberto Bartolini; Monica Monachini;","authors_cnr":["Monachini, Monica","Bartolini, Roberto","Frontini, Francesca"],"authors_cnr_id":["8945","10441","15911"],"authors_cnr_institute":["048","048","048"],"authors":["De Mattei, M.","Medone, D.","Maltese, M.","Frontini, F.","Bartolini, R.","Monachini, M."],"abstract":"PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitivit\u00e0 Bando DLTM Azione 1.2.2 \"Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012. Il deliverable definisce l'architettura del Sistema di Estrazione Eventi Meteo realizzato dagli autori nell'ambito del progetto META. 
Il sistema estrae da contenuti online informazione su eventi meteo critici verificatesi in Liguria e nel nord della Toscana.","keywords":["Ontology","Information Extraction","Taxonomy"],"pages":"1-19","url":"https:\/\/publications.cnr.it\/doc\/335403","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132083,"last_updated":"2015-11-18 09:44:22","id_people":335400,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"MAPS: Stato dell'Arte","year":2014,"authors_people":"Francesca Frontini; Roberto Bartolini;Monica Monachini","authors_cnr":["Monachini, Monica","Bartolini, Roberto","Frontini, Francesca"],"authors_cnr_id":["8945","10441","15911"],"authors_cnr_institute":["048","048","048"],"authors":["Frontini, F.","Bartolini, R.","Monachini, M."],"abstract":"PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitivit\u00e0 Bando DLTM Azione 1.2.2 \"Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012 Il documento descrive lo stato dell'arte delle tecnologie linguistiche applicate ai sistemi di ricerca semantica.","keywords":["Marine Science Search Engine Source Data Oceanography"],"pages":"1-21","url":"https:\/\/publications.cnr.it\/doc\/335400","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132084,"last_updated":"2015-11-18 09:44:12","id_people":335402,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"META:-Report sui modelli e tecniche linguistiche","year":2014,"authors_people":"Francesca Frontini; Roberto Bartolini; Monica Monachini","authors_cnr":["Monachini, Monica","Bartolini, Roberto","Frontini, 
Francesca"],"authors_cnr_id":["8945","10441","15911"],"authors_cnr_institute":["048","048","048"],"authors":["Frontini, F.","Bartolini, R.","Monachini, M."],"abstract":"PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitivit\u00e0 Bando DLTM Azione 1.2.2 \"Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012. Il deliverable riassume lo stato dell'arte delle tecnologie semantiche che possono essere impiegate nella realizzazione del progetto META. Il progetto META \u00e8 una progetto di ricerca e sviluppo tecnologico finanziato dalla Regione Liguria con i fondi POR-FESR 2007-2013 della Comunit\u00e0 Europea che mira alla realizzazione di un sistema per l'allerta di eventi meteo critici in Liguria e nel nord della Toscana. Nell'ambito del progetto META le tecnologie semantiche sono utilizzate per estrarre eventi meteo di interesse da articoli pubblicati in rete o sui social network.","keywords":["Ontology","Information Extraction","Semantic Web","Search Engine"],"pages":"1-20","url":"https:\/\/publications.cnr.it\/doc\/335402","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131900,"last_updated":"2014-11-14 11:46:04","id_people":287039,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"Stato dell'arte dei motori semantici. 
Progetto MAPS, programma operativo regionale POR-FESR (2007-2013)","year":2014,"authors_people":"Francesca Frontini, Roberto Bartolini, Monica Monachini, Gabriella Pardelli, Sara Goggi","authors_cnr":["Frontini, Francesca","Monachini, Monica","Goggi, Sara","Bartolini, Roberto","Pardelli, Gabriella"],"authors_cnr_id":["8945","10172","10441","16333"],"authors_cnr_institute":["048","048","","048","048"],"authors":["Frontini, F.","Bartolini, R.","Monachini, M.","Pardelli, G.","Goggi, S."],"abstract":"Il presente documento \u00e8 il deliverable \"D1.1 - Stato dell'Arte dei motori semantici del progetto MAPS (Marine Planning and Service Platform). Il progetto MAPS \u00e8 una evoluzione del progetto precedente Marine. Tramite il progetto Marine (Bando Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013 - pos n.1) \u00e8 stata realizzata una piattaforma informatica di supporto all'Oceanografia Operativa capace di raccogliere dati marini per renderli poi disponibili ai ricercatori e alle organizzazioni interessate tramite protocolli standard. Lo scopo del progetto MAPS \u00e8 quello di realizzare una Catalogo di Documenti contenente informazioni per la piattaforma Marine. 
Caratteristica di MAPS \u00e8 di fornire accesso ai dati oceanografici sia attraverso la ricerca per metadati, sia attraverso la ricerca semantica contenuta nella manualistica tecnico scientifica di riferimento.","keywords":[""],"pages":"1-22","url":"https:\/\/publications.cnr.it\/doc\/287039","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131907,"last_updated":"2016-03-30 12:02:53","id_people":287280,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Linking the Geonames ontology to WordNet","year":2013,"authors_people":"Francesca Frontini, Riccardo Del Gratta, Monica Monachini.","authors_cnr":["Del Gratta, Riccardo","Frontini, Francesca","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048","048"],"authors":["Frontini, F.","Del Gratta, R.","Monachini, M."],"abstract":"This paper illustrates the transformation of the GeoNames ontology concepts, with their English labels and glosses, into a GeoDomain WordNet-like resource in English, its translation into Italian, and its linking to the existing generic WordNets of both languages.","keywords":["GeoNames","WordNet","lemon"],"pages":"263-267","url":"http:\/\/hnk.ffzg.hr\/bibl\/ltc2013\/book\/papers\/OWN-2.pdf","volume":"","doi":"","editors_people":"Zygmunt Vetulani & Hans Uszkoreit (ed.)","editors":["Vetulani, Z.","Uszkoreit, H."],"published":"Human Language Technologies as a Challenge for Computer Science and Linguistics. Proceedings, 6th Language & Technology Conference, December 7-9, 2013, Pozna\u00f1, Poland","publisher":"Fundacja Uniwersytetu im A. 
Mickiewicza (Poznan, POL)","issn":"","isbn":"978-2-9517408-8-4","conference_name":"6th Language & Technology Conference: Human Language Technologies as a Challenge for Computer Science and Linguistics","conference_place":"Poznan, Poland","conference_date":"December 7-9, 2013"},{"id":131909,"last_updated":"2015-02-26 12:39:15","id_people":287331,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Tour-pedia: a web application for the analysis and visualization of opinions for tourism domain","year":2013,"authors_people":"Andrea Marchetti, Maurizio Tesconi, Stefano Abbate, Angelica Lo Duca, Andrea D'Errico, Francesca Frontini and Monica Monachini","authors_cnr":["Frontini, Francesca","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048"],"authors":["Marchetti, A.","Tesconi, M.","Abbate, S.","Lo Duca, A.","D'Errico, A.","Frontini, F.","Monachini, M."],"abstract":"We present Tour-pedia an interactive web application that extracts opinions from reviews of accommodations from different sources available on-line. Polarity markers display on a map the different opinions. This tool is intended to help business operators to manage reputation on-line.","keywords":["Visualization tools","opinion mining","NLP on social media","tourism reviews"],"pages":"594-595","url":"http:\/\/www.iit.cnr.it\/sites\/default\/files\/ltc2013_opener_demo.pdf","volume":"","doi":"","editors_people":"Zygmunt Vetulani & Hans Uszkoreit (ed.)","editors":["Vetulani, Z.","Uszkoreit, H."],"published":"","publisher":"Fundacja Uniwersytetu im A. 
Mickiewicza (Poznan, POL)","issn":"","isbn":"978-83-932640-4-9","conference_name":"6th Language & Technology Conference: Human Language Technologies as a Challenge for Computer Science and Linguistics","conference_place":"Poznan, Poland","conference_date":"December 7-9, 2013"},{"id":131910,"last_updated":"2014-11-17 16:07:50","id_people":287346,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"IMAGACT E-learning Platform for Basic Action Types. In: Pixel (ed.), Proceedings of the 6th International Conference ICT for Language Learning","year":2013,"authors_people":"Moneglia M., Panunzi A., Gagliardi G., Monachini M., Russo I., De Felice I., Khan F. & Frontini F.","authors_cnr":["Russo, Irene","Frontini, Francesca","Khan, Anas Fahad","De Felice, Irene","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048","048","048","048"],"authors":["Moneglia, M.","Panunzi, A.","Gagliardi, G.","Monachini, M.","Russo, I.","De Felice, I.","Khan, F.","Frontini, F."],"abstract":"Action verbs express important information in a sentence and they are the most frequent elements in speech, but they are also one of the most difficult part of the lexicon to learn for L2 language learners, because languages segment these concepts in very different ways. The two sentences \"Mary folds her shirt\" and \"Mary folds her arms\" refer to two completely different types of action, as becomes evident when they are translated into another language (e.g., in Italian they would be translated as \"Maria piega la camicia\" and \"Maria incrocia le braccia\" respectively). IMAGACT e-learning platform aims to make these differences evident by creating a cross-linguistic ontology of action types, whose nodes consist of 3D scenes, each of which relates to one action type. 
In order to identify these types, contexts of use have been extracted from English and Italian spontaneous speech corpora for around 600 high frequency action verbs (for each language). All instances that refer to similar events (e.g., fold the shirt\/ the blanket) are grouped under one single action type: each one of these types is then represented by a linguistic best example and a short video that represents simple actions (e.g. a man taking a glass from a table). The action types extracted for Italian and English are compared and merged into one cross-linguistic ontology of action. IMAGACT has provided an internet based annotation infrastructure to derive this information from corpora. The project is now completed for the Italian and English lexicon, data extraction for Chinese and Spanish is ongoing. Reference to prototypical imagery is crucial in order to bootstrap the learning process. By selecting the set of 3D scenes referred to by a verb in one language and viewing the type of activity represented therein learners can directly understand the range of applicability of each verb. Thanks to an easy interface, a user can access the English\/Italian\/Chinese lexicon by lemma or directly by 3D scenes. For example, searching for the verb \"to turn\", s\/he will be presented with a number of scenes, showing the various action types associated to that verb. Clicking on a scene, s\/he will know how this type of action is referred to in the other languages","keywords":["Ontology"],"pages":"85-89","url":"https:\/\/publications.cnr.it\/doc\/287346","volume":"","doi":"","editors_people":"Pixel (ed.)","editors":["Pixel"],"published":"Conference Proceedings. ICT for Language Learning","publisher":"libreriauniversitaria. 
it (Limena, ITA)","issn":"","isbn":"978-88-6292-423-8","conference_name":"International Conference \"ICT for Language Learning\", 6th edition","conference_place":"Florence, Italy","conference_date":"14-15 november 2013"},{"id":131862,"last_updated":"2014-10-20 11:28:51","id_people":285373,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"(Fore)seeing actions in objects. Acquiring distinctive affordances from language","year":2013,"authors_people":"Irene Russo, Irene De Felice, Francesca Frontini, Fahad Khan, Monica Monachini","authors_cnr":["Russo, Irene","Frontini, Francesca","Khan, Anas Fahad","De Felice, Irene","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":[""],"authors":["Russo, I.","De Felice, I.","Frontini, F.","Khan, F.","Monachini, M."],"abstract":"In this paper we investigate if conceptual information concerning objects' affordances as possibilities for actions anchored to an object can be at least partially acquired through language. Considering verb-noun pairs as the linguistic realizations of relations between actions performed by an agent and objects we collect this information from the ImagAct dataset, a linguistic resource obtained from manual annotation of basic action verbs, and from a web corpus(itTenTen). 
The notion of affordance verb as the most distinctive verb in ImagAct enables a comparison with distributional data that reveal how lemmas ranking based on a semantic association measure that mirror that of affordances as the most distinctive actions an object can be involved in.","keywords":[""],"pages":"151-161","url":"https:\/\/docs.google.com\/viewer?a=v&pid=sites&srcid=ZGVmYXVsdGRvbWFpbnxubHBjczIwMTN8Z3g6MTI0ZGMzYWYwYmMxNjY1Mg","volume":"","doi":"","editors_people":"Bernadette Sharp, Michael Zock","editors":["Sharp, B.","Zock, M."],"published":"Proceedings of NLPCS 2013-10th International Workshop on Natural Language Processing and Cognitive Science","publisher":"","issn":"","isbn":"","conference_name":"NLPCS 2013-10th International Workshop on Natural Language Processing and Cognitive Science","conference_place":"Marseille","conference_date":"15-17\/10\/2013"},{"id":131914,"last_updated":"2014-11-18 12:04:03","id_people":287456,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Disambiguation of Basic Action Types through Nouns' Telic Qualia","year":2013,"authors_people":"Irene Russo, Francesca Frontini, Irene De Felice, Fahad Khan, Monica Monachini","authors_cnr":["Russo, Irene","Frontini, Francesca","Khan, Anas Fahad","De Felice, Irene","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":[""],"authors":["Russo, I.","Frontini, F.","De Felice, I.","Khan, F.","Monachini, M."],"abstract":"Knowledge about semantic associations between words is effective to disambiguate word senses. The aim of this paper is to investigate the role and the relevance of telic information from SIMPLE in the disambiguation of basic action types of Italian HOLD verbs ( prendere, 'to take', raccogliere, 'to pick up', pigliare 'to grab' etc.). 
We propose an experiment to compare the results obtained with telic information from SIMPLE with basic co-occurrence information extracted from corpora (most salient verbs modifying nouns) classified in terms of general semantic classes to avoid data sparseness.","keywords":[""],"pages":"70-75","url":"http:\/\/www.aclweb.org\/anthology\/W13-5410","volume":"","doi":"","editors_people":"Roser Saur\u00ed, Nicoletta Calzolari, Chu-Ren Huang, Alessandro Lenci, Monica Monachini, James Pustejovsky","editors":["Saur\u00ed, R.","Calzolari, N.","Huang, C.","Lenci, A.","Monachini, M.","Pustejovsky, J."],"published":"Proceedings of the 6th International Conference on Generative Approaches to the Lexicon. Generative Lexicon and Distributional Semantics","publisher":"Association for Computational Linguistics (Stroudsburg, USA)","issn":"","isbn":"978-1-937284-98-5","conference_name":"6th International Conference on Generative Approaches to the Lexicon Generative Lexicon and Distributional Semantics","conference_place":"Pisa, Italy","conference_date":"24-25\/09\/2013"},{"id":132018,"last_updated":"2015-02-20 17:23:21","id_people":287038,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Flexible Acquisition of Subcategorization Frames in Italian","year":2012,"authors_people":"Caselli, Tommaso; Frontini, Francesca; Quochi, Valeria; Rubino, Francesco and Russo, Irene","authors_cnr":["Caselli, Tommaso","Russo, Irene","Frontini, Francesca","Rubino, Francesco","Quochi, Valeria"],"authors_cnr_id":["11893"],"authors_cnr_institute":["048","048","048","048","048"],"authors":["Caselli, T.","Frontini, F.","Quochi, V.","Rubino, F.","Russo, I."],"abstract":"Lexica of predicate-argument structures constitute a useful tool for several tasks in NLP. This paper describes a web-service system for automatic acquisition of verb subcategorization frames (SCFs) from parsed data in Italian. The system acquires SCFs in an unsupervised manner. 
We created two gold standards for the evaluation of the system, the first by mixing together information from two lexica (one manually created and the second automatically acquired) and manual exploration of corpus data and the other annotating data extracted from a specialized corpus (environmental domain). Data filtering is accomplished by means of the maximum likelihood estimate (MLE). The evaluation phase has allowed us to identify the best empirical MLE threshold for the creation of a lexicon (P=0.653, R=0.557, F1=0.601). In addition to this, we assigned to the extracted entries of the lexicon a confidence score based on the relative frequency and evaluated the extractor on domain specific data. The confidence score will allow the final user to easily select the entries of the lexicon in terms of their reliability: one of the most interesting feature of this work is the possibility the final users have to customize the results of the SCF extractor, obtaining different SCF lexica in terms of size and accuracy.","keywords":["lexicon","automatic acquisition","subcategorisation frames"],"pages":"2842-2848","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2012\/summaries\/390.html","volume":"","doi":"","editors_people":"Nicoletta Calzolari, Khalid Choukri, Thierry Declerck, Mehmet U\u011fur Do\u011fan, Bente Maegaard, Joseph Mariani, Jan Odijk, Stelios Piperidis","editors":["Calzolari, N.","Choukri, K.","Declerck, T.","Do\u011fan, M. 
U.","Maegaard, B.","Mariani, J.","Odijk, J.","Piperidis, S."],"published":"Proceedings of the Eight International Conference on Language Resources and Evaluation (LREC'12)","publisher":"European Language Resources Association ELRA (Paris, FRA)","issn":"","isbn":"9782951740877","conference_name":"Eight International Conference on Language Resources and Evaluation (LREC'12)","conference_place":"Istanbul, Turkey","conference_date":"23-25 Maggio 2012"},{"id":128425,"last_updated":"2022-08-24 00:43:28","id_people":223098,"institutes":["IIT","ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"L-LEME: an Automatic Lexical Merger based on the LMF Standard","year":2012,"authors_people":"Riccardo Del Gratta, Francesca Frontini, Monica Monachini, Valeria Quochi, Francesco Rubino, Matteo Abrate, Angelica Lo Duca","authors_cnr":["Abrate, Matteo","Frontini, Francesca","Rubino, Francesco","Lo Duca, Angelica","Monachini, Monica","Quochi, Valeria","Del Gratta, Riccardo"],"authors_cnr_id":["8945","11893","11933"],"authors_cnr_institute":["044","048","048","044","048","048","048"],"authors":["Del Gratta, R.","Frontini, F.","Monachini, M.","Quochi, V.","Rubino, F.","Abrate, M.","Lo Duca, A."],"abstract":"The present paper describes LMF LExical MErger (L-LEME), an architecture to combine two lexicons in order to obtain new resource(s). L-LEME relies on standards, thus exploiting the benefits of the ISO Lexical Markup Framework (LMF) to ensure interoperability. L-LEME is meant to be dynamic and heavily adaptable: it allows the users to configure it to meet their specific needs. The L-LEME architecture is composed of two main modules: the Mapper, which takes in input two lexicons A and B and a set of user-defined rules and instructions to guide the mapping process (Directives D) and gives in output all matching entries. The algorithm also calculates a cosine similarity score. 
The Builder takes in input the previous results, a set of Directives D1 and produces a new LMF lexicon C. The Directives allow the user to define its own building rules and different merging scenarios. L-LEME is applied to a specific concrete task within the PANACEA project, namely the merging of two Italian SubCategorization Frame (SCF) lexicons. The experiment is interesting in that A and B have different philosophies behind, being A built by human introspection and B automatically extracted. Ultimately, L-LEME has interesting repercussions in many language technology applications","keywords":["LMF","Lexicon mapping","similarity score"],"pages":"31-40","url":"https:\/\/publications.cnr.it\/doc\/223098","volume":"","doi":"","editors_people":"Bel N. , Gavrilidou M. , Monachini M., Quochi V., Rimell L.","editors":["Bel, N.","Gavrilidou, M.","Monachini, M.","Quochi, V.","Rimell, L."],"published":"Proceedings of the LREC 2012 Workshop on Language Resource Merging","publisher":"","issn":"","isbn":"978-2-9517408-7-7","conference_name":"The Eight International Conference on Language Resources and Evaluation (LREC) 2012","conference_place":"Istanbul, Turkey","conference_date":"2012"},{"id":131754,"last_updated":"2017-03-02 11:36:19","id_people":220182,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"The Language Library: supporting community effort for collective resource production","year":2012,"authors_people":"Del Gratta, Riccardo; Frontini, Francesca; Rubino, Francesco; Russo, Irene; Calzolari, Nicoletta","authors_cnr":["Russo, Irene","Frontini, Francesca","Rubino, Francesco","Del Gratta, Riccardo"],"authors_cnr_id":["11933"],"authors_cnr_institute":[""],"authors":["Del Gratta, R.","Frontini, F.","Rubino, F.","Russo, I.","Calzolari, N."],"abstract":"Relations among phenomena at different linguistic levels are at the essence of language properties but today we focus mostly on one specific linguistic layer at a 
time, without (having the possibility of) paying attention to the relations among the different layers. At the same time our efforts are too much scattered without much possibility of exploiting other people's achievements. To address the complexities hidden in multilayer interrelations even small amounts of processed data can be useful, improving the performance of complex systems. Exploiting the current trend towards sharing we want to initiate a collective movement that works towards creating synergies and harmonisation among different annotation efforts that are now dispersed. In this paper we present the general architecture of the Language Library, an initiative which is conceived as a facility for gathering and making available through simple functionalities the linguistic knowledge the field is able to produce, putting in place new ways of collaboration within the LRT community. In order to reach this goal, a first population round of the Language Library has started around a core of parallel\/comparable texts that have been annotated by several contributors submitting a paper for LREC2012. 
The Language Library has also an ancillary aim related to language documentation and archiving and it is conceived as a theory-neutral space which allows for several language processing philosophies to coexist.","keywords":["annotation","metadata","scientific crowdsourcing"],"pages":"43-49","url":"https:\/\/publications.cnr.it\/doc\/220182","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"The Eight International Conference on Language Resources and Evaluation (LREC'12)","conference_place":"Istanbul, Turkey","conference_date":"23-25 may 2012"},{"id":128655,"last_updated":"2016-04-01 13:36:10","id_people":278677,"institutes":["IIT","ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"GLOSS, an infrastructure for the semantic annotation and mining of documents in the public security domain","year":2012,"authors_people":"Frontini Francesca, Aliprandi Carlo, Bacciu Clara, Bartolini Roberto, Marchetti Andrea, Parenti Enrico, Piccinonno Fulvio, Soru T.","authors_cnr":["Bacciu, Clara","Frontini, Francesca","Marchetti, Andrea","Bartolini, Roberto"],"authors_cnr_id":["1738","10441"],"authors_cnr_institute":[""],"authors":["Frontini, F.","Aliprandi, C.","Bacciu, C.","Bartolini, R.","Marchetti, A.","Parenti, E.","Piccinonno, F.","Soru, T."],"abstract":"Efficient access to information is crucial in the work of organizations that require decision taking in emergency situations. This paper gives an outline of GLOSS, an integrated system for the analysis and retrieval of data in the environmental and public security domain. 
We shall briefly present the GLOSS infrastructure and its use, and how semantic information of various kinds is integrated, annotated and made available to the final users.","keywords":["semantic annotation","text mining","geographic data"],"pages":"21-25","url":"https:\/\/publications.cnr.it\/doc\/278677","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"European language resources association (ELRA) (Paris, FRA)","issn":"","isbn":"978-2-9517408-7-7","conference_name":"Eight International Conference on Language Resources and Evaluation. LREC'12. European Language Resources Association: France","conference_place":"Istanbul","conference_date":"21-27\/05\/2012"},{"id":131773,"last_updated":"2014-10-23 10:53:31","id_people":220785,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Automatic Creation of Quality Multi-Word Lexica from Noisy Text Data","year":2012,"authors_people":"Francesca Frontini, Valeria Quochi, Francesco Rubino","authors_cnr":["Frontini, Francesca","Rubino, Francesco","Quochi, Valeria"],"authors_cnr_id":["11893"],"authors_cnr_institute":["048","048","048"],"authors":["Frontini, F.","Quochi, V.","Rubino, F."],"abstract":"This paper describes the design of a tool for the automatic creation of multi-word lexica that is deployed as a web service and runs on automatically web-crawled data within the framework of the PANACEA platform. The main purpose of our task is to provide a (computationally \"light\") tool that creates a full high quality lexical resource of multi-word items. Within the platform, this tool is typically inserted in a work flow whose first step is automatic web-crawling. Therefore, the input data of our lexical extractor is intrinsically noisy. The paper evaluates the capacity of the tool to deal with noisy data, and in particular with texts containing a significant amount of duplicated paragraphs. 
The accuracy of the extraction of multi-word expressions from the original crawled corpus is compared to the accuracy of the extraction from a later \"de-duplicated\" version of the corpus. The paper shows how our method can extract with sufficiently good precision also from the original, noisy crawled data. The output of our tool is a multi-word lexicon formatted and encoded in XML according to the Lexical Mark-up Framework.","keywords":["Lexical induction","multi-word extraction","web-based distributed platform","noisy data"],"pages":"","url":"http:\/\/www.kde.cs.tut.ac.jp\/~aono\/pdf\/COLING2012\/AND\/pdf\/AND04.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"Proceedings of the Sixth Workshop on Analytics for Noisy Unstructured Text Data","publisher":"ACM, Association for computing machinery (New York, USA)","issn":"","isbn":"978-1-4503-1919-5","conference_name":"AND 2012","conference_place":"Mumbai, India","conference_date":"December 9, 2012"},{"id":131753,"last_updated":"2015-02-27 09:38:24","id_people":219704,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"The META-SHARE Metadata Schema for the Description of Language Resources","year":2012,"authors_people":"Gavrilidou, Maria [1]; Labropoulou, Penny [1]; Desipri, Elina [1]; Piperidis, Stelio [1]; Papageorgiou, Haris [1]; Monachini, Monica [2]; Frontini, Francesca [2]; Declerck, Thierry [3]; Francopoulo, Gil [4]; Arranz, Victoria [5]; Mapelli, Valerie [5]","authors_cnr":["Frontini, Francesca","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048"],"authors":["Gavrilidou, M.","Labropoulou, P.","Desipri, E.","Piperidis, S.","Papageorgiou, H.","Monachini, M.","Frontini, F.","Declerck, T.","Francopoulo, G.","Arranz, V.","Mapelli, V."],"abstract":"This paper presents a metadata model for the description of language resources proposed in the framework of the META-SHARE infrastructure, aiming to cover both 
datasets and tools\/technologies used for their processing. It places the model in the overall framework of metadata models, describes the basic principles and features of the model, elaborates on the distinction between minimal and maximal versions thereof, briefly presents the integrated environment supporting the LRs description and search and retrieval processes and concludes with work to be done in the future for the improvement of the model.","keywords":["metadata","META-SHARE","LRs description"],"pages":"1090-1097","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2012\/index.html","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-2-9517408-7-7","conference_name":"The Eight International Conference on Language Resources and Evaluation (LREC'12)","conference_place":"Istanbul, Turkey","conference_date":"23-25 may 2012"},{"id":131756,"last_updated":"2013-05-30 09:58:13","id_people":220211,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Verb interpretation for basic action types: annotation, ontology induction and creation of prototypical scenes","year":2012,"authors_people":"Monachini, Monica [1]; Frontini, Francesca [1]; De Felice, Irene [1]; Russo, Irene [1]; Khan, Fahad [1]; Gagliardi, Gloria [2]; Panunzi, Alessandro [2]","authors_cnr":["Russo, Irene","Frontini, Francesca","Khan, Anas Fahad","De Felice, Irene","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048","048","048","048"],"authors":["Monachini, M.","Frontini, F.","De Felice, I.","Russo, I.","Khan, F.","Gagliardi, G.","Panunzi, A."],"abstract":"In the last 20 years dictionaries and lexicographic resources such as WordNet have started to be enriched with multimodal content. Short videos depicting basic actions support the user's need (especially in second language acquisition) to fully understand the range of applicability of verbs. 
The IMAGACT project has among its results a repository of action verbs ontologically organised around prototypical action scenes in the form of both video recordings and 3D animations. The creation of the IMAGACT ontology, which consists in deriving action types from corpus instances of action verbs, intra and cross linguistically validating them and producing the prototypical scenes thereof, is the preliminary step for the creation of a resource that users can browse by verb, learning how to match different action prototypes with the correct verbs in the target language. The mapping of IMAGACT types onto WordNet synsets allows for a mutual enrichment of both resources.","keywords":["ontology of actions","lexical resource","3D animations"],"pages":"69-80","url":"https:\/\/publications.cnr.it\/doc\/220211","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"COLING 2012-3rd Workshop on Cognitive Aspects of the Lexicon (CogALex-III)","conference_place":"Mumbai, India","conference_date":"15 Dicembre 2012"},{"id":131732,"last_updated":"2015-02-26 14:26:41","id_people":220262,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"IMAGACT: Deriving an Action Ontology from Spoken Corpora","year":2012,"authors_people":"Moneglia, Massimo [1]; Gagliardi, Gloria [1]; Panunzi, Alessandro [1]; Frontini, Francesca [2]; Russo, Irene [2]; Monachini, Monica [2]","authors_cnr":["Russo, Irene","Frontini, Francesca","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048","048"],"authors":["Moneglia, M.","Gagliardi, G.","Panunzi, A.","Frontini, F.","Russo, I.","Monachini, M."],"abstract":"This paper presents the IMAGACT annotation infrastructure which uses both corpus - based and competence - based methods for the simultaneous extraction of a language independent Action ontology from English and Italian spontaneous speech corpora. 
The infrastructure relies on an innovative methodology based on images of prototypical scenes and will identify high frequency action concepts in everyday life, suitable for the implementation of an open set of languages.","keywords":["Action verbs Ontology imagery"],"pages":"42-47","url":"https:\/\/publications.cnr.it\/doc\/220262","volume":"","doi":"","editors_people":"Bunt H.","editors":["Bunt, H."],"published":"Proceedings of the Eight Joint ISO-ACL SIGSEM Workshop on Interoperable Semantic Annotation ISA-8","publisher":"","issn":"","isbn":"978-90-74029-00-1","conference_name":"Eighth Joint ISO-ACL SIGSEM Workshop on Interoperable Semantic Annotation (ISA-8)","conference_place":"Pisa, Italy","conference_date":"3-5 October 2012"},{"id":131751,"last_updated":"2015-02-26 15:10:19","id_people":219656,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"The IMAGACT Cross-linguistic Ontology of Action. A new infrastructure for natural language disambiguation","year":2012,"authors_people":"Moneglia, Massimo [1]; Monachini, Monica [2]; Calabrese, Omar [3]; Panunzi, Alessandro [1]; Frontini, Francesca [2]; Gagliardi, Gloria [1]; Russo, Irene [2]","authors_cnr":["Russo, Irene","Frontini, Francesca","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048","048"],"authors":["Moneglia, M.","Monachini, M.","Calabrese, O.","Panunzi, A.","Frontini, F.","Gagliardi, G.","Russo, I."],"abstract":"Action verbs, which are highly frequent in speech, cause disambiguation problems that are relevant to Language Technologies. This is a consequence of the peculiar way each natural language categorizes Action i.e. it is a consequence of semantic factors. Action verbs are frequently \"general\", since they extend productively to actions belonging to different ontological types. 
Moreover, each language categorizes action in its own way and therefore the cross-linguistic reference to everyday activities is puzzling. This paper briefly sketches the IMAGACT project, which aims at setting up a cross-linguistic Ontology of Action for grounding disambiguation tasks in this crucial area of the lexicon. The project derives information on the actual variation of action verbs in English and Italian from spontaneous speech corpora, where references to action are high in frequency. Crucially it makes use of the universal language of images to identify action types, avoiding the underdeterminacy of semantic definitions. Action concept entries are implemented as prototypic scenes; this will make it easier to extend the Ontology to other languages.","keywords":["Action verbs","Ontology","Imagery"],"pages":"2606-2613","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2012\/pdf\/428_Paper.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-2-9517408-7-7","conference_name":"The Eight International Conference on Language Resources and Evaluation (LREC'12)","conference_place":"Istanbul, Turkey","conference_date":"23-25 may 2012"},{"id":131758,"last_updated":"2015-02-26 14:09:52","id_people":220270,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Mapping a corpus-induced ontology of action verbs on ItalWordNet","year":2012,"authors_people":"Moneglia, Massimo [1]; Monachini, Monica [2]; Panunzi, Alessandro [1]; Frontini, Francesca [2]; Gagliardi, Gloria [1]; Russo, Irene [2]","authors_cnr":["Russo, Irene","Frontini, Francesca","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048","048"],"authors":["Moneglia, M.","Monachini, M.","Panunzi, A.","Frontini, F.","Gagliardi, G.","Russo, I."],"abstract":"Action verbs are the least predictable linguistic type for bilingual dictionaries and they cause major problems for NLP 
technologies. This is not only because of language specific phraseology, but it is rather a consequence of the peculiar way each language categorizes events. In ordinary languages the most frequent action verbs are \"general\", since they extend productively to actions belonging to different ontological types. Moreover, each language categorizes actions in its own way and therefore the cross-linguistic reference to everyday activities is puzzling. A cross-linguistic stable ontology of actions is difficult to achieve because our knowledge on the actual variation of verbs across types of actions is largely unknown. This paper briefly presents the problems and the building strategies of the IMAGACT Ontology, which aims at filling this gap, and compares some early results on a set of Italian verbs with the information contained in ItalWordNet.","keywords":["action verbs ontology image"],"pages":"219-226","url":"https:\/\/publications.cnr.it\/doc\/220270","volume":"","doi":"","editors_people":"Fellbaum C., Vossen P.","editors":["Fellbaum, C.","Vossen, P."],"published":"Proceedings of the 6th Global WordNet Conference (GWC2012)","publisher":"","issn":"","isbn":"978-80-263-0244-5","conference_name":"Global Wordnet Conference (GWC2012)","conference_place":"Matsue, Japan","conference_date":"9-13 January 2012"},{"id":131772,"last_updated":"2023-08-21 19:57:38","id_people":220778,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"A MWE Acquisition and Lexicon Builder Web Service","year":2012,"authors_people":"Quochi, Valeria; Frontini, Francesca; Rubino, Francesco","authors_cnr":["Frontini, Francesca","Rubino, Francesco","Quochi, Valeria"],"authors_cnr_id":["11893"],"authors_cnr_institute":["048","048","048"],"authors":["Quochi, V.","Frontini, F.","Rubino, F."],"abstract":"This paper describes the development of a web-service tool for the automatic extraction of Multi-word expressions lexicons, which has been integrated 
in a distributed platform for the automatic creation of linguistic resources. The main purpose of the work described is thus to provide a (computationally \"light\") tool that produces a full lexical resource: multi-word terms\/items with relevant and useful attached information that can be used for more complex processing tasks and applications (e.g. parsing, MT, IE, query expansion, etc.). The output of our tool is a MW lexicon formatted and encoded in XML according to the Lexical Mark-up Framework. The tool is already functional and available as a service. Evaluation experiments show that the tool precision is of about 80%.","keywords":["Multiword extraction","lexical resources","LMF","web services"],"pages":"2291-2306","url":"http:\/\/aclweb.org\/anthology\/C\/C12\/C12-1140.pdf","volume":"","doi":"","editors_people":"Martin Kay and Christian Boitet","editors":["Kay, M.","Boitet, C."],"published":"Proceedings of COLING 2012: Technical Papers","publisher":"Curran Associates (Red Hook, NY 12571, USA)","issn":"","isbn":"9781627483896","conference_name":"International Conference on Computational Linguistics (COLING)","conference_place":"Mumbai, India","conference_date":"December 2012"},{"id":131771,"last_updated":"2016-03-15 15:50:21","id_people":220773,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"Integrating NLP Tools in a Distributed Environment: A Case Study Chaining a Tagger with a Dependency Parser","year":2012,"authors_people":"Rubino, Francesco; Frontini, Francesca; Quochi, Valeria","authors_cnr":["Frontini, Francesca","Rubino, Francesco","Quochi, Valeria"],"authors_cnr_id":["11893"],"authors_cnr_institute":["048","048","048"],"authors":["Rubino, F.","Frontini, F.","Quochi, V."],"abstract":"The present paper tackles the issue of PoS tag conversion within the framework of a distributed web service platform for the automatic creation of language resources. 
PoS tagging is now considered a \"solved problem\"; yet, because of the differences in the tagsets, interchange of the various PoS taggers available is still hampered. In this paper we describe the implementation of a PoS-tagged-corpus converter, which is needed for chaining together in a workflow the FreeLing PoS tagger for Italian and the DESR dependency parser, given that these two tools have been developed independently. The conversion problems experienced during the implementation, related to the properties of the different tagsets and of tagset conversion in general, are discussed together with the solutions adopted. Finally, the converter is evaluated by assessing the impact of conversion on the performance of the dependency parser by comparing with the outcome of the native pipeline. From this we learn that in most cases parsing errors are due to actual tagging errors, and not to conversion itself. Besides, information on accuracy loss is an important feature in a distributed environment of (NLP) services, where users need to decide which services best suit their needs","keywords":["PoS tag conversion","interoperability","NLP pipelines"],"pages":"2125-2131","url":"http:\/\/www.lrec-conf.org\/proceedings\/lrec2012\/summaries\/726.html","volume":"","doi":"","editors_people":"Nicoletta Calzolari, Khalid Choukri, Thierry Declerck, Mehmet U\u011fur Do\u011fan, Bente Maegaard, Joseph Mariani, Jan Odijk, Stelios Piperidis","editors":["Calzolari, N.","Choukri, K.","Declerck, T.","Do\u011fan, M. 
U.","Maegaard, B.","Mariani, J.","Odijk, J.","Piperidis, S."],"published":"Proceedings of the Eight International Conference on Language Resources and Evaluation (LREC'12)","publisher":"European language resources association (ELRA) (Paris, FRA)","issn":"","isbn":"9782951740877","conference_name":"Language Resources and Evaluation Conference 2012","conference_place":"Istanbul, Turchia","conference_date":"23-25 Maggio 2012"},{"id":128369,"last_updated":"2013-05-31 09:33:10","id_people":220733,"institutes":["IIT","ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Web Language Identification Testing Tool","year":2012,"authors_people":"Abrate, Matteo [1]; Bacciu, Clara [1]; Frontini, Francesca [2]; Lapolla, Mariantonietta Noemi [1]; Marchetti, Andrea [1]; Monachini, Monica [2]","authors_cnr":["Abrate, Matteo","Bacciu, Clara","Frontini, Francesca","Marchetti, Andrea","Monachini, Monica"],"authors_cnr_id":["1738","8945"],"authors_cnr_institute":["044","044","048","044","048"],"authors":["Abrate, M.","Bacciu, C.","Frontini, F.","Lapolla, M. N.","Marchetti, A.","Monachini, M."],"abstract":"Nowadays a variety of tools for automatic language identification are available. Regardless of the approach used, at least two features can be identified as crucial to evaluate the performances of such tools: the precision of the presented results and the range of languages that can be detected. In this work we shall focus on a subtask of written language identification that is important to preserve and enhance multilinguality in the Web, i.e. detecting the language of a Web page given its URL. Most specifically, the final aim is to verify to which extent under-represented languages are recognized by available tools. The main specificity of Web Language Identification (WLI) lies in the fact that often an HTML page can provide interesting extralinguistic clues (URL domain name, metadata, encoding, etc) that can enhance accuracy. 
We shall first provide some data and statistics on the presence of languages on the web, secondly discuss existing practices and tools for language identification according to different metrics - for instance the approaches used and the number of supported languages - and finally make some proposals on how to improve current Web Language Identifiers. We shall also present a preliminary WLI service that builds on the Google Chromium Compact Language Detector; the WLI tool allows us to test the Google n-gram based algorithm against an ad-hoc gold standard of pages in various languages. The gold standard, based on a selection of Wikipedia projects, contains samples in languages for which no automatic recognition has been attempted; it can thus be used by specialists to develop and evaluate WLI systems.","keywords":["Multilingual Web"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/220733","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"The Multilingual Web-the Way Ahead","conference_place":"Luxembourg","conference_date":"15-16 March 2012"},{"id":129442,"last_updated":"2017-03-02 12:15:39","id_people":348940,"institutes":["IIT","ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Web Language Identification Testing Tool","year":2012,"authors_people":"F. Frontini, M. Monachini, M. N. LaPolla, A. Marchetti, M. Abrate, C. Bacciu","authors_cnr":["Abrate, Matteo","Bacciu, Clara","La Polla, Mariantonietta Noemi","Marchetti, Andrea","Monachini, Monica","Frontini, Francesca"],"authors_cnr_id":["1738","8945","15911"],"authors_cnr_institute":["044","044","044","044","048","048"],"authors":["Frontini, F.","Monachini, M.","Lapolla, M. N.","Marchetti, A.","Abrate, M.","Bacciu, C."],"abstract":"Nowadays a variety of tools for automatic language identification are available. 
Regardless of the approach used, at least two features can be identified as crucial to evaluate the performances of such tools: the precision of the presented results and the range of languages that can be detected. In this work we shall focus on a subtask of written language identification that is important to preserve and enhance multilinguality in the Web, i.e. detecting the language of a Web page given its URL. Most specifically, the final aim is to verify to which extent under-represented languages are recognized by available tools. The main specificity of Web Language Identification (WLI) lies in the fact that often an HTML page can provide interesting extralinguistic clues (URL domain name, metadata, encoding, etc) that can enhance accuracy. We shall first provide some data and statistics on the presence of languages on the web, secondly discuss existing practices and tools for language identification according to different metrics - for instance the approaches used and the number of supported languages - and finally make some proposals on how to improve current Web Language Identifiers. We shall also present a preliminary WLI service that builds on the Google Chromium Compact Language Detector; the WLI tool allows us to test the Google n-gram based algorithm against an ad-hoc gold standard of pages in various languages. 
The gold standard, based on a selection of Wikipedia projects, contains samples in languages for which no automatic recognition has been attempted; it can thus be used by specialists to develop and evaluate WLI systems.","keywords":["Language Identification Tools","Multilingual Web"],"pages":"1-1","url":"https:\/\/publications.cnr.it\/doc\/348940","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"W3C Workshop, Call for Participation: The Multilingual Web-The Way Ahead","conference_place":"Luxembourg","conference_date":"15-16\/03\/2012"},{"id":128383,"last_updated":"2013-06-03 13:58:00","id_people":221743,"institutes":["IIT","ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"Specifiche architetturali e funzionali","year":2012,"authors_people":"Aliprandi, Carlo [1]; Bacciu, Clara [2]; Bartolini, Roberto [3]; Frontini, Francesca [3]; Lapolla, Noemi [2]; Marchetti, Andrea [2]; Piccinonno, Fulvio; Soru, Tiziana [1]","authors_cnr":["Bacciu, Clara","Frontini, Francesca","Marchetti, Andrea","Bartolini, Roberto"],"authors_cnr_id":["1738","10441"],"authors_cnr_institute":["044","048","044","048"],"authors":["Aliprandi, C.","Bacciu, C.","Bartolini, R.","Frontini, F.","Lapolla, N.","Marchetti, A.","Piccinonno, F.","Soru, T."],"abstract":"Questo documento contiene le specifiche funzionali ed architetturali del sistema GLOSS elaborate come risultato dell'obiettivo operativo 1. Tali specifiche debbono essere di riferimento per tutte le fasi di sviluppo dei vari componenti del sistema stesso e della loro integrazione in un prototipo dimostrativo. Ad una breve introduzione che richiama gli obiettivi generali del progetto, seguono: 1. La descrizione delle funzionalit\u00e0 suddivisa nelle varie fasi che compongono il flusso operativo di GLOSS. 2. 
La descrizione dell'architettura del sistema da realizzare nella quale si fornisce lo schema dell'integrazione dei vari componenti, il protocollo di comunicazione e memorizzazione dei dati che viene trattato pi\u00f9 nel dettaglio nel documento D1.2 GAF - Gloss Annotation Format, e la descrizione di ciascun componente del sistema. Per sua natura, questo documento sar\u00e0 soggetto a revisione durante tutto il periodo di sviluppo del sistema. Questa prima versione deve intendersi come guida per l'implementazione ed ha lo scopo di fornire a chi partecipa a questo progetto una visione generale delle funzionalit\u00e0 di GLOSS e come queste dovranno essere integrate nel prototipo dimostratore.","keywords":["GLOSS specifiche funzionali"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/221743","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131786,"last_updated":"2013-06-03 11:36:54","id_people":221582,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"D4. 5 Final Report on the Corpus Acquisition & Annotation subsystem and its components","year":2012,"authors_people":"Prokopidis, Prokopis [1]; Papavassiliou, Vassilis [1]; Toral, Antonio [2]; Poch Riera, Marc; Frontini, Francesca [3]; Rubino, Francesco [3]; Thurmair, Gregor","authors_cnr":["Frontini, Francesca","Rubino, Francesco"],"authors_cnr_id":[""],"authors_cnr_institute":["048","048"],"authors":["Prokopidis, P.","Papavassiliou, V.","Toral, A.","Poch Riera, M.","Frontini, F.","Rubino, F.","Thurmair, G."],"abstract":"PANACEA WP4 targets the creation of a Corpus Acquisition and Annotation (CAA) subsystem for the acquisition and processing of monolingual and bilingual language resources (LRs). The CAA subsystem consists of tools that have been integrated as web services in the PANACEA platform of LR production. 
D4.2 Initial functional prototype and documentation in T13 and D4.4 Report on the revised Corpus Acquisition & Annotation subsystem and its components in T23 provided initial and updated documentation on this subsystem, while this deliverable presents the final documentation of the subsystem as it evolved after the third development cycle of the project. The deliverable is structured as follows. The Corpus Acquisition Component (i.e. the Focused Monolingual and Bilingual Crawlers (FMC\/FBC)) is described in section 2. The final list of tools for corpus normalization (cleaning and de-duplication) is detailed in section 3. Section 4 provides documentation on all NLP tools included in the subsystem. Due to its nature, this deliverable aggregates considerable parts of all previous WP4 deliverables. The main new additions include a) new functionalities for, among others, crawling strategy, de-duplication, and detection of parallel document pairs; and b) new NLP tools for syntactic analysis, named entity recognition, tweet processing and anonymization.","keywords":["Corpus Acquisition"],"pages":"","url":"http:\/\/www.jotform.com\/uploads\/fabioaffeilc\/30222975566357\/225350067351490116\/PANACEA","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131787,"last_updated":"2020-09-12 01:47:14","id_people":221616,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"D7. 4 Third evaluation report. 
Evaluation of PANACEA v3 and produced resources","year":2012,"authors_people":"Quochi, Valeria; Frontini, Francesca; Bartolini, Roberto; Hamon, Olivier; Poch Riera, Marc; Padro, Muntsa; Bel, Nuria; Thurmair, Gregor; Toral, Antonio; Kamran, Amir","authors_cnr":["Frontini, Francesca","Bartolini, Roberto","Quochi, Valeria"],"authors_cnr_id":["10441","11893"],"authors_cnr_institute":[""],"authors":["Quochi, V.","Frontini, F.","Bartolini, R.","Hamon, O.","Poch Riera, M.","Padro, M.","Bel, N.","Thurmair, G.","Toral, A.","Kamran, A."],"abstract":"D7.4 reports on the evaluation of the different components integrated in the PANACEA third cycle of development as well as the final validation of the platform itself. All validation and evaluation experiments follow the evaluation criteria already described in D7.1. The main goal of WP7 tasks was to test the (technical) functionalities and capabilities of the middleware that allows the integration of the various resource-creation components into an interoperable distributed environment (WP3) and to evaluate the quality of the components developed in WP5 and WP6. The content of this deliverable is thus complementary to D8.2 and D8.3 that tackle advantages and usability in industrial scenarios. It has to be noted that the PANACEA third cycle of development addressed many components that are still under research. The main goal for this evaluation cycle thus is to assess the methods experimented with and their potentials for becoming actual production tools to be exploited outside research labs. For most of the technologies, an attempt was made to re-interpret standard evaluation measures, usually in terms of accuracy, precision and recall, as measures related to a reduction of costs (time and human resources) in the current practices based on the manual production of resources. 
In order to do so, the different tools had to be tuned and adapted to maximize precision and for some tools the possibility to offer confidence measures that could allow a separation of the resources that still needed manual revision has been attempted. Furthermore, the extension to other languages in addition to English, also a PANACEA objective, has been evaluated. The main facts about the evaluation results are now summarized.","keywords":["PANACEA","evaluation","machine translation"],"pages":"","url":"http:\/\/hdl.handle.net\/10230\/22533","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131788,"last_updated":"2015-02-24 18:27:38","id_people":221631,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"D6. 2 Integrated Final Version of the Components for Lexical Acquisition","year":2012,"authors_people":"Rimell, Laura; Bel, N\u00faria; Padr\u00f3, Muntsa; Frontini, Francesca; Monachini, Monica; Quochi, Valeria","authors_cnr":["Frontini, Francesca","Monachini, Monica","Quochi, Valeria"],"authors_cnr_id":["8945","11893"],"authors_cnr_institute":["048","048","048"],"authors":["Rimell, L.","Bel, N.","Padr\u00f3, M.","Frontini, F.","Monachini, M.","Quochi, V."],"abstract":"The PANACEA project has addressed one of the most critical bottlenecks that threaten the development of technologies to support multilingualism in Europe, and to process the huge quantity of multilingual data produced annually. Any attempt at automated language processing, particularly Machine Translation (MT), depends on the availability of language-specific resources. Such Language Resources (LR) contain information about the language's lexicon, i.e. the words of the language and the characteristics of their use. 
In Natural Language Processing (NLP), LRs contribute information about the syntactic and semantic behaviour of words - i.e. their grammar and their meaning - which inform downstream applications such as MT. To date, many LRs have been generated by hand, requiring significant manual labour from linguistic experts. However, proceeding manually, it is impossible to supply LRs for every possible pair of European languages, textual domain, and genre, which are needed by MT developers. Moreover, an LR for a given language can never be considered complete nor final because of the characteristics of natural language, which continually undergoes changes, especially spurred on by the emergence of new knowledge domains and new technologies. PANACEA has addressed this challenge by building a factory of LRs that progressively automates the stages involved in the acquisition, production, updating and maintenance of LRs required by MT systems. The existence of such a factory will significantly cut down the cost, time and human effort required to build LRs. WP6 has addressed the lexical acquisition component of the LR factory, that is, the techniques for automated extraction of key lexical information from texts, and the automatic collation of lexical information into LRs in a standardized format. The goal of WP6 has been to take existing techniques capable of acquiring syntactic and semantic information from corpus data, improving upon them, adapting and applying them to multiple languages, and turning them into powerful and flexible techniques capable of supporting massive applications. One focus for improving the scalability and portability of lexical acquisition techniques has been to extend exiting techniques with more powerful, less \"supervised\" methods. In NLP, the amount of supervision refers to the amount of manual annotation which must be applied to a text corpus before machine learning or other techniques are applied to the data to compile a lexicon. 
More manual annotation means more accurate training data, and thus a more accurate LR. However, given that it is impractical from a cost and time perspective to manually annotate the vast amounts of data required for multilingual MT across domains, it is important to develop techniques which can learn from corpora with less supervision. Less supervised methods are capable of supporting both large-scale acquisition and efficient domain adaptation, even in the domains where data is scarce. Another focus of lexical acquisition in PANACEA has been the need of LR users to tune the accuracy level of LRs. Some applications may require increased precision, or accuracy, where the application requires a high degree of confidence in the lexical information used. At other times a greater level of coverage may be required, with information about more words at the expense of some degree of accuracy. Lexical acquisition in PANACEA has investigated confidence thresholds for lexical acquisition to ensure that the ultimate users of LRs can generate lexical data from the PANACEA factory at the desired level of accuracy.","keywords":["Lexical Acquisition"],"pages":"","url":"http:\/\/www.panacea-lr.eu\/system\/deliverables\/PANACEA_D6.2.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131789,"last_updated":"2015-03-06 13:23:12","id_people":221650,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"D6. 
5 Merged dictionaries","year":2012,"authors_people":"Rimell, Laura; Bel, N\u00faria; Padr\u00f3, Muntsa; Frontini, Francesca; Monachini, Monica; Quochi, Valeria; Del Gratta, Riccardo","authors_cnr":["Frontini, Francesca","Monachini, Monica","Quochi, Valeria","Del Gratta, Riccardo"],"authors_cnr_id":["8945","11893","11933"],"authors_cnr_institute":["048","048","048","048"],"authors":["Rimell, L.","Bel, N.","Padr\u00f3, M.","Frontini, F.","Monachini, M.","Quochi, V.","Del Gratta, R."],"abstract":"This document presents the merged dictionaries delivered in PANACEA. Those dictionaries result from merging already existing lexica, generally for general domain, with domain specific lexica acquired using PANACEA platform. The domain specific lexica are presented and delivered in D6.3 and the merging repository that allowed the multilevel merging in D6.4.","keywords":["merged dictionaries","computational lexicon"],"pages":"","url":"http:\/\/www.panacea-lr.eu\/\/en\/deliverables\/list","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131794,"last_updated":"2015-02-24 18:30:12","id_people":221755,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"D6. 
3 Monolingual lexica for English, Spanish and Italian tuned for a particular domain (LAB and ENV)","year":2012,"authors_people":"Rimell, Laura; Bel, Nuria; Padr\u00f2, Muntsa; Frontini, Francesca; Monachini, Monica; Quochi, Valeria; Del Gratta, Riccardo","authors_cnr":["Frontini, Francesca","Monachini, Monica","Quochi, Valeria","Del Gratta, Riccardo"],"authors_cnr_id":["8945","11893","11933"],"authors_cnr_institute":["048","048","048","048"],"authors":["Rimell, L.","Bel, N.","Padr\u00f2, M.","Frontini, F.","Monachini, M.","Quochi, V.","Del Gratta, R."],"abstract":"This document presents the lexica acquired using PANACEA platform for Labour and Environment domains. The languages of the lexica are English, Spanish and Italian. The lexical information acquired depends on the language, according to the available tools in the platform.","keywords":["Lexicon Acqusition"],"pages":"","url":"http:\/\/www.panacea-lr.eu\/system\/deliverables\/PANACEA_D6.3.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131684,"last_updated":"2013-02-26 13:08:54","id_people":205564,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"The Language Library: Many Layers, More Knowledge","year":2011,"authors_people":"Nicoletta Calzolari, Riccardo Del Gratta, Francesca Frontini, Irene Russo","authors_cnr":["Russo, Irene","Frontini, Francesca","Del Gratta, Riccardo"],"authors_cnr_id":["11933"],"authors_cnr_institute":["048","048","048"],"authors":["Calzolari, N.","Del Gratta, R.","Frontini, F.","Russo, I."],"abstract":"In this paper we outline the general concept of the Language Library, a new initiative that has the purpose of building a huge archive of structured colletion of linguistic information. 
The Language Library is conceived as a community built repository and as an environment that allows language specialists to share multidimensional and multi-level annotated\/processed resources. The first steps towards its implementation are briefly sketched.","keywords":["Language Resources","Language Library"],"pages":"93-97","url":"https:\/\/publications.cnr.it\/doc\/205564","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-974-466-564-5","conference_name":"Workshop on Language Resources, Technology and Services in the Sharing Paradigm","conference_place":"Chiang Mai","conference_date":"12 Novembre 2011"},{"id":131685,"last_updated":"2013-02-26 15:04:30","id_people":205601,"institutes":["ILC"],"type":"conference_article","type_order":5,"type_people":"conferenceObject","title":"A Metadata Schema for the Description ofLanguage Resources (LRs)","year":2011,"authors_people":"Frontini Francesca, Monachini Monica, Gavrilidou Maria, Labropoulou Penny, Piperidis Stelios, Francopoulo Gil, Arranz Victoria, Mapelli Valerie","authors_cnr":["Frontini, Francesca","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048"],"authors":["Frontini, F.","Monachini, M.","Gavrilidou, M.","Labropoulou, P.","Piperidis, S.","Francopoulo, G.","Arranz, V.","Mapelli, V."],"abstract":"This paper presents the metadata schema for describing language resources (LRs) currently under development for the needs of META-SHARE, an open distributed facility for the exchange and sharing of LRs. An essential ingredient in its setup is the existence of formal and standardized LR descriptions, cornerstone of the interoperability layer of any such initiative. The description of LRs is granular and abstractive, combining the taxonomy of LRs with an inventory of a structured set of descriptive elements, of which only a minimal subset is obligatory; the schema additionally proposes recommended and optional elements. 
Moreover, the schema includes a set of relations catering for the appropriate inter-linking of resources. The current paper presents the main principles and features of the metadata schema, focusing on the description of text corpora and lexical \/ conceptual resources.","keywords":["metadata","language resources"],"pages":"84-92","url":"https:\/\/publications.cnr.it\/doc\/205601","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"978-974-466-564-5","conference_name":"Workshop on Language Resources, Technology and Services in the Sharing Paradigm","conference_place":"Chiang Mai","conference_date":"12 Novembre 2011"},{"id":131690,"last_updated":"2015-02-26 12:26:05","id_people":205738,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Towards interfacing lexical and ontological resources","year":2011,"authors_people":"Francesca Frontini, Monica Monachini","authors_cnr":["Frontini, Francesca","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048"],"authors":["Frontini, F.","Monachini, M."],"abstract":"During the last two decades, the Computational Linguistics community has dedicated considerable effort to the research and development Lexical Resources (LRs), especially Computational Lexicons. 
These LRs, even though belonging to different linguistic approaches and theories, share a common element; all of them contain, explicitly or implicitly, an ontology as the means of organizing their structure.","keywords":["language resources","ontologies"],"pages":"26","url":"https:\/\/publications.cnr.it\/doc\/205738","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"ONTOLOGIES AND LEXICAL SEMANTICS","conference_place":"Roma","conference_date":"01 Ottobre 2011"},{"id":131982,"last_updated":"2015-02-26 13:23:59","id_people":290606,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"The FLaReNet Databook","year":2011,"authors_people":"Arranz V., Bel N., Budin G., Caselli T., Choukri K., Del Gratta R., Frontini F., Goggi S., Monachini M., Quochi V., Rubino F., Russo I. et alii","authors_cnr":["Del Gratta, Riccardo","Caselli, Tommaso","Russo, Irene","Frontini, Francesca","Rubino, Francesco","Monachini, Monica","Goggi, Sara","Quochi, Valeria"],"authors_cnr_id":["8945","10172","11893"],"authors_cnr_institute":["048","048","048","048","048","048","048","048"],"authors":["Arranz, V.","Bel, N.","Budin, G.","Caselli, T.","Choukri, K.","Del Gratta, R.","Frontini, F.","Goggi, S.","Monachini, M.","Quochi, V.","Rubino, F.","Russo, I."],"abstract":"The FLaReNet Databook is not only the collection of all the factual material collected during the activities of the project, but also a set on innovative initiatives and instruments that will remain in place for the continuous collection of such \"facts\". 
The purpose of the Databook is in fact, on one side, to consolidate the analyses carried out in the project and, at the same time, to set up the proper mechanisms that will enable the provision of a continuous stream of relevant factual material, also after the end of the project.","keywords":["Language Resources (LRs)"],"pages":"1-8","url":"http:\/\/www.flarenet.eu\/?q=FLaReNet_Databook","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131709,"last_updated":"2015-02-26 12:27:30","id_people":206406,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"Documentation and User Manual of the META-SHARE Metadata Model","year":2011,"authors_people":"Elina Desipri, Maria Gavrilidou, Penny Labropoulou, Stelios Piperidis, Francesca Frontini, Monica Monachini, Victoria Arranz, Val\u00e9rie Mapelli, Gil Francopoulo, Thierry Declerck","authors_cnr":["Frontini, Francesca","Monachini, Monica"],"authors_cnr_id":["8945"],"authors_cnr_institute":["048","048"],"authors":["Desipri, E.","Gavrilidou, M.","Labropoulou, P.","Piperidis, S.","Frontini, F.","Monachini, M.","Arranz, V.","Mapelli, V.","Francopoulo, G.","Declerck, T."],"abstract":"The current deliverable presents the META-SHARE metadata schema v1.0, as implemented in the META-SHARE XSD's v1.0 released to (META-NET and PSP partners) in July 2011 for text corpora and lexical\/conceptual resources and its supplement for audio corpora, tools and language descriptions (simplified\/refactored version) as implemented in November. It is meant to act as a user manual, providing explanations on the model contents for LRs providers and LRs curators that wish to describe their resources in accordance to it. 
Work on the schema is ongoing and changes\/updates to the model are constantly being made; where appropriate, some changes that are already under way are documented in this deliverable.","keywords":["Language resources","metadata","standards"],"pages":"150","url":"https:\/\/publications.cnr.it\/doc\/206406","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131713,"last_updated":"2015-02-26 12:19:24","id_people":206457,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"KYOTO-LMF WordNet Representation Format","year":2011,"authors_people":"Monica Monachini, Francesca Frontini, Claudia Soria","authors_cnr":["Frontini, Francesca","Monachini, Monica","Soria, Claudia"],"authors_cnr_id":["8945","9887"],"authors_cnr_institute":["048","048","048"],"authors":["Monachini, M.","Frontini, F.","Soria, C."],"abstract":"The format described in the following pages is the final revised proposal for representing wordnets inside the Kyoto project (henceforth \"Kyoto-LMF wordnet format\"). The reference model is Lexical Markup Framework (LMF), version 16, probably one of the most widely recognized standards for the representation of NLP lexicons. The goals of LMF are to provide a common model for the creation and use of such lexical resources, to manage the exchange of data between and among them, and to enable the merging of a large number of individual resources to form extensive global electronic respurces. LMF was specifically designed to accomodate as many models of lexical representations as possible. 
Purposefully, it is designed as a mea-model, i.e a high-level specification for lexical resources defining the structural constraints of a lexicon.","keywords":["Wordnets","LMF","ISO","Representation formats","standards"],"pages":"32","url":"https:\/\/publications.cnr.it\/doc\/206457","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132045,"last_updated":"2015-03-18 15:39:28","id_people":327309,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"WP-4. 4: Report on the revised Corpus Acquisition & Annotation subsystem and its components","year":2011,"authors_people":"Prokopis Prokopidis, Vassilis Papavassiliou, Antonio Toral, Marc Poch Riera, Francesca Frontini, Francesco Rubino, Gregor Thurmair","authors_cnr":["Frontini, Francesca","Rubino, Francesco"],"authors_cnr_id":[""],"authors_cnr_institute":["048","048"],"authors":["Prokopidis, P.","Papavassiliou, V.","Toral, A.","Riera, M. P.","Frontini, F.","Rubino, F.","Thurmair, G."],"abstract":"","keywords":["corpus acquisition","corpus annotation"],"pages":"","url":"http:\/\/www.panacea-lr.eu\/system\/deliverables\/PANACEA_D4.4.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":132046,"last_updated":"2015-03-18 15:44:12","id_people":327310,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"WP-4. 
5: Final Report on the Corpus Acquisition & Annotation subsystem and its components","year":2011,"authors_people":"Prokopis Prokopidis, Vassilis Papavassiliou, Antonio Toral, Marc Poch Riera, Francesca Frontini, Francesco Rubino, Gregor Thurmair","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":[""],"authors_cnr_institute":["048"],"authors":["Prokopidis, P.","Papavassiliou, V.","Toral, A.","Riera, M. P.","Frontini, F.","Rubino, F.","Thurmair, G."],"abstract":"","keywords":["corpus acquisition","corpus annotation"],"pages":"","url":"http:\/\/www.panacea-lr.eu\/system\/deliverables\/PANACEA_D4.5.pdf","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131705,"last_updated":"2015-02-26 12:33:01","id_people":206329,"institutes":["ILC"],"type":"technical_report","type_order":8,"type_people":"report","title":"KyotoCore: integrated system for knowledge mining from text","year":2011,"authors_people":"Piek Vossen, Wauter Bosma, German Rigau, Eneko Agirre, Aitor Soroa, Carlo Aliprandi, Joost de Jonge, Feikje Hielkema, Monica Monachini, Roberto Bartolini, Francesca Frontini","authors_cnr":["Frontini, Francesca","Monachini, Monica","Bartolini, Roberto"],"authors_cnr_id":["8945","10441"],"authors_cnr_institute":["048","048","048"],"authors":["Vossen, P.","Bosma, W.","Rigau, G.","Agirre, E.","Soroa, A.","Aliprandi, C.","De Jonge, J.","Hielkema, F.","Monachini, M.","Bartolini, R.","Frontini, F."],"abstract":"In this deliverable, we describe KyotoCore, an integrated system for applying text mining. We describe the software architecture of KyotoCore, the single modules and the process flows. 
Finally, we describe a use case where we apply the complete process toan English database on estuaries.","keywords":["Knowledge and text mining software"],"pages":"56","url":"https:\/\/publications.cnr.it\/doc\/206329","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131384,"last_updated":"2015-02-20 19:05:29","id_people":136473,"institutes":["ILC"],"type":"book_chapter","type_order":2,"type_people":"bookPart","title":"From Pattern Dictionary to Patternbank","year":2010,"authors_people":"Jezek E.; Frontini F.","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":[""],"authors_cnr_institute":[""],"authors":["Jezek, E.","Frontini, F."],"abstract":"","keywords":["Ontology. Computational Semantics"],"pages":"215-237","url":"https:\/\/publications.cnr.it\/doc\/136473","volume":"","doi":"","editors_people":"Gilles-Maurice de Schryver","editors":["De Schryver, G."],"published":"A Way with Words: Recent Advances in Lexical Theory and Analysis","publisher":"","issn":"","isbn":"","conference_name":"","conference_place":"","conference_date":""},{"id":131316,"last_updated":"1970-01-01 01:00:00","id_people":112965,"institutes":["ILC"],"type":"conference_misc","type_order":6,"type_people":"conferenceObject","title":"Statistical profiling of Italian L2 texts: competence and native language","year":2010,"authors_people":"Frontini F.","authors_cnr":["Frontini, Francesca"],"authors_cnr_id":[""],"authors_cnr_institute":[""],"authors":["Frontini, F."],"abstract":"","keywords":["Text categorization"],"pages":"","url":"https:\/\/publications.cnr.it\/doc\/112965","volume":"","doi":"","editors_people":"","editors":[""],"published":"","publisher":"","issn":"","isbn":"","conference_name":"20th Annual Conference of the European Second Language Association","conference_place":"Reggio Emilia","conference_date":"2010"}]