@INCOLLECTION{CALAMAI_2022_INCOLLECTION_CPPCSM_467178, AUTHOR = {Calamai, S. and Piccardi, D. and Pretto, N. and Candeo, G. and Stamuli, M. F. and Monachini, M.}, TITLE = {Not Just Paper: Enhancement of Archive Cultural Heritage}, YEAR = {2022}, ABSTRACT = {Oral archives and digital technologies have gone hand-in-hand for a very long time. Both sides benefit from this interdisciplinary junction: technology enhances the preservation and diffusion of oral materials, while exploiting them to develop cutting-edge tools for their treatment. This chapter deals with an Italian instantiation of this mutual relationship: the Archivio Vi.Vo. project. Offering innovative solutions concerning metadata, audio restoration, description, and access, Archivio Vi.Vo. aims to build an online platform to host the oral archives from Tuscany. The project is powered by CLARIN-IT, which guarantees its compliance with standards and offers resources for data access and discoverability. Archivio Vi.Vo. has not been built from scratch: it is instead a cross-fertilization of previous initiatives and research projects (e.g., the Gra.fo project). Moreover, the chapter presents the related, contemporary work of a multidisciplinary group striving to synthesize a Vademecum for future generations of oral archive researchers. Lastly, a brief list of tentative ideas for future developments of the Archivio Vi.Vo. platform will be presented.}, KEYWORDS = {digital oral archives, research infrastructures, models for digital preservation}, URL = {https://www.degruyter.com/document/isbn/9783110767377/html}, VOLUME = {1}, DOI = {10.1515_9783110767377-025}, PUBLISHER = {Walter De Gruyter \& Co (Berlin, DEU)}, ISBN = {9783110767377}, BOOKTITLE = {CLARIN The Infrastructure for Language Resources}, EDITOR = {Fišer, D. and Witt, A.}, } @INCOLLECTION{DEJONG_2022_INCOLLECTION_DM_472304, AUTHOR = {De Jong, F. and Monachini, M.}, TITLE = {Introduction. 
Selected papers from the CLARIN Annual Conference 2021}, YEAR = {2022}, ABSTRACT = {CLARIN, the Common Language Resources and Technology Infrastructure, is a virtual platform that is accessible for everyone interested in language. CLARIN offers access to language resources, technology, and knowledge, and enables cross-country collaboration among academia, industry, policy-makers, cultural institutions, and the general public. Researchers, students, and citizens are offered access to digital language resources and technology services to deploy, connect, analyse and sustain such resources. Inline with the Open Science agenda, CLARIN enables scholars from the Social Sciences and Humanities(SSH) and beyond to engage in and contribute to cutting-edge, data-driven research based on language data in a range of formats and modalities.}, KEYWORDS = {Language Resource Infrastructure}, PAGES = {i-v}, URL = {https://publications.cnr.it/doc/472304}, VOLUME = {189}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, BOOKTITLE = {SELECTED PAPERS FROM THE CLARIN ANNUAL CONFERENCE 2021}, EDITOR = {Monachini, M. and Eskevich, M.}, } @INCOLLECTION{DELFANTE_2022_INCOLLECTION_DFMQ_469112, AUTHOR = {Del Fante, D. and Frontini, F. and Monachini, M. and Quochi, V.}, TITLE = {Italian Language Resources. From CLARIN-IT to the VLO and Back: Sketching a Methodology for Monitoring LRs Visibility}, YEAR = {2022}, ABSTRACT = {This paper sketches a user-oriented, qualitative methodology for both (i) monitoring the existence and availability of language resources relevant for a given CLARIN national community and language and (ii) assessing the offering potential of CLARIN, in terms of Language Resources provided to national consortia. From the user perspective, the methodology has been applied to investigate the visibility of language resources available for Italian within the CLARIN central services, in particular the Virtual Language Observatory. 
As a proof-of-concept, the methodology has been tested on the resources available through the CLARIN-IT data centres, but, ideally, it could be applied by any national data centre aiming to assess the existence of LRs in CLARIN for any given languages and check their accessibility for the interested users. It is thus argued that such an assessment might be a useful instrument in the hands of national coordinators and centre managers for (i) bringing to the fore both strengths and critical issues about their data providing community and (ii) for planning targeted actions to improve and increase both visibility and accessibility of their LRs.}, KEYWORDS = {Virtual Language Observatory, CLARIN-IT, CLARIN-ERIC, Qualitative Assessment Methodology, User Involvement}, PAGES = {10-22}, URL = {https://ecp.ep.liu.se/index.php/clarin/article/view/413/371}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, BOOKTITLE = {Selected Papers from the CLARIN Annual Conference 2021}, EDITOR = {Monachini and Monica and Eskevich and Maria}, } @INCOLLECTION{VAGIONAKIS_2022_INCOLLECTION_VDBBDMM_472291, AUTHOR = {Vagionakis, I. and Del Gratta, R. and Boschetti, F. and Baroni, P. and Del Grosso, A. M. and Mancinelli, T. and Monachini, M.}, TITLE = {'Cretan Institutional Inscriptions' Meets CLARIN-IT}, YEAR = {2022}, ABSTRACT = {This paper presents 'Cretan Institutional Inscriptions', a resource in the domain of Digital Epigraphy developed at the Ca' Foscari University of Venice and supported by CLARIN-IT as part of its actions addressed to initiatives, projects and events in the field of Social Sciences and Humanities. The paper begins with a brief outline of the project within which the resource was created and then goes into a more in-depth description of the main methodologies used to develop the resource (EpiDoc and EFES) and of their benefits. 
The paper then focuses on the cooperation of the project with the Venice Centre of Digital and Public Humanities and the Italian node of CLARIN, also illustrating the dockerization process applied to the resource hosted on the CLARIN-IT servers. Some desiderata for future developments are outlined as well. The paper ends with some remarks about the widening of CLARIN horizons towards Digital Epigraphy and on the role of its K-Centres in this respect.}, KEYWORDS = {Digital Epigraphy, Digital Classics, Ancient Greek, Crete, Institutions, Text Encoding Initiative, TEI, EpiDoc, EpiDoc Front-End Services, EFES, Virtual Language Observatory, Dockerization, ILC4CLARIN, CLARIN-IT, CLARIN}, PAGES = {139-150}, URL = {https://ecp.ep.liu.se/index.php/clarin/article/view/424/382}, VOLUME = {189}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, BOOKTITLE = {Selected Papers from the CLARIN Annual Conference 2021}, EDITOR = {Monachini, M. and Eskevich, M.}, } @EDITORIAL{MONACHINI_2022_EDITORIAL_ME_472302, AUTHOR = {Monachini, M. and Eskevich, M.}, TITLE = {Selected Papers from the CLARIN Annual Conference 2021}, YEAR = {2022}, ABSTRACT = {This volume presents the highlights of the 10th CLARIN Annual Conference 2021. The conference was held on 27th --29th September 2021 and because of the COVID-19 pandemic, for the second year in row a virtual format had te be adopted. CLARIN, the Common Language Resources and Technology Infrastructure, is a virtual platform that is accessible for everyone interested in language. CLARIN offers access to language resources, technology, and knowledge, and enables cross-country collaboration among academia, industry, policy-makers, cultural institutions, and the general public. Researchers, students, and citizens are offered access to digital language resources and technology services to deploy, connect, analyse and sustain such resources. 
In line with the Open Science agenda, CLARIN enables scholars from the Social Sciences and Humanities (SSH) and beyond to engage in and contribute to cutting-edge, data-driven research based on language data in a range of formats and modalities.}, KEYWORDS = {Language Resource Infrastructure}, PAGES = {1-212}, URL = {https://publications.cnr.it/doc/472302}, VOLUME = {189}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, } @INPROCEEDINGS{DELFANTE_2022_INPROCEEDINGS_DFMQ_468964, AUTHOR = {Del Fante, D. and Frontini, F. and Monachini, M. and Quochi, V.}, TITLE = {CLARIN-IT: An Overview on the Italian Clarin Consortium After Six Years of Activity}, YEAR = {2022}, ABSTRACT = {This paper offers an overview of the Italian CLARIN consortium after six years since its establishment. The members, the centres and the repositories and the most important collections are described. Lastly, in order to showcase the visibility and the accessiblity of Language Resources provided by CLARIN-IT from a user-perspective, we show how Italian resources are findable within CLARIN ERI}, KEYWORDS = {Language Resources, Data Repositories and Archives, Research Infrastructures, CLARIN}, PAGES = {8}, URL = {http://ceur-ws.org/Vol-3160/short21.pdf}, PUBLISHER = {CEUR-WS. org (Aachen, DEU)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Italian Research Conference on Digital Libraries}, CONFERENCE_PLACE = {Università degli Studi di Padova}, CONFERENCE_DATE = {24/02/2022}, BOOKTITLE = {Proceedings of the 18th Italian Research Conference on Digital Libraries}, EDITOR = {Di Nunzio, G. M. and Portelli, B. and Redavid, D. and Silvello, G.}, } @INPROCEEDINGS{GAMBA_2022_INPROCEEDINGS_GFBM_472292, AUTHOR = {Gamba, F. and Frontini, F. and Broeder, D. and Monachini, M.}, TITLE = {Language Technologies for the Creation of Multilingual Terminologies. 
Lessons Learned from the SSHOC Project}, YEAR = {2022}, ABSTRACT = {This paper is framed in the context of the SSHOC project and aims at exploring how Language Technologies can help in promoting and facilitating multilingualism in the Social Sciences and Humanities (SSH). Although most SSH researchers produce culturally and societally relevant work in their local languages, metadata and vocabularies used in the SSH domain to describe and index research data are currently mostly in English. We thus investigate Natural Language Processing and Machine Translation approaches in view of providing resources and tools to foster multilingual access and discovery to SSH content across different languages. As case studies, we create and deliver as freely, openly available data a set of multilingual metadata concepts and an automatically extracted multilingual Data Stewardship terminology. The two case studies allow as well to evaluate performances of state-of-the-art tools and to derive a set of recommendations as to how best apply them. Although not adapted to the specific domain, the employed tools prove to be a valid asset to translation tasks. Nonetheless, validation of results by domain experts proficient in the language is an unavoidable phase of the whole workflow.}, KEYWORDS = {Multilingual terminologies, data curation, language resource infrastructures}, PAGES = {154-163}, URL = {https://aclanthology.org/2022.lrec-1.17}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, CONFERENCE_NAME = {13th Conference on Language Resources and Evaluation (LREC 2022)}, CONFERENCE_PLACE = {Marseille, France}, CONFERENCE_DATE = {22/06/2022-24/06/2022}, } @TECHREPORT{TASOVAC_2022_TECHREPORT_TTBBBCUFHHJKKKKMMMMMQRRSSVWWZ_463877, AUTHOR = {Tasovac, T. and Tiberius, C. and Bamberg, C. and Bellandi, A. and Burch, T. and Costa, R. and Ďurčo, M. and Frontini, F. and Hennemann, J. and Heylen, K. and Jakubíček, M. and Khan, F. and Klee, A. and Kosem, I. and Kovář, V. 
and Matuška, O. and McCrae, J. and Monachini, M. and Mörth, K. and Munda, T. and Quochi, V. and Repar, A. and Roche, C. and Salgado, A. and Sievers, H. and Váradi, T. and Weyand, S. and Woldrich, A. and Zhanial, S.}, TITLE = {D5. 3 Overview of Online Tutorials and Instruction Manuals}, YEAR = {2022}, ABSTRACT = {The ELEXIS Curriculum is an integrated set of training materials which contextualizes ELEXIS tools and services inside a broader, systematic pedagogic narrative. This means that the goal of the ELEXIS Curriculum is not simply to inform users about the functionalities of particular tools and services developed within the project, but to show how such tools and services are a) embedded in both lexicographic theory and practice; and b) representative of and contributing to the development of digital skills among lexicographers. The scope and rationale of the curriculum are described in more detail in the Deliverable D5.2 Guidelines for Producing ELEXIS Tutorials and Instruction Manuals. The goal of this deliverable, as stated in the project DOW, is to provide "a clear, structured overview of tutorials and instruction manuals developed within the project."}, KEYWORDS = {ELEXIS, lexicography, training materials}, PAGES = {31}, URL = {https://elex.is/wp-content/uploads/ELEXIS_D5_3_Overview-of-Online-Tutorials-and-Instruction-Manuals.pdf}, } @MISC{FRONTINI_2022_MISC_FBQMMZUW_463506, AUTHOR = {Frontini, F. and Bellandi, A. and Quochi, V. and Monachini, M. and Mörth, K. and Zhanial, S. and Ďurčo, M. and Woldrich, A.}, TITLE = {CLARIN Tools and Resources for Lexicographic Work}, YEAR = {2022}, ABSTRACT = {This course introduces lexicographers to the CLARIN Research Infrastructure and highlights language resources and tools useful for lexicographic practices. The course consists of two parts. In Part 1, you will learn about CLARIN, its technical and knowledge infrastructure, and about how to deposit and find lexical resources in CLARIN. 
In Part 2, you will become acquainted with CLARIN tools that can be used to create lexical resources.}, KEYWORDS = {CLARIN, lexicography}, URL = {https://elexis.humanistika.org/id/UnwYPq70Dewbn7XDEjsMM}, } @MISC{MARTELLI_2022_MISC_MNKKGKNPOLKKDUSLVGLQMFTTCSIM_472295, AUTHOR = {Martelli, F. and Navigli, R. and Krek, S. and Kallas, J. and Gantar, P. and Koeva, S. and Nimb, S. and Pedersen, B. S. and Olsen, S. and Langemets, M. and Koppel, K. and Üksik, T. and Dobrovoljc, K. and Ureña Ruiz, R. and Sancho Sánchez, J. and Lipp, V. and Váradi, T. and Győrffy, A. and László, S. and Quochi, V. and Monachini, M. and Frontini, F. and Tiberius, C. and Tempelaars, R. and Costa, R. and Salgado, A. and Čibej, J. and Munda, T.}, TITLE = {Parallel sense-annotated corpus ELEXIS-WSD 1. 0}, YEAR = {2022}, ABSTRACT = {ELEXIS-WSD is a parallel sense-annotated corpus in which content words (nouns, adjectives, verbs, and adverbs) have been assigned senses. Version 1.0 contains sentences for 10 languages: Bulgarian, Danish, English, Spanish, Estonian, Hungarian, Italian, Dutch, Portuguese, and Slovene. The corpus was compiled by automatically extracting a set of sentences from WikiMatrix (Schwenk et al., 2019), a large open-access collection of parallel sentences derived from Wikipedia, using an automatic approach based on multilingual sentence embeddings. The sentences were manually validated according to specific formal, lexical and semantic criteria (e.g. by removing incorrect punctuation, morphological errors, notes in square brackets and etymological information typically provided in Wikipedia pages). To obtain a satisfying semantic coverage, we filtered out sentences with less than 5 words and less than 2 polysemous words were filtered out. Subsequently, in order to obtain datasets in the other nine target languages, for each selected sentence in English, the corresponding WikiMatrix translation into each of the other languages was retrieved. 
If no translation was available, the English sentence was translated manually. The resulting corpus is comprised of 2,024 sentences for each language.}, KEYWORDS = {Word Sense Disambiguation, corpus parallelo, disambiguazione automatica del senso, annotazione semantica multilingue}, URL = {http://hdl.handle.net/11356/1674}, } @ARTICLE{MONACHINI_2021_ARTICLE_MSCPB_466817, AUTHOR = {Monachini, M. and Stamuli, M. F. and Calamai, S. and Pretto, N. and Bianchi, S.}, TITLE = {The Grey-side of Audio Archives}, YEAR = {2021}, ABSTRACT = {Archives often include documents that can hardly be considered publications or grey literature as such, yet they maintain their documentary value and play a role of primary sources for the specialists. These documents, indeed, can help archivists to reveal the sedimentation process of the archive itself and to preserve the authentic context of the documentary production. They also appear to be very useful for the community of researchers and scholars. This happens more frequently with oral archives which include 'non-conventional sources', thus bringing together audio documents, fieldworks notes, correspondence, slipcases, analogic compact cassettes or open reels. At the cross-road of two disciplines, Archival Science and Grey Literature, this paper aims to argue the applicability of the concept of grey literature to this wide range of documentary materials, by showing the experience of Archivio Vi.Vo, a regional project aiming at building a model for archiving, preserving, managing and disseminating audio documents.}, KEYWORDS = {oral archives, infrastructures}, PAGES = {34-37}, URL = {https://publications.cnr.it/doc/466817}, VOLUME = {22}, PUBLISHER = {TransAtlantic (Amsterdam, Paesi Bassi)}, ISSN = {1386-2316}, JOURNAL = {The GL-conference series. Conference proceedings}, } @EDITORIAL{MONACHINI_2021_EDITORIAL_ME_472301, AUTHOR = {Monachini, M. 
and Eskevich, M.}, TITLE = {CLARIN Annual Conference Proceedings}, YEAR = {2021}, ABSTRACT = {CLARIN2021 is organised for the wider Humanities and Social Sciences communities in order to exchange ideas and experiences within the CLARIN infrastructure. This includes the design, construction and operation of the CLARIN infrastructure, the data, tools and services that it contains or for which there is a need, its actual use by researchers, its relation to other infrastructures and projects, and the CLARIN Knowledge Sharing Infrastructure.}, KEYWORDS = {Language Resource Infrastrucuture}, PAGES = {1-178}, URL = {https://publications.cnr.it/doc/472301}, } @INPROCEEDINGS{CALAMAI_2021_INPROCEEDINGS_CPSPCBM_466824, AUTHOR = {Calamai, S. and Pretto, N. and Stamuli, M. F. and Piccardi, D. and Candeo, G. and Bianchi, S. and Monachini, M.}, TITLE = {COMMUNITY-BASED SURVEY AND ORAL ARCHIVE INFRASTRUCTURE IN THE ARCHIVIO VI. VO. PROJECT}, YEAR = {2021}, ABSTRACT = {Audio and audiovisual archives are at the crossroads of different fields of knowledge, yet they require common solutions for both their long-term preservation and their description, availability, use and reuse. Archivio Vi.Vo. is an Italian project financed by the Tuscany Region, aiming to (i) explore methods for long-term preservation and secure access to oral sources and (ii) develop an infrastructure under the CLARIN-IT umbrella offering several services for scholars from different domains interested in oral sources. This paper describes the project's infrastructure and its methodology through a case study on the Caterina Bueno's audio archive.}, KEYWORDS = {inglese}, URL = {https://publications.cnr.it/doc/466824}, VOLUME = {180}, DOI = {10.3384/ecp180}, ISBN = {978-91-7929-609-4}, CONFERENCE_NAME = {CLARIN2020 Annual Conference}, CONFERENCE_PLACE = {virtual}, CONFERENCE_DATE = {5/10/2020-7/10/2020}, BOOKTITLE = {SELECTED PAPERS FROM THE CLARIN ANNUAL CONFERENCE 2020}, EDITOR = {Navarretta, C. 
and Eskevich, M.}, } @INPROCEEDINGS{DIDONATO_2021_INPROCEEDINGS_DDMP_461475, AUTHOR = {Di Donato, F. and Dumouchel, S. and Monachini, M. and Pohle, S.}, TITLE = {The discovery platform GOTRIPLE: An EOSC service for social sciences and humanities research}, YEAR = {2021}, ABSTRACT = {In this paper we present TRIPLE - Transforming Research through Innovative Practices for Linked Interdisciplinary Exploration - an on-going project funded as part of the European Horizon 2020 programme INFRAEOSC-02-2019 "Prototyping new innovative services" (2019-2023). The project's main objective is to develop a multilingual and multicultural discovery solution for the social sciences and humanities (SSH), which will provide a single access point that allows users to explore, find, access and reuse materials such as literature, data, projects and researcher profiles at European scale. The paper first provides an overview of TRIPLE's main goals and impacts. It then describes the methodology adopted for the design and development of the project platform, GOTRIPLE. Finally, it contextualises the project within the European research landscape, and more specifically in the European Open Science Cloud (EOSC) ecosystem. In the conclusion, some current challenges and open issues are presented.}, KEYWORDS = {EOSC, Open Science, scholarly communication, discoverability, OPERAS, TRIPLE}, PAGES = {31-38}, URL = {http://amsacta.unibo.it/6712/1/AIUCD2021_BOA-versione3A.pdf}, DOI = {10.6092/unibo/amsacta/6712}, CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale}, CONFERENCE_PLACE = {on-line}, CONFERENCE_DATE = {19-22/01/2021}, BOOKTITLE = {AIUCD 2021-Book of Extended Abstracts. p. 624}, EDITOR = {Boschetti, F. and Del Grosso, A. M. and Salvatori, E.}, } @INPROCEEDINGS{MARTELLI_2021_INPROCEEDINGS_MNKTKGKNPOLKKDUSLVGLQMFTCSIM_461705, AUTHOR = {Martelli, F. and Navigli, R. and Krek, S. and Tiberius, C. and Kallas, J. and Gantar, P. 
and Koeva, S. and Nimb, S. and Pedersen, B. S. and Olsen, S. and Langements, M. and Koppel, K. and Üksik, T. and Dobrovolijc, K. and Ureña Ruiz, R. and Sanchosánchez, J. and Lipp, V. and Varadi, T. and Györffy, A. and László, S. and Quochi, V. and Monachini, M. and Frontini, F. and Tempelaars, R. and Costa, R. and Salgado, A. and Čibej, J. and Munda, T.}, TITLE = {Designing the ELEXIS Parallel Sense-Annotated Dataset in 10 European Languages}, YEAR = {2021}, ABSTRACT = {Over the course of the last few years, lexicography has witnessed the burgeoning of increasingly reliable automatic approaches supporting the creation of lexicographic resources such as dictionaries, lexical knowledge bases and annotated datasets. In fact, recent achievements in the field of Natural Language Processing and particularly in Word Sense Disambiguation have widely demonstrated their effectiveness not only for the creation of lexicographic resources, but also for enabling a deeper analysis of lexical-semantic data both within and across languages. Nevertheless, we argue that the potential derived from the connections between the two fields is far from exhausted. In this work, we address a serious limitation affecting both lexicography and Word Sense Disambiguation, i.e. the lack of high-quality sense-annotated data and describe our efforts aimed at constructing a novel entirely manually annotated parallel dataset in 10 European languages. For the purposes of the present paper, we concentrate on the annotation of morpho-syntactic features. 
Finally, unlike many of the currently available sense-annotated datasets, we will annotate semantically by using senses derived from high-quality lexicographic repositories.}, KEYWORDS = {Digital lexicography, Natural Language Processing, Computational Linguistics, Corpus Linguistics, Word Sense Disambiguation}, PAGES = {377-396}, URL = {https://static-curis.ku.dk/portal/files/279888836/eLex_2021_22_pp377_395.pdf}, CONFERENCE_NAME = {eLex 2021}, CONFERENCE_DATE = {05/-7/2021-07/07/2021}, BOOKTITLE = {Proceedings of the eLex 2021 conference}, } @INPROCEEDINGS{VAGIONAKIS_2021_INPROCEEDINGS_VDBBDMM_461540, AUTHOR = {Vagionakis, I. and Del Gratta, R. and Boschetti, F. and Baroni, P. and Del Grosso, A. M. and Mancinelli, T. and Monachini, M.}, TITLE = {'Cretan Institutional Inscriptions' Meets CLARIN-IT}, YEAR = {2021}, ABSTRACT = {This paper describes a project in the domain of Digital Epigraphy named 'Cretan Institutional Inscriptions' and developed at the Ca' Foscari University of Venice. The project is supported by CLARIN-IT as part of the actions addressed to initiatives, projects and events in the field of Humanities and Social Sciences. The main goal is to make the project visible through CLARIN channels with the hope that it will be a forerunner for other digital epigraphy projects in CLARIN. 
The article illustrates also the dockerization process applied to the 'Cretan Institutional Inscriptions' project, currently hosted on the CLARIN-IT servers.}, KEYWORDS = {Digital Epigraphy, Digital Classics, Ancient Greek, Crete, Institutions, Text Encoding Initiative, TEI, EpiDoc, EpiDoc Front-End Services, EFES, Virtual Language Observatory, Dockerization, ILC4CLARIN, CLARIN-IT, CLARIN}, PAGES = {48-53}, URL = {https://office.clarin.eu/v/CE-2021-1923-CLARIN2021_ConferenceProceedings.pdf}, CONFERENCE_NAME = {CLARIN Annual Conference 2021}, CONFERENCE_PLACE = {Virtual Edition}, CONFERENCE_DATE = {27-29/09/2021}, BOOKTITLE = {Proceedings of CLARIN Annual Conference 2021 (Virtual Edition)}, EDITOR = {Monachini, M. and Eskevich, M.}, } @TECHREPORT{FRONTINI_2021_TECHREPORT_FGM_463461, AUTHOR = {Frontini, F. and Gamba, F. and Monachini, M.}, TITLE = {D3. 9 Report on Ontology and Vocabulary Collection and Publication}, YEAR = {2021}, ABSTRACT = {This deliverable pertains to SSHOC Task 3.1 which was responsible for investigating and providing resources and tools to support the multilingual aspects of the future pan-EU SSH infrastructure. Making data and services accessible and usable in SSH is very much also a matter of providing relevant translations, translation of metadata concepts, multilingual vocabularies, terminology extraction across languages, multilingual databases. The deliverable offers a detailed report on the gathering and translation of relevant SSH metadata, ontologies and vocabularies for the use-cases indicated in the task's topics: multilingual metadata concepts and vocabularies, the multilingual occupation ontology, with cross-country female occupational titles. In accordance with SSHOC and the EOSC FAIR recommendations and requirements, the metadata vocabularies and ontologies have been published via several different formats and facilities. Section 1. 
The introduction sets the landscape and describes the need of multilingual vocabularies both for classification and discovery in the context of a cloud-based infrastructure that will offer access to research data and related services adapted to the needs of the SSH community. Section 2. "Multilingual metadata" investigates the possibility to use and test Natural Language Processing (NLP) approaches and Machine Translation (MT) to make the metadata more accessible using national languages other than English. A selected case study was the recommended metadata set of the CLARIN Concept Registry (CCR): the whole set of metadata and definitions were translated into French, Greek, and Italian. The section describes the machine-translation and evaluation process, also comparing different technologies. Section 3. "Multilingual vocabularies and ontologies" introduces two other typical case-studies. The first one addresses one of the pressing needs in social sciences research. Many surveys, indeed, ask respondents to specify their occupation and the occupational ontology is used for the survey questions. For many languages the occupational titles for males and females are not identical. In section 3.1 the enrichment of the occupational ontology with lists for male and female titles, is described for many languages, namely for Dutch, German, Slovenian and French. The second case study focuses on the automatic extraction of terminology from texts: a list of domain- specific terms was automatically extracted from a corpus of Data Curation and Stewardship, validated by domain experts, automatically translated into multiple languages (Dutch, French, German, Greek, Italian, Slovenian) and linked to other existing terminologies. Section 4. describes the SKOS-ification and publication process of the results, together with the challenges posed by multilinguality. Section 5. 
offers an overview of the exploitation and sustainability of the results and how these are made available to the community. Finally the Conclusions provide some reflections on Machine Translation approaches adopted for translating the vocabularies into multiple languages, the advantages in terms of time saving and some first recommendations to the community.}, KEYWORDS = {Terminologies, Infrastructures, Social Sciences and Humanities, Data Curation, Data Stewardship, vocabularies, Translations, Metadata}, URL = {https://doi.org/10.5281/zenodo.5913485}, } @MISC{FRONTINI_2021_MISC_FGMB_463503, AUTHOR = {Frontini, F. and Gamba, F. and Monachini, M. and Broeder, D.}, TITLE = {SSHOC Multilingual Data Stewardship Terminology}, YEAR = {2021}, ABSTRACT = {The SSHOC Multilingual Data Stewardship Terminology is a multilingual terminology that collects terms specific to the domain of Data Stewardship, as well as their definitions. A list of domain-specific terms was automatically extracted from a corpus pertaining to the domain of Data Stewardship and Curation, validated by domain experts, assigned a definition, and linked to other existing terminologies (Loterre Open Science Thesaurus, terms4FAIRskills, Linked Open Vocabularies, ISO terms and definitions). Each term-definition pair was then automatically translated into multiple languages (Dutch, French, German, Greek, Italian, Slovenian) by employing Deep-L. The Multilingual Data Stewardship Terminology thus consists of 210 concepts available in Dutch, French, German, Greek, Italian, Slovenian. This resource was created within the frame of the SSHOC (Social Sciences and Humanities Open Cloud) project (H2020-INFRAEOSC-2018-2-823782). 
It is the result of the work of Task 3.1.2 "extraction of terminology from technical documentation about standards and interoperability", as described in D3.9, carried out jointly by ILC-CNR and CLARIN ERIC.}, KEYWORDS = {terminology, data stewardship}, URL = {http://hdl.handle.net/20.500.11752/ILC-567}, } @MISC{FRONTINI_2021_MISC_FGMB_463504, AUTHOR = {Frontini, F. and Gamba, F. and Monachini, M. and Broeder, D.}, TITLE = {SSHOC Multilingual Metadata}, YEAR = {2021}, ABSTRACT = {SSHOC Multilingual Metadata is based on the metadata set of the CLARIN Concept Registry (CCR). The CCR 232 approved metadata concepts, as well as their definitions, were automatically translated into several languages (Dutch, French, Greek, Italian) thanks to the support of Machine Translation tools, and eventually validated by native speakers who were also expert of the domain. This resource was created within the frame of the SSHOC (Social Sciences and Humanities Open Cloud) project (H2020-INFRAEOSC-2018-2-823782). It is the result of the work of Task 3.1.3 "creating Multilingual metadata and taxonomies for discovery", as described in D3.9, carried out jointly by ILC-CNR and CLARIN ERIC.}, KEYWORDS = {metadata, terminology}, URL = {http://hdl.handle.net/20.500.11752/ILC-568}, } @ARTICLE{NICOLOSI_2020_ARTICLE_NMN_429366, AUTHOR = {Nicolosi, A. and Monachini, M. and Nava, B.}, TITLE = {CLARIN-IT and the Definition of a Digital Critical Edition for Ancient Greek Poetry}, YEAR = {2020}, ABSTRACT = {Ancient Greek studies, and Classics in general, is a perfect field of investigation in Digital Humanities. Indeed, DH approaches could become a means of building models for complex realities, analyzing them with computational methods and sharing the results with a broader public. Ancient texts have a complex tradition, which includes many witnesses (texts that handed down other texts) and different typologies of supports (papyri, manuscripts, and epigraphs). 
These texts are the basis of all European Literatures and it is crucial to spread their knowledge, in a reliable and easy way. Our project on ancient Greek fragmentary poetry (DEA - Digital Edition of Archilochus: New models and tools for authoring, editing and indexing an ancient Greek fragmentary author), growing out of the existing experience, tries to define a TEI-based digital critical edition combined with NLP techniques and semantic web technologies. Our goal is to provide a complete and reliable tool for scholars, suitable for critical studies in Classics, and a user-friendly environment also for non-specialist users. The project represents one of the attempts within the context of CLARIN-IT to contribute to the wider impact of CLARIN on the specific Italian community interested in Digital Classics. It is intended to improve services in fostering new knowledge in SSH digital research and sustaining the existing one.}, KEYWORDS = {Digital edition, Ancient Greek, research infrastructures, digital humanities, digital classics}, PAGES = {85-93}, URL = {https://ep.liu.se/ecp/172/011/ecp20172011.pdf}, VOLUME = {172}, DOI = {10.3384/ecp2020172011}, PUBLISHER = {Linköping University Electronic Press (Linköping, Svezia)}, ISSN = {1650-3740}, JOURNAL = {Linköping electronic conference proceedings (Online)}, } @EDITORIAL{BROEDER_2020_EDITORIAL_BEM_472326, AUTHOR = {Broeder, D. and Eskevich, M. and Monachini, M.}, TITLE = {Proceedings of the Workshop about Language Resources for the SSH Cloud}, YEAR = {2020}, ABSTRACT = {This workshop was envisaged to focus on the goals and aims of realising the SSHOC part of the EOSC, where SSH data, language processing tools, and services are made available, adjusted and accessible for users across SSH domain. It provides a forum to discuss common requirements, challenges and opportunities for developing, enhancing, integrating tools and services for managing and processing SSH research data. 
Such SSH scenarios based implementations of currently existing language tools and services demonstrate their multidisciplinary usability and stimulate further multidisciplinary collaboration across the various subfields of SSH and beyond, which will increase the potential for societal impact.},
  KEYWORDS = {Social Science and Humanities Open Cloud, EOSC, Language Resource Infrastructure},
  PAGES = {1--46},
  URL = {https://aclanthology.org/2020.lr4sshoc-1},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {979-10-95546-43-6},
}

@INPROCEEDINGS{BOSCHETTI_2020_INPROCEEDINGS_BDMBMD_462360,
  AUTHOR = {Boschetti, F. and Del Gratta, R. and Monachini, M. and Buzzoni, M. and Monella, P. and Del Turco, R. R.},
  TITLE = {"Tea for two": the Archive of the Italian Latinity of the Middle Ages meets the CLARIN infrastructure},
  YEAR = {2020},
  ABSTRACT = {This paper presents the Archive of the Italian Latinity of the Middle Ages (ALIM) and focuses, particularly, on its structure and metadata for its integration into the ILC4CLARIN repository. Access to this archive of Latin texts produced in Italy during the Middle Ages is of great importance in providing CLARIN-IT and the CLARIN community, at large, with critically reliable texts for the use of philologists, historians of literature, historians of institutions, culture and science of the Middle Ages.},
  KEYWORDS = {Latin resources, CLARIN, corpus, repository},
  PAGES = {121--125},
  URL = {https://office.clarin.eu/v/CE-2020-1738-CLARIN2020_ConferenceProceedings.pdf},
  CONFERENCE_NAME = {CLARIN Annual Conference 2020},
  CONFERENCE_DATE = {05-07/10/2020},
}

@INPROCEEDINGS{BROEDER_2020_INPROCEEDINGS_BEM_472328,
  AUTHOR = {Broeder, D. and Eskevich, M.
and Monachini, M.},
  TITLE = {LR4SSHOC: The Future of Language Resources in the Context of the Social Sciences and Humanities Open Cloud},
  YEAR = {2020},
  ABSTRACT = {This paper outlines the future of language resources and identifies their potential contribution for creating and sustaining the social sciences and humanities (SSH) component of the European Open Science Cloud (EOSC).},
  KEYWORDS = {EOSC, Social Science and Humanities Open Cloud},
  PAGES = {33--36},
  URL = {https://aclanthology.org/2020.lr4sshoc-1.6},
  ISBN = {979-10-95546-43-6},
  CONFERENCE_NAME = {LREC},
  CONFERENCE_PLACE = {virtual},
  CONFERENCE_DATE = {10/5/2020},
  BOOKTITLE = {Proceedings of the Workshop about Language Resources for the SSH Cloud},
  EDITOR = {Broeder, D. and Eskevich, M. and Monachini, M.},
}

@INPROCEEDINGS{CALAMAI_2020_INPROCEEDINGS_CPMSBB_466823,
  AUTHOR = {Calamai, S. and Pretto, N. and Monachini, M. and Stamuli, M. F. and Bianchi, S. and Bonazzoli, P.},
  TITLE = {Building a Home for Italian Audio Archives},
  YEAR = {2020},
  ABSTRACT = {Audio and audiovisual archives are at the crossroads of different fields of knowledge, yet they require common solutions for both their long-term preservation and their description, availability, use and reuse. Archivio Vi.Vo. is an Italian project financed by the Tuscany Region, aiming to (i) explore methods for long-term preservation and secure access to oral sources and (ii) develop an infrastructure under the CLARIN-IT umbrella offering several services for scholars from different domains interested in oral sources.
This paper describes the project's infrastructure and its methodology through a case study on the Caterina Bueno's audio archive.},
  KEYWORDS = {oral archives, infrastructures},
  PAGES = {112--116},
  URL = {https://publications.cnr.it/doc/466823},
  CONFERENCE_NAME = {CLARIN2020 Annual Conference Proceedings},
  ISSN = {2773-2177},
  CONFERENCE_PLACE = {virtual},
  CONFERENCE_DATE = {5/10/2020-7/10/2020},
}

@INPROCEEDINGS{DIDONATO_2020_INPROCEEDINGS_DMEPMD_425644,
  AUTHOR = {Di Donato, F. and Monachini, M. and Eskevich, M. and Pohle, S. and Moranville, Y. and Dumouchel, S.},
  TITLE = {Social Sciences and Humanities Pathway. Towards the European Open Science Cloud},
  YEAR = {2020},
  ABSTRACT = {The paper describes a journey which starts from various social sciences and humanities (SSH) Research Infrastructures (RI) in Europe and arrives at the comprehensive "ecosystem of infrastructures", namely the European Open Science Cloud (EOSC). We highlight how the SSH Open Science infrastructures contribute to the goal of establishing the EOSC. First, through the example of OPERAS, the European Research Infrastructure for Open Scholarly Communication in the SSH, to see how its services are conceived to be part of the EOSC and to address the communities' needs. The next two sections highlight collaboration practices between partners in Europe to build the SSH component of the EOSC and a SSH discovery platform, as a service of OPERAS and the EOSC.
The last two sections focus on an implementation network dedicated to SSH data fairification.},
  KEYWORDS = {EOSC, Open science, SSH, Infrastructures},
  PAGES = {5--9},
  URL = {https://www.aclweb.org/anthology/2020.lr4sshoc-1.2.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {979-10-95546-43-6},
  CONFERENCE_NAME = {Language Resources and Evaluation Conference (LREC 2020)},
  CONFERENCE_PLACE = {Marseille},
  CONFERENCE_DATE = {11-16/05/2020},
  BOOKTITLE = {Proceedings of the Workshop about Language Resources for the SSH Cloud},
  EDITOR = {Broeder, D. and Eskevich, M. and Monachini, M.},
}

@INPROCEEDINGS{BIANCHI_2020_INPROCEEDINGS_BCMPS_466816,
  AUTHOR = {Bianchi, S. and Calamai, S. and Monachini, M. and Pretto, N. and Stamuli, M. F.},
  TITLE = {The grey-side of audio archives},
  YEAR = {2020},
  ABSTRACT = {Archives often include documents that can hardly be considered publications or grey literature as such, yet they maintain their documentary value and play a role of primary sources for the specialists. These documents, indeed, can help archivists to reveal the sedimentation process of the archive itself and to preserve the authentic context of the documentary production. They also appear to be very useful for the community of researchers and scholars. This happens more frequently with oral archives which include 'non-conventional sources', thus bringing together audio documents, fieldworks notes, correspondence, slipcases, analogic compact cassettes or open reels.
At the cross-road of two disciplines, Archival Science and Grey Literature, this paper aims to argue the applicability of the concept of grey literature to this wide range of documentary materials, by showing the experience of Archivio Vi.Vo, a regional project aiming at building a model for archiving, preserving, managing and disseminating audio documents.},
  KEYWORDS = {archivi orali},
  URL = {https://publications.cnr.it/doc/466816},
  VOLUME = {2020-November},
  PUBLISHER = {TransAtlantic (Amsterdam, Paesi Bassi)},
  ISSN = {1386-2316},
  CONFERENCE_NAME = {GL2020 22nd International Conference on Grey Literature},
  CONFERENCE_DATE = {20/11/2020},
  BOOKTITLE = {The GL-conference series. Conference proceedings},
}

@INPROCEEDINGS{BOSCHETTI_2020_INPROCEEDINGS_BDM_416444,
  AUTHOR = {Boschetti, F. and Del Gratta, R. and Monachini, M.},
  TITLE = {Latin digital archives and research infrastructures: just a trendy option or a substantive need?},
  YEAR = {2020},
  ABSTRACT = {Latin digital archives and research infrastructures: just a trendy option or a substantive need?},
  KEYWORDS = {Research Infrastructure, Digital Libraries, Latin},
  URL = {https://publications.cnr.it/doc/416444},
  CONFERENCE_NAME = {ALIM and beyond},
  CONFERENCE_PLACE = {Venezia},
  CONFERENCE_DATE = {27-28/01/2020},
}

@TECHREPORT{BARTOLINI_2020_TECHREPORT_BQMA_453502,
  AUTHOR = {Bartolini, R. and Quochi, V. and Monachini, M.
and Affé, F.},
  TITLE = {Relazione di fine progetto "PIM-Piattaforma Integrata Monitoraggio"},
  YEAR = {2020},
  ABSTRACT = {Il documento presenta l'attività svolta dal CNR-ILC nel ruolo di subcontraente di COMDATA per la realizzazione di moduli di trattamento automatico del linguaggio e la consulenza per l'integrazione di metodi di clustering automatico di documenti nella Digital Library del progetto PIM.},
  KEYWORDS = {accesso intelligente al testo, digital library, natural language processing},
  PAGES = {156},
  URL = {https://publications.cnr.it/doc/453502},
}

@TECHREPORT{DIDONATO_2020_TECHREPORT_DLBCDEGGMMOTT_437796,
  AUTHOR = {Di Donato, F. and Lombardo, T. and Breitfuss, G. and Chen, Y. and Dumouchel, S. and Eskevich, M. and Gingold, A. and Gorgaini, E. and Monachini, M. and Moranville, Y. and Ocansey, J. T. and Thiel, C. and Tóth Czifra, E.},
  TITLE = {TRIPLE D6.1 - Report on the General Interoperability Requirements},
  YEAR = {2020},
  ABSTRACT = {TRIPLE - Transforming Research Through Innovative Practices for Linked Interdisciplinary Exploration is a EU funded project under the INFRAEOSC-02-2019 - Prototyping new innovative services topic, which started in October 2019 and will end in March 2023. Its main objective is to design and develop a discovery platform for SSH, called GOTRIPLE. This deliverable is the main outcome of Task 6.1 which started at M4 and ends at M36, whose aim is to deal with the definition and the set-up of general TRIPLE's interoperability requirements, considering all the components which are composing the TRIPLE ecosystem (data, resources and tools). As preliminary results, we thus present here a general overview of the main EOSC interoperability requirements and specifications, both coming from a mapping of the EOSC Working Groups outputs, and of the most relevant results of EOSC related projects related to TRIPLE. We also attempt to provide TRIPLE's answers, proposals and solutions to the above mentioned requirements.
The final picture presents different levels of precision, which depends on the fluidity of the EOSC definition on the one hand, and on the consequent fact that some implications are still unclear, and a discussion on the measures to address EOSC requirements is still on-going. While tackling interoperability, we introduce TRIPLE in its context in order to locate the GOTRIPLE platform in the EOSC frame, and more specifically in the SSH cluster of the EOSC (section 1). Section 2 defines the general interoperability requirements, starting with the software (2.2) and then presents an analysis of the main outputs released by the EOSC Working groups (2.3), taking into consideration as a general reference, the EOSC Interoperability Framework, and, more specifically, the FAIR and Architecture WGs documents (2.3.2, 2.3.4). These are the main guiding references for the design and realization of the EOSC, considering specifically interoperability. Section 2.3.3 illustrates how TRIPLE is translating into practice the FAIR requirements, while section 2.3.5 is focused on TRIPLE current decisions regarding the integration of the TRIPLE solution into the EOSC. To present an enriched scenario, the section includes as well a brief overview of other relevant outputs released by the EOSC WGs (Landscape, RoP, Sustainability and Skills and Training) (2.3.6). With the aim to provide a comprehensive analysis of the EOSC interoperability requirements, the WP6 partners have analyzed relevant deliverables produced by the main EOSC related projects as preparatory activity. The analysis was useful to understand the EOSC environment and its evolution in terms of interoperability and at the same time to understand which external deliverables have to be taken into consideration for the overall project development in TRIPLE. Section 3 includes a synthesis of this work, which is fully presented in Annex I.
Section 4 - Conclusions and Outlook, outlines TRIPLE's next steps to achieve interoperability and the strategies that will be adopted.},
  KEYWORDS = {TRIPLE, GOTRIPLE, EOSC},
  URL = {https://zenodo.org/record/4322806},
}

@ARTICLE{GOGGI_2019_ARTICLE_GPBMBC_411599,
  AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Monachini, M. and Biagioni, S. and Carlesi, C.},
  TITLE = {Semantic Query Analysis from the Global Science Gateway},
  YEAR = {2019},
  ABSTRACT = {Nowadays web portals play an essential role in searching and retrieving information in the several fields of knowledge: they are ever more technologically advanced and designed for supporting the storage of a huge amount of information in natural language originating from the queries launched by users worldwide. Given this scenario, we focused on building a corpus constituted by the query logs registered by the GreyGuide: Repository and Portal to Good Practices and Resources in Grey Literature and received by the WorldWideScience.org (The Global Science Gateway) portal: the aim is to retrieve information related to social media which as of today represent a considerable source of data more and more widely used for research ends.},
  KEYWORDS = {Information Extraction, Query Log, WorldWideScience Alliance, Information gateways, Social Media},
  PAGES = {147--155},
  URL = {https://publications.cnr.it/doc/411599},
  VOLUME = {15},
  PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)},
  ISSN = {1574-1796},
  JOURNAL = {The Grey journal (Print)},
}

@ARTICLE{STEFANINI_2019_ARTICLE_SNM_452483,
  AUTHOR = {Stefanini, A. E. and Nicolosi, A. and Monachini, M.},
  TITLE = {A Mock-up for the Development of a Digital Edition for Ancient Greek Fragmentary Poetry: Results of Its Evaluation},
  YEAR = {2019},
  ABSTRACT = {Ancient Greek poetry is an essential part of the western cultural heritage; thus, it is important that people have access to its texts and whatever relates to their understanding in a reliable and easy way.
Whenever user evaluation is concerned, mock-ups are used by designers to acquire feedback from users. A mock-up is defined as a model of the final product, and may be used for demonstration, evaluation and other purposes. The authors prototyped a mock-up for focusing on the requirements of a scholarly digital edition of Archilochus. This was put under evaluation to assess its usability: it was submitted to extensive use and testing by a sample of prospective users, to better focus on the requirements from a product's perspective. Experimentation involved a group of university students, attending a Greek Philology course at Parma University. More than half of the respondents considered the mock-up a useful study support. The evaluation also pointed out that the mock-up had to be revised, so as to guarantee better cognitive simplicity of the user interface.},
  KEYWORDS = {Ancient Greek Poetry, Digital Edition, Greek Philology, Digital Humanities, Digital Philology, Didactics, Evaluation},
  PAGES = {41--57},
  URL = {https://www.igi-global.com/article/a-mock-up-for-the-development-of-a-digital-edition-for-ancient-greek-fragmentary-poetry/237162},
  VOLUME = {8},
  DOI = {10.4018/IJACDT.2019070103},
  PUBLISHER = {IGI Global (Hershey, PA, Stati Uniti d'America)},
  ISSN = {2155-420X},
  JOURNAL = {International journal of art, culture and design technologies (Online)},
}

@INCOLLECTION{BELLANDI_2019_INCOLLECTION_BMK_429245,
  AUTHOR = {Bellandi, A. and Monachini, M. and Khan, F.},
  TITLE = {LexO: Where Lexicography Meets the Semantic Web},
  YEAR = {2019},
  ABSTRACT = {LexO is a collaborative web editor used for the creation and management of (multilingual) lexical and terminological resources as linked data resources.
The editor makes use of Semantic Web technologies (which enrich web data with semantic information in order to make them machine readable) and the linked data publishing paradigm in order to ensure that lexical resources can be more easily shared and reused by the scientific community.},
  KEYWORDS = {Semantic Web technologies, multilingual lexical resources, collaborative web editor},
  PAGES = {43--47},
  URL = {https://publications.cnr.it/doc/429245},
  VOLUME = {2},
  BOOKTITLE = {Tour de CLARIN volume two},
  EDITOR = {Fišer, D. and Lenardič, J.},
}

@INCOLLECTION{RUSSO_2019_INCOLLECTION_RMCM_429036,
  AUTHOR = {Russo, I. and Marconi, L. and Cutugno, P. and Monachini, M.},
  TITLE = {Le parole sono ponti: risorse digitali per l'integrazione in contesti multilingue},
  YEAR = {2019},
  ABSTRACT = {Nel presente lavoro esporremo due esperienze inerenti all'uso e alla produzione di risorse linguistiche multilingui, svolte da alcuni ricercatori dell'Istituto di Linguistica Computazionale "Antonio Zampolli" (ilc) del cnr. Più nello specifico verrà descritta la realizzazione di un glossario nell'ambito del progetto Ascolto Accoglienza Azioni Offresi (aaa Offresi) e l'uso sperimentale di ImagAct (Moneglia et alii 2012) - una risorsa lessicale multilingue sui verbi d'azione - in una scuola primaria caratterizzata da una forte presenza di alunni stranieri. Il fine della ricerca è quello di favorire l'emergere delle competenze metalinguistiche degli apprendenti, valorizzando la diversità linguistica e culturale.},
  KEYWORDS = {Multilingual lexical resources, L2 teaching, Translanguaging, Public administration terminology},
  PAGES = {127--136},
  URL = {https://publications.cnr.it/doc/429036},
  VOLUME = {I},
  DOI = {10.36173/PLURIMI-2019-1/09},
  PUBLISHER = {Consiglio Nazionale delle Ricerche (Roma, ITA)},
  ISBN = {9788880803775},
  BOOKTITLE = {Linguaggi, ricerca, comunicazione. Focus CNR},
  EDITOR = {Cadeddu, M. E.
and Marras, C.},
}

@INPROCEEDINGS{GOGGI_2019_INPROCEEDINGS_GPBMBC_400343,
  AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Monachini, M. and Biagioni, S. and Carlesi, C.},
  TITLE = {Semantic query analysis from the global science gateway},
  YEAR = {2019},
  ABSTRACT = {We focused on building a corpus constituted by the query logs registered by the GreyGuide: Repository and Portal to Good Practices and Resources in Grey Literature and received by the WorldWideScience.org (The Global Science Gateway) portal.},
  KEYWORDS = {Information Extraction, Terminology},
  PAGES = {105--113},
  URL = {https://publications.cnr.it/doc/400343},
  VOLUME = {20},
  ISBN = {978-90-77484-33-3},
  CONFERENCE_NAME = {GL20-Twentieth International Conference on Grey Literature: Research Data Fuels and Sustains Grey Literature},
  CONFERENCE_PLACE = {New Orleans, USA},
  CONFERENCE_DATE = {3-4 December 2018},
  BOOKTITLE = {Research Data Fuels and Sustains Grey Literature},
  EDITOR = {Farace, D. and Frantzen, J.},
}

@INPROCEEDINGS{MONACHINI_2019_INPROCEEDINGS_M_429318,
  AUTHOR = {Monachini, M.},
  TITLE = {Ricerche di alta qualità negli Studi umanistici: l'infrastruttura CLARIN-IT},
  YEAR = {2019},
  ABSTRACT = {Nella lezione a invito presso la Scuola di Dottorato di Ateneo al Corso di UMANESIMO E TECNOLOGIE vengono descritti, il ruolo, i vantaggi e le opportunità offerte dalla infrastruttura di ricerca CLARIN.
Una platea di giovani in formazione (che costituiranno i ricercatori del futuro) potrà approfondire la conoscenza degli strumenti della infrastruttura che consentono di coniugare studi umanistici ed approcci tecnologici, al fine di compiere ricerche di alta qualità.},
  KEYWORDS = {CLARIN, studi umanistici, tecnologie linguistiche},
  URL = {https://publications.cnr.it/doc/429318},
  CONFERENCE_NAME = {Scuola di Dottorato di Ateneo Corso di UMANESIMO E TECNOLOGIE},
  CONFERENCE_DATE = {13/11/2019},
}

@INPROCEEDINGS{MONACHINI_2019_INPROCEEDINGS_M_429336,
  AUTHOR = {Monachini, M.},
  TITLE = {Success stories of collaboration in Social Sciences and Humanities (between Italy and Slovenia)},
  YEAR = {2019},
  ABSTRACT = {The collaboration Italy-Slovenia in the sector of SSH revolves, since many years, around Digital methods for language, in particular, language resources and standards for language data; with the development of e-technology and explosion of data, the support to language studies goes through RI; another hot topic linked to the digital era is "word meaning" which involves a new type of lexicography; the even greater challenge, the concept of Open Science, sees the two countries together in the new big cluster project SSHOC, which aims to implement the EOSC vision and build the Open Cloud for the SSH sector.},
  KEYWORDS = {collaboration Italy Slovenija, Social Sciences and Humanities},
  URL = {https://publications.cnr.it/doc/429336},
  CONFERENCE_NAME = {RESEARCH DAY ITALY-SLOVENIA Bilateral meeting Italy Slovenia on the role of research in the society},
  CONFERENCE_PLACE = {University of Nova Gorica, Vipava, Glavni trg 8},
  CONFERENCE_DATE = {16/4/2019},
}

@INPROCEEDINGS{MONACHINI_2019_INPROCEEDINGS_M_429355,
  AUTHOR = {Monachini, M.},
  TITLE = {CLARIN-IT nella prospettiva delle Digital Humanities},
  YEAR = {2019},
  ABSTRACT = {Fornire una panoramica relativa alla infrastruttura europea CLARIN e la sua emanazione italiana CLARIN-IT rispondere ai quesiti relativi alla sua
missione e ai suoi obiettivi e fare il punto sui vantaggi per la comunità a cui è diretto, ricercatori del settore delle scienze umane e sociali},
  KEYWORDS = {digital public humanities, infrastrutture di ricerca},
  URL = {https://publications.cnr.it/doc/429355},
  CONFERENCE_NAME = {Seminars in Digital Public Humanities},
  CONFERENCE_PLACE = {Venezia},
  CONFERENCE_DATE = {23 ottobre 2019},
}

@INPROCEEDINGS{MONACHINI_2019_INPROCEEDINGS_MSC_429370,
  AUTHOR = {Monachini, M. and Stamuli, M. F. and Calamai, S.},
  TITLE = {Folk in Tuscany: the Caterina Bueno sound archive},
  YEAR = {2019},
  ABSTRACT = {Caterina Bueno's sound archive is composed of 476 carriers (audio reels and compact cassettes), corresponding to nearly 714 hours of recording and was digitised during the PAR-FAS project Gra.fo (Grammo-foni. Le soffitte della voce, UNISI \& SNS, http://sns.grafo.it). It was located at two different owners': part of it was stored at Caterina's heirs' house, while the rest was kept by the former culture counsellor of the Municipality of San Marcello Pistoiese, in the Montagna Pistoiese, where a multi-media library was supposed to be set up. Unfortunately, disagreements and misunderstandings between the two parties have so far made the archive fragmented and inaccessible to the community. Both owners, independently, have turned to Silvia Calamai for the reassembly of the whole archive in the digital domain, in respect of the artist's wishes. After digitising, the carriers were returned to their owners, who helped in finding an arrangement for the sound archive, which can be divided according to the following categories: field-research (investigations carried out in the Tuscan countryside from the late 50s to the end of the artist's life); live performances (recordings of concerts and events); performances' rehearsals (recordings of rehearsals with musicians).
In 2019 Regione Toscana decided to support the project of cataloguing and disseminating Caterina Bueno Archive and the following partners were involved: Università degli Studi di Siena (Silvia Calamai), Soprintendenza Archivistica e Bibliografica della Toscana (Maria Francesca Stamuli), CLARIN-IT (Monica Monachini), and Unione dei comuni del Casentino (Pierangelo Bonazzoli). Archivio Vi.vo will thus constitute a pilot study within CLARIN-IT to experiment methods and offer services to disciplines interested in oral sources. The ILC4CLARIN Italian node offers archiving preservation access and tools for linguistic data of a written type; within Archivio Vi.vo. the repository will be improved through experimental approach to conservation, management and access to audio and audio-video data and metadata. Archivio Vi.Vo. will develop a model which can be replicated on other audio-visual archives, even outside the context of Tuscany. The experimental activity will aim to adopt the model and high-performance computing and archiving services of the new GARR network infrastructure, built along the Cloud paradigm. This model will be disseminated both to the scientific community interested in accessing these data, and to the general public who enjoy ethnomusical materials produced in the territory.},
  KEYWORDS = {long-term preservation, oral archives, infrastructures, conservation, access, metadata},
  URL = {https://www.clarin.eu/sites/default/files/clarin2019_bazaar_calamai-stmuli-monachini.pdf},
  CONFERENCE_NAME = {CLARIN 2019 Annual Conference},
  CONFERENCE_PLACE = {Leipzig},
  CONFERENCE_DATE = {30/09/2019-2/10/2019},
  BOOKTITLE = {CLARIN Annual Conference 2019 Abstracts},
}

@ARTICLE{GOGGI_2018_ARTICLE_GPRBM_388612,
  AUTHOR = {Goggi, S. and Pardelli, G. and Russo, I. and Bartolini, R.
and Monachini, M.},
  TITLE = {Providing Access to Grey Literature: The CLARIN Infrastructure},
  YEAR = {2018},
  ABSTRACT = {"In the electronic age, the World Wide Web has played a major role in making scientific information accessible to a wide audience more rapidly and efficiently. This democratic approach to information dissemination in science is changing the way science is perceived and implemented in our daily lives" (Weintraub, 2000).},
  KEYWORDS = {CLARIN-IT, CLARIN-European Research Infrastructure for Language Resources and Technology, Grey Literature},
  PAGES = {87--93},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85048643343\&origin=inward},
  VOLUME = {14},
  PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)},
  ISSN = {1574-1796},
  JOURNAL = {The Grey journal (Print)},
}

@INCOLLECTION{MONACHINI_2018_INCOLLECTION_MNS_387374,
  AUTHOR = {Monachini, M. and Nicolosi, A. and Stefanini, A.},
  TITLE = {Digital Classics and CLARIN-IT: What Italian Scholars of Ancient Greek Expect from Digital Resources and Technology},
  YEAR = {2018},
  ABSTRACT = {This paper presents and discusses the findings of a survey carried out to assess the use of digital resources and digital technologies with respect to work in ancient Greek scholarship, with the aim to identify the factors that are likely to constrain its use as well as to elicit needs and requirements of ancient Greek scholars in Italy. The survey is in line with the principles behind the user engagement strategy developed by CLARIN-ERIC and constitutes one of the national efforts undertaken by CLARIN-IT to contribute to the wider impact of CLARIN on Digital Classicists. The survey, as well as other surveys carried out in the sector in the last decade, points out that most of the available resources do not respond to users' requirements. This motivated us to develop a mock-up of a digital editor of Archilochus, which, mostly grounded on previous studies by Nicolosi, draws on the outcomes of the survey.
The experiment includes a sample prototype to submit for evaluation by end-users. The final aim is to identify good practices and new models to enable new approaches to the study of classical texts and profile a new workbench for scholarly digital edition.},
  KEYWORDS = {Digital Classics, User Involvement, User requirements, CLARIN ERIC, CLARIN Infrastructure},
  PAGES = {61--74},
  URL = {https://ep.liu.se/ecp/147/006/ecp17147006.pdf},
  VOLUME = {147},
  ISBN = {978-91-7685-273-6},
  BOOKTITLE = {Selected papers from the CLARIN Annual Conference 2017, Budapest, 18-20 September 2017},
}

@INPROCEEDINGS{BARTOLINI_2018_INPROCEEDINGS_BGMP_387159,
  AUTHOR = {Bartolini, R. and Goggi, S. and Monachini, M. and Pardelli, G.},
  TITLE = {The LREC Workshops Map},
  YEAR = {2018},
  ABSTRACT = {The aim of this work is to present an overview of the research presented at the LREC workshops over the years 1998-2016 with the aim to shed light on the community represented by workshop participants in terms of country of origin, type of affiliation, gender. There has been also an effort towards the identification of the major topics dealt with as well as of the terminological variations noticed in this time span.
Data has been retrieved from the portal of the European Language Resources Association (ELRA) which organizes the conference and the resulting corpus made up of workshops titles and of the related presentations has then been processed using a term extraction tool developed at ILC-CNR.},
  KEYWORDS = {corpus creation, terminology, LREC},
  PAGES = {557--562},
  URL = {http://www.lrec-conf.org/proceedings/lrec2018/summaries/639.html},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {979-10-95546-00-9},
  CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  CONFERENCE_PLACE = {Miyazaki, Japan},
  CONFERENCE_DATE = {7-12/05/2018},
  BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.},
}

@INPROCEEDINGS{GOGGI_2018_INPROCEEDINGS_GPRBM_385571,
  AUTHOR = {Goggi, S. and Pardelli, G. and Russo, I. and Bartolini, R.
and Monachini, M.},
  TITLE = {Providing Access to Grey Literature: The CLARIN Infrastructure},
  YEAR = {2018},
  ABSTRACT = {This work will provide a map of the documentation archived in the CLARIN infrastructure, whose purpose is to share language resources produced and managed in the various European countries but finally merged into the CLARIN data centers for allowing access, interoperability, reuse and preservation of scientific documentation as well as Grey Literature.},
  KEYWORDS = {CLARIN Infrastructure, Language Resources, Grey Literature},
  PAGES = {93--99},
  URL = {http://greyguide.isti.cnr.it/wp-content/uploads/2018/03/GL19_Conference_Proceedings.pdf},
  VOLUME = {19},
  PUBLISHER = {TextRelease (Amsterdam, NLD)},
  ISBN = {978-90-77484-31-9},
  CONFERENCE_NAME = {Nineteenth International Conference on Grey Literature, GL19},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {October 23-24, 2017},
  BOOKTITLE = {Nineteenth International Conference on Grey Literature "Public Awareness and Access to Grey Literature"},
  EDITOR = {Farace, D. and Frantzen, J.},
}

@INPROCEEDINGS{KHAN_2018_INPROCEEDINGS_KBFM_387178,
  AUTHOR = {Khan, F. and Bellandi, A. and Frontini, F. and Monachini, M.},
  TITLE = {One Language to rule them all: modelling Morphological Patterns in a Large Scale Italian Lexicon with SWRL},
  YEAR = {2018},
  ABSTRACT = {We present an application of Semantic Web Technologies to computational lexicography. More precisely we describe the publication of the morphological layer of the Italian Parole Simple Clips lexicon (PSC-M) as linked open data. The novelty of our work is in the use of the Semantic Web Rule Language (SWRL) to encode morphological patterns, thereby allowing the automatic derivation of the inflectional variants of the entries in the lexicon.
By doing so we make these patterns available in a form that is human readable and that therefore gives a comprehensive morphological description of a large number of Italian words.},
  KEYWORDS = {Morphology, Linked Open Data, Italian Lexicon, SWRL, SQVRL},
  PAGES = {4385--4389},
  URL = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/844.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {979-10-95546-00-9},
  CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  CONFERENCE_PLACE = {Miyazaki, Japan},
  CONFERENCE_DATE = {7-12/05/2018},
  BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.},
}

@INPROCEEDINGS{MONACHINI_2018_INPROCEEDINGS_MK_387203,
  AUTHOR = {Monachini, M. and Khan, A. F.},
  TITLE = {Towards the Construction of a Lexical Data and Technology Ecosystem: The Experience of ILC-CNR},
  YEAR = {2018},
  ABSTRACT = {This paper describes the activities and projects being carried on at the "A. Zampolli" Institute for Computational Linguistics (ILC) at the crossroads between computational lexicography and e-lexicography and that are intended to assist in the creation of a queryable and interconnected ecosystem of standardised lexicographic datasets and technologies.},
  KEYWORDS = {e-lexicography, computational lexicography, lexical resources, standards, LOD},
  PAGES = {52--54},
  URL = {https://globalex.link/globalex2018/wp-content/uploads/2018/03/Globalex-2018_proceedings.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {979-10-95546-28-3},
  CONFERENCE_NAME = {LREC 2018 Workshop "Globalex 2018-Lexicography \& WordNets"},
  CONFERENCE_PLACE = {Miyazaki, Japan},
  CONFERENCE_DATE = {7-12/05/2018},
  BOOKTITLE = {Proceedings of the LREC 2018 Workshop "Globalex 2018-Lexicography \& WordNets"},
  EDITOR = {Kernerman, I.
and Krek, S.}, } @INPROCEEDINGS{NICOLAS_2018_INPROCEEDINGS_NKMDCAEBQS_387361, AUTHOR = {Nicolas, L. and König, A. and Monachini, M. and Del Gratta, R. and Calamai, S. and Abel, A. and Enea, A. and Biliotti, F. and Quochi, V. and Stella, F. V.}, TITLE = {CLARIN-IT: State of Affairs, Challenges and Opportunities}, YEAR = {2018}, ABSTRACT = {This paper gives an overview on the Italian national CLARIN consortium as it currently stands two years after its creation at the end of 2015. It thus discusses the current state of affairs of the consortium on several aspects, especially with regards to members. It also discusses the events and initiatives that have been undertaken, as well as the ones that are planned in the close future. It finally outlines the conclusions of a user survey performed to understand the expectations of a targeted user population and provides indications regarding the next steps planned.}, KEYWORDS = {CLARIN-IT Consortium Pisa Bolzano Siena}, PAGES = {1-14}, URL = {http://www.ep.liu.se/ecp/contents.asp?issue=147}, VOLUME = {147}, ISBN = {978-91-7685-273-6}, CONFERENCE_NAME = {CLARIN Annual Conference 2017}, CONFERENCE_PLACE = {Budapest, Hungary}, CONFERENCE_DATE = {18-20 September, 2017}, BOOKTITLE = {Selected papers from the CLARIN Annual Conference 2017, Budapest, 18-20 September 2017}, } @INPROCEEDINGS{GOGGI_2018_INPROCEEDINGS_GPBMBC_395584, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Monachini, M. and Biagioni, S. and Carlesi, C.}, TITLE = {Semantic query analysis from the global science gateway}, YEAR = {2018}, ABSTRACT = {We focused on building a corpus constituted by the query logs registered by the GreyGuide: Repository and Portal to Good Practices and Resources in Grey Literature and received by the WorldWideScience.org (The Global Science Gateway) portal: the aim is to retrieve information related to social media which as of today represent a considerable source of data more and more widely used for research ends. 
This project includes eight months of query logs registered between July 2017 and February 2018 for a total of 445,827 queries. The analysis mainly concentrates on the semantics of the queries received from the portal clients: it is a process of information retrieval from a rich digital catalogue whose language is dynamic, is evolving and follows - as well as reflects - the cultural changes of our modern society.}, KEYWORDS = {Global Science Gateway, Semantic Query Analysis, Terminology}, PAGES = {93-95}, URL = {http://greyguide.isti.cnr.it/wp-content/uploads/2018/12/GL20_ProgramBook.pdf}, VOLUME = {20}, ISBN = {978-90-77484-34-0}, CONFERENCE_NAME = {Twentieth International Conference on Grey Literature "Research Data Fuels and Sustains Grey Literature"}, CONFERENCE_PLACE = {New Orleans, USA (Loyola University)}, CONFERENCE_DATE = {December 3-4, 2018}, BOOKTITLE = {Research Data Fuels and Sustains Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{STEFANINI_2018_INPROCEEDINGS_SNM_385585, AUTHOR = {Stefanini, A. and Nicolosi, A. and Monachini, M.}, TITLE = {An experiment on the development of a digital edition for ancient Greek fragmentary poetry: A case study on Archilochus of Paros}, YEAR = {2018}, ABSTRACT = {This paper overviews ongoing experiments on a digital edition of Archilochus which is based on the readings, translations and comments by Nicolosi [1] and also integrates feedback and requirements from the Digital Classics community. The experiment encompasses a few fragments of the poet of Paros, so as to provide a mock-up of the prototype for evaluation by its intended end-users, in view of developing a fully fledged digital edition. 
The mock-up provides the philologist with a set of resources and tools that ease a critical appraisal of the text.}, KEYWORDS = {Digital methods in the humanities, Interfaces and user-friendly data presentation}, PAGES = {86-89}, URL = {http://amsacta.unibo.it/5997/1/AIUCD-2018-BoA-rev.pdf}, DOI = {10.6092/unibo/amsacta/5997}, ISBN = {9788894253528}, CONFERENCE_NAME = {Settimo Convegno Annuale AIUCD 2018}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {31/01/2018-2/02/2018}, BOOKTITLE = {Settimo Convegno Annuale AIUCD 2018. Patrimoni culturali nell'era digitale. Memorie, culture umanistiche e tecnologia. Book of Abstracts}, EDITOR = {Spampinato, D.}, } @INCOLLECTION{MANZELLA_2017_INCOLLECTION_MBBDDFMMMNS_368363, AUTHOR = {Manzella, G. M. R. and Bartolini, R. and Bustaffa, F. and D'Angelo, P. and De Mattei, M. and Frontini, F. and Maltese, M. and Medone, D. and Monachini, M. and Novellino, A. and Spada, A.}, TITLE = {Semantic Search Engine for Data Management and Sustainable Development: Marine Planning Service Platform}, YEAR = {2017}, ABSTRACT = {This chapter presents a computer platform supporting a Marine Information and Knowledge System based on a repository that gathers, classifies and structures marine scientific literature and data, guaranteeing their accessibility by means of standard protocols. This requires the access to quality controlled data and to information that is provided in grey literature and/or in relevant scientific literature. There exist efforts to develop search engines to find author's contributions to scientific literature or publications. This implies the use of persistent identifiers. However very few efforts are dedicated to link publications to data that was used, or cited in them or that can be of importance for the published studies. 
Full-text technologies are often unsuccessful since they assume the presence of specific keywords in the text; to fix this problem, it is suggested to use different semantic technologies for retrieving the text and data and thus getting much more complying results.}, KEYWORDS = {Marine Information and Knowledge System}, PAGES = {127-154}, URL = {http://www.igi-global.com/chapter/semantic-search-engine-for-data-management-and-sustainable-development/166839#}, VOLUME = {7}, DOI = {10.4018/978-1-5225-0700-0.ch006}, PUBLISHER = {IGI Global (Hershey, USA)}, BOOKTITLE = {Oceanographic and Marine Cross-Domain Data Management for Sustainable Development}, EDITOR = {Diviacco, P. and Leadbetter, A. and Glaves, H.}, } @INPROCEEDINGS{PARDELLI_2017_INPROCEEDINGS_PGBRM_367782, AUTHOR = {Pardelli, G. and Goggi, S. and Bartolini, R. and Russo, I. and Monachini, M.}, TITLE = {A Geographical Visualization of GL Communities: A Snapshot}, YEAR = {2017}, ABSTRACT = {This quotation stresses the important role of the several international organizations in producing and disseminating knowledge in the field of Grey Literature (GL): the paper aims to provide a first snapshot of the geographical distribution of GL organizations and their participation to the annual International Conference on Grey Literature over the time (in the period from 2003 to 2015. See List of Conferences on Table 2). Nowadays a visual representation of data is often associated with the traditional statistical graphs, in particular for representing complex phenomena by means of maps and diagrams, which allow a deeper and more focused analysis of the data. 
In our case the geographical representation of stakeholders in government, academics, business and industry aims at visualizing the GL community across the globe: it concerns 674 organizations which over the years have contributed to the development of a common vision on the most pressing issues of the field by using new paradigms such as Open Access and the social networks.}, KEYWORDS = {Geographical Visualization, Grey Literature Communities}, PAGES = {109-113}, URL = {http://greyguide.isti.cnr.it/wp-content/uploads/2017/04/GL18_Conference_Proceedings.pdf}, VOLUME = {18}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-30-2}, CONFERENCE_NAME = {Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, CONFERENCE_PLACE = {Washington}, CONFERENCE_DATE = {November 28-29, 2016}, BOOKTITLE = {GL18 Conference Proceedings Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{BELLANDI_2017_INPROCEEDINGS_BBKDM_366612, AUTHOR = {Bellandi, A. and Boschetti, F. and Khan, A. F. and Del Grosso, A. M. and Monachini, M.}, TITLE = {Provando e riprovando modelli di dizionario storico digitale: collegare voci, citazioni, interpretazioni}, YEAR = {2017}, ABSTRACT = {Il dizionario storico è il luogo d'incontro privilegiato di linguistica e lessicografia e filologia e critica letteraria. Nella prima parte prendiamo in considerazione un caso di studio piuttosto noto, relativo all'espressione "provando e riprovando", per mostrare come perfino i luoghi citati nei dizionari, che sono introdotti con lo scopo di disambiguare i termini in contesto, non siano privi di controversie interpretative. 
Nella seconda parte, molto più dettagliata e più tecnica, tentiamo di aggiungere ai modelli lessicali e citazionali già esistenti ed aperti soluzioni minime che ci permettano di collegare voci, citazioni e interpretazioni all'interno dell'universo dei Linked Open Data.}, KEYWORDS = {Linked Open Data LOD}, PAGES = {119-125}, URL = {http://aiucd2017.aiucd.it/wp-content/uploads/2017/01/book-of-abstract-AIUCD-2017.pdf}, CONFERENCE_NAME = {AIUCD 2017 Conference \& 3rd EADH Day}, CONFERENCE_PLACE = {Roma, Università "Sapienza"}, CONFERENCE_DATE = {24-28 January 2017}, BOOKTITLE = {AIUCD 2017 Conference}, } @INPROCEEDINGS{GOGGI_2017_INPROCEEDINGS_GPRBM_377070, AUTHOR = {Goggi, S. and Pardelli, G. and Russo, I. and Bartolini, R. and Monachini, M.}, TITLE = {Providing Access to Grey Literature: The CLARIN Infrastructure}, YEAR = {2017}, ABSTRACT = {This work will provide a map of the documentation archived in the CLARIN infrastructure, whose purpose is to share language resources produced and managed in the various European countries but finally merged into the CLARIN data centers for allowing access, interoperability, reuse and preservation of scientific documentation as well as Grey Literature.}, KEYWORDS = {CLARIN ERIC, Terminological Resources, Grey Literature}, PAGES = {60-62}, URL = {https://publications.cnr.it/doc/377070}, VOLUME = {19}, ISBN = {978-90-77484-32-6}, CONFERENCE_NAME = {Nineteenth International Conference on Grey Literature, GL19}, CONFERENCE_PLACE = {Rome, National Research Council, CNR}, CONFERENCE_DATE = {October 23-24, 2017}, BOOKTITLE = {Nineteenth International Conference on Grey Literature Public Awareness and Access to Grey Literature. Program Book}, EDITOR = {Farace, D. 
and Frantzen, J.}, } @INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_M_382175, AUTHOR = {Monachini, M.}, TITLE = {Discipline umanistiche: vantaggi, opportunità e benefici dell'Infrastruttura di Ricerca CLARIN e del nodo nazionale CLARIN-IT per la comunità italiana}, YEAR = {2017}, ABSTRACT = {L'interesse da parte delle scienze umane e sociali per le tecnologie del linguaggio non è mai stato così attuale come in questo momento storico. Le principali conferenze di Digital Humanities vedono sempre più la partecipazione di linguisti computazionali, mentre nelle conferenze di Trattamento Automatico del Linguaggio (TAL), l'applicazione di soluzioni TAL alle scienze umane e sociali costituisce una tematica che si affianca a quella delle ricadute industriali. Il bisogno di rispondere alle esigenze di una platea di utenti diversa apre nuove prospettive e offre una sfida rilevante per il settore delle tecnologie del linguaggio. I testi da trattare in ambito umanistico possono essere spesso eterogenei per genere, per periodo storico, per tipologia e nuovi tipi di analisi testuale acquistano particolare rilevanza. I software di analisi devono permettere una elaborazione automatica affidabile di tipologie di dati diversi da quelli che comunemente vengono usati nel TAL. La qualità delle risorse, in particolare la qualità dei vari livelli di annotazione acquista maggiore importanza quando queste devono essere usate per fare ricerca. Diventa cruciale sviluppare strumenti facilmente usabili e adattabili a diverse tipologie di contenuto e fornire soluzioni volte a facilitare il reperimento e la condivisione di risorse e di tecnologie. E' proprio per rispondere a queste esigenze e per far incontrare chi produce e sviluppa risorse e tecnologie linguistiche con chi le usa, che è stata creata CLARIN (Common Language Resources Infrastructure for Social Sciences and Humanities), l'infrastruttura di ricerca europea per le risorse linguistiche al servizio delle scienze umane e sociali. 
CLARIN favorisce lo sviluppo di soluzioni tecnologiche volte a rendere le risorse e le tecnologie linguistiche visibili e disponibili per studiosi, ricercatori, studenti e cittadini, attraverso una modalità unificata e standardizzata di accesso. Tale innovazione consente di adottare nuovi e diversi approcci alla disciplina tradizionale determinando, in prospettiva, nuove consuetudini di studio che, sulla base delle buone pratiche lasciate in eredità dalla tradizione precedente, permettono lo sviluppo di una diversa e più attuale metodologia di ricerca e di prassi didattica.}, KEYWORDS = {Digital Humanities, CLARIN-IT}, URL = {https://apps.unive.it/server/eventi/13818/master%202017-2018%2011-2017-1.pdf}, CONFERENCE_NAME = {Università Ca' Foscari. Cerimonia conclusiva Master Digital Humanities}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {3/11/2017}, } @INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_M_382188, AUTHOR = {Monachini, M.}, TITLE = {Infrastrutture di Ricerca e Studi Classici. CLARIN-IT: opportunità e prospettive}, YEAR = {2017}, ABSTRACT = {L'interesse da parte delle scienze umane e sociali per le tecnologie del linguaggio non è mai stato così attuale come in questo momento storico. Le principali conferenze di Digital Humanities vedono sempre più la partecipazione di linguisti computazionali, mentre nelle conferenze di Trattamento Automatico del Linguaggio (TAL), l'applicazione di soluzioni TAL alle scienze umane e sociali costituisce una tematica che si affianca a quella delle ricadute industriali. Il bisogno di rispondere alle esigenze di una platea di utenti diversa apre nuove prospettive e offre una sfida rilevante per il settore delle tecnologie del linguaggio. I testi da trattare in ambito umanistico possono essere spesso eterogenei per genere, per periodo storico, per tipologia e nuovi tipi di analisi testuale acquistano particolare rilevanza. 
I software di analisi devono permettere una elaborazione automatica affidabile di tipologie di dati diversi da quelli che comunemente vengono usati nel TAL. La qualità delle risorse, in particolare la qualità dei vari livelli di annotazione acquista maggiore importanza quando queste devono essere usate per fare ricerca. Diventa cruciale sviluppare strumenti facilmente usabili e adattabili a diverse tipologie di contenuto e fornire soluzioni volte a facilitare il reperimento e la condivisione di risorse e di tecnologie. E' proprio per rispondere a queste esigenze e per far incontrare chi produce e sviluppa risorse e tecnologie linguistiche con chi le usa, che è stata creata CLARIN (Common Language Resources Infrastructure for Social Sciences and Humanities), l'infrastruttura di ricerca europea per le risorse linguistiche al servizio delle scienze umane e sociali. CLARIN favorisce lo sviluppo di soluzioni tecnologiche volte a rendere le risorse e le tecnologie linguistiche visibili e disponibili per studiosi, ricercatori, studenti e cittadini, attraverso una modalità unificata e standardizzata di accesso. 
Tale innovazione consente di adottare nuovi e diversi approcci alla disciplina tradizionale determinando, in prospettiva, nuove consuetudini di studio che, sulla base delle buone pratiche lasciate in eredità dalla tradizione precedente, permettono lo sviluppo di una diversa e più attuale metodologia di ricerca e di prassi didattica.}, KEYWORDS = {Digital Humanities, CLARIN-IT}, URL = {http://www.clarin-it.it/sites/default/files/documents/UniParma_Workshop_2017_Locandina.pdf}, CONFERENCE_NAME = {DIGITAL HUMANITIES E FILOLOGIA GRECA: risorse e infrastrutture di ricerca applicate allo studio del greco antico}, CONFERENCE_PLACE = {Parma}, CONFERENCE_DATE = {20/11/2017}, } @INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_M_382191, AUTHOR = {Monachini, M.}, TITLE = {Nuove tecnologie e nuovi sviluppi di indagine: CLARIN-IT e alcuni esempi di applicazione allo studio del greco antico}, YEAR = {2017}, ABSTRACT = {Il lavoro tradizionale del filologo necessita oggi di una disponibilità sempre più ampia di dati e di testi (letteratura secondaria, bibliografia specifica, fonti primarie), il lavoro del singolo studioso sembra oramai accompagnarsi alla necessità di un team di ricerca che collabori su progetti di ampia scala, quali le edizioni dei testi. Molte delle informazioni indispensabili per il filologo sono oggi (o potrebbero essere) disponibili e maggiormente accessibili grazie all'utilizzo di strumenti informatici, ma spesso si tratta di materiali dispersi e poco connessi tra loro; talora la loro esistenza è persino ignota agli studiosi tradizionali. Il trend dei dati che si registra nella disciplina, grazie alla diffusione del web, con la circolazione di risorse utili per l'analisi e la ricostruzione del testo, fa ripensare al rapporto tra filologia - in ogni suo aspetto - e nuove tecnologie e lascia ampio spazio alle riflessioni metodologiche sui procedimenti d'indagine. Si tratta di far dialogare questi dati e implementarli. 
Il primo passo riguarda l'individuazione delle opportunità offerte dal settore delle DH in relazione a ogni singola disciplina nella sua specificità e, d'altro lato definire quali siano le esigenze di ciascun singolo settore. Nel fare questo è necessario mantenere alto lo standard sia dello strumento sia del tipo di dati inseriti. Come tratta il testo lo studioso affiancato dall'ausilio delle nuove tecnologie? Cosa trova online? Che bisogni emergono nelle pratiche di uso odierne? Si tenterà di dare una risposta a queste domande con esempi pratici di metodo applicato allo studio - ad esempio - di un autore specifico. Nel contesto verranno inoltre presentate le attività sino ad ora svolte dal gruppo di ricerca. 1) Tramite Survey si sono identificati la pratica d'uso oggi, e i punti di forza e mancanze degli strumenti esistenti. 2) Si sono così definite le esigenze di una comunità specifica e le relative richieste e aspettative. 3) Si è definito un prototipo di strumento che risponda alle esigenze individuate, ora in fase di valutazione. 
4) realizzazione, in prospettiva, di uno strumento che possa offrire una piattaforma collaborativa che metta a disposizione i dati (testo, apparato, commento, analisi a diversi livelli, etc.), variamente fruibili, assieme alla possibilità di accedere facilmente a tutti i dati relativi disponibili in rete.}, KEYWORDS = {Digital Humanities, Computational Philology}, URL = {http://www.clarin-it.it/sites/default/files/documents/UniParma_Workshop_2017_Locandina.pdf}, CONFERENCE_NAME = {2° Workshop di Studio Insegnamenti di Storia della Lingua Greca (LT) e Filologia Greca (LM)}, CONFERENCE_PLACE = {Parma}, CONFERENCE_DATE = {1/12/2017}, } @INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_M_429407, AUTHOR = {Monachini, M.}, TITLE = {Digital Humanities and Research Infrastructures: CLARIN and CLARIN-IT}, YEAR = {2017}, ABSTRACT = {La lezione al Corso "Digital Humanities: Web Resources, Tools and Infrastructures" Venice International University (a.a. 2017-2018) ha lo scopo di dimostrare i vantaggi, i benefici e le opportunità offerte da una infrastruttura di ricerca come CLARIN-ERIC per rispondere ai quesiti di ricerca e le sfide nel settore delle Digital Humanities.}, KEYWORDS = {digital humanities, research infrastructures, data deluge}, URL = {https://publications.cnr.it/doc/429407}, CONFERENCE_NAME = {Course "Digital Humanities: Web Resources, Tools and Infrastructures" Venice International University}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {4/12/2017}, } @INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_MNS_375982, AUTHOR = {Monachini, M. and Nicolosi, A. 
and Stefanini, A.}, TITLE = {Digital Classics: A Survey of the Needs of Ancient Greek Scholars in Italy}, YEAR = {2017}, ABSTRACT = {This paper presents and discusses the findings of a survey carried out in order to assess the use of digital resources and digital technologies with respect to work in Ancient Greek scholarship, as well as to identify the factors that are likely to constrain its use and to elicit needs and requirements of Ancient Greek scholars in Italy. The survey is in line with the principles behind the recent user engagement strategy developed by CLARIN-ERIC and constitutes one of the national efforts undertaken by CLARIN-IT to contribute to the wider impact of CLARIN on Digital Classicists.}, KEYWORDS = {CLARIN-ERIC, CLARIN-IT, CLARIN on Digital Classicists}, PAGES = {5}, URL = {https://www.clarin.eu/event/2017/clarin-annual-conference-2017-budapest-hungary}, CONFERENCE_NAME = {CLARIN Annual Conference 2017}, CONFERENCE_PLACE = {Budapest, Hungary}, CONFERENCE_DATE = {18-20 September, 2017}, } @INPROCEEDINGS{NICOLAS_2017_INPROCEEDINGS_NKMDCAEBQ_375984, AUTHOR = {Nicolas, L. and König, A. and Monachini, M. and Del Gratta, R. and Calamai, S. and Abel, A. and Enea, A. and Biliotti, F. and Quochi, V.}, TITLE = {CLARIN-IT: State of Affairs, Challenges and Opportunities}, YEAR = {2017}, ABSTRACT = {This paper provides an overview on the Italian national CLARIN consortium and the status of CLARIN-IT in general. 
It thus discusses the current state of affairs of the consortium and provides information on the members, especially with regards to what they offer to CLARIN in terms of resources, services and expertise, and what CLARIN offers them to further their own research.}, KEYWORDS = {Italian CLARIN consortium, CLARIN-IT}, PAGES = {4}, URL = {https://www.clarin.eu/event/2017/clarin-annual-conference-2017-budapest-hungary}, CONFERENCE_NAME = {CLARIN Annual Conference 2017}, CONFERENCE_PLACE = {Budapest, Hungary}, CONFERENCE_DATE = {18-20 September, 2017}, } @ARTICLE{GOGGI_2016_ARTICLE_GPBFMMDB_359144, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Frontini, F. and Monachini, M. and Manzella, G. and De Mattei, M. and Bustaffa, F.}, TITLE = {A semantic engine for grey literature retrieval in the oceanography domain}, YEAR = {2016}, ABSTRACT = {Here we present the final results of the MAPS (Marine Planning and Service Platform) project, an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. The system takes as input non-textual data (measurements) and text - both published papers and documentation - and it provides an advanced search facility thanks to the rich set of metadata and, above all, to the possibility of a refined and domain targeted key-word indexing of texts using Natural Language Processing (NLP) techniques. The paper describes the system in its details providing also evidence of evaluation.}, KEYWORDS = {Information Extraction, Search Engine, Operative Oceanography}, PAGES = {155-161}, URL = {http://www.greynet.org/thegreyjournal/currentissue.html}, VOLUME = {12}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{MONACHINI_2016_ARTICLE_MF_373630, AUTHOR = {Monachini, M. 
and Frontini, F.}, TITLE = {CLARIN, l'infrastruttura europea delle risorse linguistiche per le scienze umane e sociali e il suo network italiano CLARIN-IT}, YEAR = {2016}, ABSTRACT = {Il 1° ottobre 2015 il MIUR firma l'adesione dell'Italia a CLARIN-ERIC, l'infrastruttura di ricerca che offre risorse e tecnologie linguistiche dedicate al settore delle scienze del linguaggio e delle scienze umane e sociali. Questo articolo intende fornire alla comunità italiana una ampia panoramica di CLARIN, la sua missione, i suoi pilastri, i servizi, la sua organizzazione tecnica ed amministrativa e la struttura di governance, sia a livello europeo che locale. Viene introdotto il network italiano, con il primo centro nazionale ILC4CLARIN, ospitato ed in via di sviluppo presso l'ILC-CNR, le funzionalità, le risorse ed i servizi offerti; viene presentato infine il primo nucleo del consorzio nazionale CLARIN-IT, illustrando i criteri di costituzione, le attività previste e le prospettive future.}, KEYWORDS = {Infrastrutture di ricerca, Tecnologie linguistiche, Network italiano CLARIN-IT}, PAGES = {1-30}, URL = {http://www.ai-lc.it/IJCoL/v2n2/1-monachini_and_frontini.pdf}, VOLUME = {2}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{REHM_2016_ARTICLE_RUABBBBBCDGGGVHHJKKKLMMMMMMOOPPPRRPSDTTTVVVZ_355592, AUTHOR = {Rehm, G. and Uszkoreit, H. and Ananiadou, S. and Bel, N. and Bieleviciene, A. and Borin, L. and Branco, A. and Budin, G. and Calzolari, N. and Daelemans, W. and Garabik, R. and Grobelnik, M. and Garcia Mateo, C. and Van Genabith, J. and Hajic, J. and Hernaez, I. and Judge, J. and Koeva, S. and Krek, S. and Krstev, C. and Linden, K. and Magnini, B. and Mariani, J. and McNaught, J. and Melero, M. and Monachini, M. and Moreno, A. and Odijk, J. and Ogrodniczuk, M. and Pezik, P. and Piperidis, S. and Przepiorkowski, A. and Rognvaldsson, E. and Rosner, M. and Pedersen, B. S. 
and Skadina, I. and De Smedt, K. and Tadic, M. and Thompson, P. and Tufis, D. and Varadi, T. and Vasiljevs, A. and Vider, K. and Zabarskaite, J.}, TITLE = {The strategic impact of META-NET on the regional, national and international level}, YEAR = {2016}, ABSTRACT = {This article provides an overview of the dissemination work carried out in META-NET from 2010 until 2015; we describe its impact on the regional, national and international level, mainly with regard to politics and the funding situation for LT topics. The article documents the initiative's work throughout Europe in order to boost progress and innovation in our field.}, KEYWORDS = {Language technology, Multilingual technologies, Machine translation, Language resources, META-NET, META-SHARE}, PAGES = {351-374}, URL = {http://link.springer.com/article/10.1007/s10579-015-9333-4}, VOLUME = {50}, DOI = {10.1007/s10579-015-9333-4}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @ARTICLE{RHEM_2016_ARTICLE_RUCM_344298, AUTHOR = {Rehm, G. and Uszkoreit, H. and Calzolari, N. and Monachini, M.}, TITLE = {The strategic impact of META-NET on the regional, national and international level}, YEAR = {2016}, ABSTRACT = {This article provides an overview of the dissemination work carried out in META-NET from 2010 until 2015; we describe its impact on the regional, national and international level, mainly with regard to politics and the funding situation for LT topics. 
The article documents the initiative's work throughout Europe in order to boost progress and innovation in our field.}, KEYWORDS = {Language technology, Multilingual technologies, Machine translation, Language resources, META-NET, META-SHARE}, PAGES = {26}, URL = {http://www.springer.com/home?SGWID=0-0-1003-0-0\&aqId=2981193\&download=1\&checkval=6c0c2a6da36ef097f2a5e48a49f794e4}, DOI = {10.1007/s10579-015-9333-4}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @INCOLLECTION{DELGRATTA_2016_INCOLLECTION_DBDKM_353799, AUTHOR = {Del Gratta, R. and Boschetti, F. and Del Grosso, A. and Khan, F. and Monachini, M.}, TITLE = {Cooperative philology on the way to web services: The case of the cophiwordnet platform}, YEAR = {2016}, ABSTRACT = {In this paper we present ongoing research carried out at the Institute for Computational Linguistics "A. Zampolli" (ILC) in Pisa. The institute has been active for many years in the field of Digital Humanities providing resources, tools and solutions to address issues of interest to digital humanists. Starting from those previous initiatives, we show how to re-engineer them as Web Services in order to make connections between lexicons, semantic resources and a fine grained text management. Linked Open Data is chosen as the paradigm used to link the different resources as well as the modality of data presentation.}, KEYWORDS = {Canonical text services, Cooperative philology, Linked open data, Web services}, PAGES = {173-187}, URL = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84961744725\&partnerID=q2rCbXpz}, VOLUME = {9442}, DOI = {10.1007/978-3-319-31468-6_13}, PUBLISHER = {Springer International Publishing (Switzerland, CHE)}, ISBN = {978-3-319-31468-6}, BOOKTITLE = {Worldwide Language Service Infrastructure: Second International Workshop, WLSI 2015, Kyoto, Japan, January 22-23, 2015. Revised Selected Papers}, EDITOR = {Murakami, Y. 
and Li, D.}, } @INCOLLECTION{FRONTINI_2016_INCOLLECTION_FDM_357638, AUTHOR = {Frontini, F. and Del Gratta, R. and Monachini, M.}, TITLE = {GeoDomainWordNet: Linking the Geonames Ontology to WordNet}, YEAR = {2016}, ABSTRACT = {This paper illustrates the transformation of GeoNames' ontology concepts, with their English labels and glosses, into a GeoDomain WordNet-like resource in English, its translation into Italian, and its linking to the existing generic WordNets of both languages. The paper describes the criteria used for the linking of domain synsets to each other and to the generic ones and presents the published resource in RDF according to the w3c and lemon schema.}, KEYWORDS = {GeoNames, WordNet, Language resources, Lexicons, Linguistic linked data, lemon, RDF}, PAGES = {229-242}, URL = {http://link.springer.com/chapter/10.1007/978-3-319-43808-5_18}, VOLUME = {9561}, DOI = {10.1007/978-3-319-43808-5_18}, ISBN = {978-3-319-43808-5}, BOOKTITLE = {Human Language Technology. Challenges for Computer Science and Linguistics}, EDITOR = {Vetulani, Z. and Uszkoreit, H. and Kubis, M.}, } @INPROCEEDINGS{DELGRATTA_2016_INPROCEEDINGS_DFMPRBKSC_355425, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Khan, F. and Soria, C. and Calzolari, N.}, TITLE = {LREC as a Graph: People and Resources in a Network}, YEAR = {2016}, ABSTRACT = {This proposal describes a new way to visualise resources in the LREMap, a community-built repository of language resource descriptions and uses. The LREMap is represented as a force-directed graph, where resources, papers and authors are nodes. 
The analysis of the visual representation of the underlying graph is used to study how the community gathers around LRs and how LRs are used in research.}, KEYWORDS = {Language Resources, Resources Documentation, Data Visualisation}, PAGES = {2529-2532}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 May 2016}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{GOGGI_2016_INPROCEEDINGS_GPBFMMDB_350374, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Frontini, F. and Monachini, M. and Manzella, G. and De Mattei, M. and Bustaffa, F.}, TITLE = {A semantic engine for grey literature retrieval in the oceanography domain}, YEAR = {2016}, ABSTRACT = {Here we present the final results of the MAPS (Marine Planning and Service Platform) project, an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. The system takes as input non-textual data (measurements) and text - both published papers and documentation - and it provides an advanced search facility thanks to the rich set of metadata and, above all, to the possibility of a refined and domain targeted key-word indexing of texts using Natural Language Processing (NLP) techniques. 
The paper describes the system in its details providing also evidence of evaluation.},
  KEYWORDS = {Information Extraction, Search Engine, Operative Oceanography},
  PAGES = {104--111},
  URL = {https://publications.cnr.it/doc/350374},
  VOLUME = {17},
  ISBN = {978-90-77484-27-2},
  CONFERENCE_NAME = {Seventeenth International Conference on Grey Literature. A New Wave of Textual and Non-Textual Grey Literature},
  CONFERENCE_PLACE = {Amsterdam},
  CONFERENCE_DATE = {December 1st-2nd 2015},
  EDITOR = {Farace, D. and Frantzen, J.},
}

@INPROCEEDINGS{KHAN_2016_INPROCEEDINGS_KBM_366525,
  AUTHOR = {Khan, A. F. and Bellandi, A. and Monachini, M.},
  TITLE = {Tools and Instruments for Building and Querying Diachronic Computational Lexica},
  YEAR = {2016},
  ABSTRACT = {This article describes work on enabling the addition of temporal information to senses of words in linguistic linked open data lexica based on the lemonDia model. Our contribution in this article is twofold. On the one hand, we demonstrate how lemonDia enables the querying of diachronic lexical datasets using OWL-oriented Semantic Web based technologies. On the other hand, we present a preliminary version of an interactive interface intended to help users in creating lexical datasets that model meaning change over time.},
  KEYWORDS = {OWL-oriented Semantic Web based technologies},
  PAGES = {164--171},
  URL = {https://www.clarin-d.net/images/lt4dh/pdf/LT4DH22.pdf},
  ISBN = {978-4-87974-708-2},
  CONFERENCE_NAME = {Language Technology Resources and Tools for Digital Humanities (LT4DH 2016)},
  CONFERENCE_PLACE = {Osaka, Japan},
  CONFERENCE_DATE = {December 11-16, 2016},
}

@INPROCEEDINGS{KHAN_2016_INPROCEEDINGS_KDM_355470,
  AUTHOR = {Khan, F. and Díaz Vera, J. E.
and Monachini, M.},
  TITLE = {Representing Polysemy and Diachronic Lexico-Semantic Data on the {Semantic Web}},
  YEAR = {2016},
  ABSTRACT = {In this article we will outline two different vocabularies, both extensions of the lemon model, for representing diachronic lexico-semantic data on the Semantic Web. This is especially useful for representing the evolution of scientific terminologies where many terms are polysemous and or imported from other languages. The first vocabulary, polyLemon, allows for the representation of data about polysemy; the second, lemonDIA the representation of meaning shift over time.},
  KEYWORDS = {Language Resources, Resource Data Framework (RDF)},
  PAGES = {37--45},
  URL = {http://ceur-ws.org/Vol-1595/paper4.pdf},
  VOLUME = {1595},
  PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)},
  ISSN = {1613-0073},
  CONFERENCE_NAME = {Second International Workshop on Semantic Web for Scientific Heritage co-located with 13th Extended Semantic Web Conference (ESWC 2016)},
  CONFERENCE_PLACE = {Heraklion, Greece},
  CONFERENCE_DATE = {May 30th, 2016},
  BOOKTITLE = {SW4SH 2016 Semantic Web for Scientific Heritage Proceedings of the Second International Workshop on Semantic Web for Scientific Heritage co-located with 13th Extended Semantic Web Conference (ESWC 2016)},
  EDITOR = {Draelants, I. and Zucker, C. F. and Monnin, A. and Zucker, A.},
}

@INPROCEEDINGS{KHAN_2016_INPROCEEDINGS_KDM_355442,
  AUTHOR = {Khan, F. and Díaz Vera, J. and Monachini, M.},
  TITLE = {The Representation of an {Old English} Emotion Lexicon as Linked Open Data},
  YEAR = {2016},
  ABSTRACT = {We present the ongoing conversion of a lexicon of emotion terms in Old English (OE) into RDF using an extension of lemon called lemonDIA and which we briefly describe.
We focus on the translation of the subset of the lexicon dealing with terms for shame and guilt and give a number of illustrative example.},
  KEYWORDS = {Linguistic Linked Open Data, Old English, Lexicon},
  PAGES = {73--76},
  URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-9-1},
  CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)},
  CONFERENCE_PLACE = {Portoroz, Slovenia},
  CONFERENCE_DATE = {23-28 may},
  BOOKTITLE = {LDL 2016 5th Workshop on Linked Data in Linguistics: Managing, Building and Using Linked Language Resources},
  EDITOR = {McCrae, J. P. and Chiarcos, C. and Ponsoda, E. M. and Declerck, T. and Osenova, P. and Hellmann, S.},
}

@INPROCEEDINGS{NAHLI_2016_INPROCEEDINGS_NFMKZK_355436,
  AUTHOR = {Nahli, O. and Frontini, F. and Monachini, M. and Khan, F. and Zarghili, A. and Khalfi, M.},
  TITLE = {{Al Qamus al Muhit}, a Medieval {Arabic} Lexicon in {LMF}},
  YEAR = {2016},
  ABSTRACT = {This paper describes the conversion into LMF, a standard lexicographic digital format of 'al-qāmūs al-muḥīṭ, a Medieval Arabic lexicon. The lexicon is first described, then all the steps required for the conversion are illustrated. The work is will produce a useful lexicographic resource for Arabic NLP, but is also interesting per se, to study the implications of adapting the LMF model to the Arabic language. Some reflections are offered as to the status of roots with respect to previously suggested representations. In particular, roots are, in our opinion are to be not treated as lexical entries, but modeled as lexical metadata for classifying and identifying lexical entries.
In this manner, each root connects all entries that are derived from it.},
  KEYWORDS = {Arabic Lexicon, LMF, Al Qamus al Muhit},
  PAGES = {943--950},
  URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-9-1},
  CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)},
  CONFERENCE_PLACE = {Portoroz, Slovenia},
  CONFERENCE_DATE = {23-28 may},
  BOOKTITLE = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
  EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.},
}

@INPROCEEDINGS{RUSSO_2016_INPROCEEDINGS_RM_367412,
  AUTHOR = {Russo, I. and Monachini, M.},
  TITLE = {{Samskara} minimal structural features for detecting subjectivity and polarity in {Italian} tweets},
  YEAR = {2016},
  ABSTRACT = {Sentiment analysis classification tasks strongly depend on the properties of the medium that is used to communicate opinionated content. There are some limitations in Twitter that force the user to exploit structural properties of this social network with features that have pragmatic and communicative functions. Samskara is a system that uses minimal structural features to classify Italian tweets as instantiations of a textual genre, obtaining good results for subjectivity classification, while polarity classification needs substantial improvements.},
  KEYWORDS = {sentiment analysis, twitter},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85009270160\&origin=inward},
  VOLUME = {1749},
  PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)},
  ISSN = {1613-0073},
  CONFERENCE_NAME = {Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian.
Final Workshop EVALITA 2016},
  CONFERENCE_PLACE = {Napoli},
  CONFERENCE_DATE = {7/12/2016},
  BOOKTITLE = {CEUR workshop proceedings},
}

@INPROCEEDINGS{ARRIGONI_2016_INPROCEEDINGS_AKMB_363708,
  AUTHOR = {Arrigoni, S. and Khan, F. and Monachini, M. and Boschetti, F.},
  TITLE = {Misurare {Memorata Poetis}: prime statistiche},
  YEAR = {2016},
  KEYWORDS = {intertestualità, temi e motivi},
  PAGES = {151--155},
  URL = {http://www.himeros.eu/aiucd2016/c47.pdf},
  CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD. Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture},
  CONFERENCE_PLACE = {Ca' Dolfin, Venezia, Italia},
  CONFERENCE_DATE = {7-9/9/2016},
  BOOKTITLE = {AIUCD 2016-Book of Abstracts},
  EDITOR = {Boschetti, F.},
}

@INPROCEEDINGS{MANZELLA_2016_INPROCEEDINGS_MBBDDFMMMNS_355476,
  AUTHOR = {Manzella, G. M. R. and Bartolini, R. and Bustaffa, F. and D'Angelo, P. and De Mattei, M. and Frontini, F. and Maltese, M. and Medone, D. and Monachini, M. and Novellino, A. and Spada, A.},
  TITLE = {Marine Planning and Service Platform: Specific Ontology Based semantic Search Engine Serving Data Management and Sustainable Development},
  YEAR = {2016},
  ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is aiming at building a computer platform supporting a Marine Information and Knowledge System. One of the main objective of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. In oceanography the cost related to data collection is very high and the new paradigm is based on the concept to collect once and re-use many times (for re-analysis, marine environment assessment, studies on trends, etc). This concept requires the access to quality controlled data and to information that is provided in reports (grey literature) and/or in relevant scientific literature.
Hence, creation of new technology is needed by integrating several disciplines such as data management, information systems, knowledge management...},
  KEYWORDS = {Marine Information, Knowledge System},
  PAGES = {2},
  URL = {http://meetingorganizer.copernicus.org/EGU2016/orals/20144},
  VOLUME = {18},
  PUBLISHER = {Copernicus GmbH (Katlenburg-Lindau, Germania)},
  ISSN = {1607-7962},
  CONFERENCE_NAME = {European Geosciences Union General Assembly (EGU 2016)},
  CONFERENCE_PLACE = {Vienna, Austria},
  CONFERENCE_DATE = {17-22 aprile 2016},
  BOOKTITLE = {Geophysical research abstracts (Online)},
}

@INPROCEEDINGS{MONACHINI_2016_INPROCEEDINGS_M_368274,
  AUTHOR = {Monachini, M.},
  TITLE = {{CLARIN-IT} The {Italian} Common Language Resources and Technology Infrastructure {CLARIN-IT}: l'infrastruttura di ricerca per le scienze umane e sociali},
  YEAR = {2016},
  ABSTRACT = {The CLARIN-IT National Coordinator presented a keynote CLARIN-IT, l'Infrastruttura di Ricerca per le Scienze Umane e Sociali, in the 5th Annual Conference of the Associazione per l'Informatica Umanistica e la Cultura Digitale (AIUCD) held in Venezia from 7th to 9th September 2016. It is time for research infrastructures to be able to guarantee interoperability and integration between the instruments for philological studies and the instruments for the analysis of large textual corpora, breaking down the rigid barriers between digital and computational philology, on the one hand, and corpus linguistics on the other hand.
Programma: https://docs.google.com/viewer?a=v\&pid=sites\&srcid=dW5pdmUuaXR8YWl1Y2QyMDE2fGd4OjIyMDhhMzk2ODk0MjUyNDQ},
  KEYWORDS = {CLARIN-IT, scienze umane e sociali},
  URL = {http://www.clarin-it.it/en/content/clarin-it-aiucd-2016},
  CONFERENCE_NAME = {5th Annual Conference of the Associazione per l'Informatica Umanistica e la Cultura Digitale (AIUCD)},
  CONFERENCE_PLACE = {Venezia},
  CONFERENCE_DATE = {7th to 9th September 2016},
}

@INPROCEEDINGS{MONACHINI_2016_INPROCEEDINGS_M_382195,
  AUTHOR = {Monachini, M.},
  TITLE = {Infrastrutture e ricerca nel settore umanistico},
  YEAR = {2016},
  ABSTRACT = {L'informatica applicata allo studio del testo ha una lunga storia che parte dagli anni '50 dello scorso secolo. Nel corso del tempo allo sviluppo di risorse e strumenti prevalentemente pensati per l'analisi linguistica, come ad esempio la lemmatizzazione, si sono affiancati metodi, risorse e strumenti più squisitamente filologici, come la codifica delle varianti, i repertori digitali di molteplici edizioni del medesimo testo e gli strumenti per l'allineamento automatico delle stesse. Tuttavia la conoscenza di queste tecniche ed il loro utilizzo è ancora piuttosto limitato nel panorama degli studi italiani di filologia classica, nonostante alcune acquisizioni concettuali e tecniche di prima grandezza nel settore siano dovute a studiosi italiani. Il workshop si propone di discutere temi e problematiche attinenti la filologia digitale sulla scorta di una rassegna del settore iniziata all'università di Parma con la collaborazione del CNR-ILC di Pisa: o Motivazioni per l'adozione di tecniche di filologia digitale nel campo della ricerca e dell'insegnamento. Barriere al loro utilizzo.
o I risultati di una rassegna sull'impiego di tecniche di filologia digitale da parte di studiosi italiani sulla base di un questionario o Diffusione della conoscenza del settore in Italia o Correnti e future tematiche di ricerca},
  KEYWORDS = {Digital Humanities, Computational Philology},
  URL = {https://publications.cnr.it/doc/382195},
  CONFERENCE_NAME = {Utilizzo e diffusione di metodi, strumenti e tecnologie digitali per gli studi filologici: l'applicazione della filologia digitale al greco antico},
  CONFERENCE_PLACE = {Parma},
  CONFERENCE_DATE = {10. 10. 2016},
}

@INPROCEEDINGS{MONACHINI_2016_INPROCEEDINGS_MEF_368272,
  AUTHOR = {Monachini, M. and Enea, A. and Frontini, F.},
  TITLE = {{CLARIN-IT}: servizi per la comunità italiana delle scienze umane e sociali},
  YEAR = {2016},
  ABSTRACT = {CLARIN-IT - The Italian Common Language Resources and Technology Infrastructure: Monica Monachini - CLARIN Italian National Coordinator Alessandro Enea - Responsible of ILCforCLARIN \& contact person for IDEM Francesca Frontini - Standing Committee for CLARIN Technical Centres (SCCTC) ILC-CNR National Representative},
  KEYWORDS = {CLARIN-IT, The Italian Common Language Resources and Technology Infrastructure},
  URL = {http://www.clarin-it.it/en/content/clarin-it-idem-day-2016},
  CONFERENCE_NAME = {CLARIN-IT @ IDEM Day 2016},
  CONFERENCE_PLACE = {Roma [Università degli Studi di Roma Tre]},
  CONFERENCE_DATE = {6-8 giugno 2016},
}

@INPROCEEDINGS{PARDELLI_2016_INPROCEEDINGS_PGMBR_362073,
  AUTHOR = {Pardelli, G. and Goggi, S. and Monachini, M. and Bartolini, R. and Russo, I.},
  TITLE = {A Geographical Visualization of {GL} Community: a Snapshot},
  YEAR = {2016},
  ABSTRACT = {"Today, in the spirit of science, grey literature communities are called to demonstrate their know-how and merit to wider audiences" [Farace Dominic J., 2011].
This quotation stresses the important role of the several international organizations in producing and disseminating knowledge in the field of Grey Literature (GL): the paper aims to provide a first snapshot of the geographical distribution of GL organizations and their participation to the annual International Conference on Grey Literature over the time (in the period from 2003 to 2015). Nowadays a visual representation of data is often associated with the traditional statistical graphs, in particular for representing complex phenomena by means of maps and diagrams, which allow a deeper and more focused analysis of the data. In our case the geographical representation of stakeholders in government, academics, business and industry aims at visualizing the GL community across the globe: it concerns 675 organizations which over the years have contributed to the development of a common vision on the most pressing issues of the field by using new paradigms such as Open Access and the social networks.},
  KEYWORDS = {Geographical Visualization, Grey Literature},
  PAGES = {67},
  URL = {https://publications.cnr.it/doc/362073},
  VOLUME = {18},
  ISBN = {978-90-77484-29-6},
  CONFERENCE_NAME = {Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature},
  CONFERENCE_PLACE = {New York},
  CONFERENCE_DATE = {November 28-29, 2016},
  BOOKTITLE = {GL18 Program Book},
  EDITOR = {Farace, D. and Frantzen, J.},
}

@ARTICLE{DELGRATTA_2015_ARTICLE_DFKM_287051,
  AUTHOR = {Del Gratta, R. and Frontini, F. and Khan, F. and Monachini, M.},
  TITLE = {Converting the {PAROLE SIMPLE CLIPS} Lexicon into {RDF} with lemon},
  YEAR = {2015},
  ABSTRACT = {This paper describes the publication and linking of (parts of) PAROLE SIMPLE CLIPS (PSC), a large scale Italian lexicon, to the Semantic Web and the Linked Data cloud using the lemon model.
The main challenge of the conversion is discussed, namely the reconciliation between the PSC semantic structure which contains richly encoded semantic information, following the qualia structure of the Generative Lexicon theory and the lemon view of lexical sense as a reified pairing of a lexical item and a concept in an ontology. The result is two datasets: one consists of a list of lemon lexical entries with their lexical properties, relations and senses; the other consists of a list of OWL individuals representing the referents for the lexical senses. These OWL individuals are linked to each other by a set of semantic relations and mapped onto the SIMPLE OWL ontology of higher level semantic types.},
  KEYWORDS = {lemon, linked data, generative lexicon, RDF, OWL, lexical resource},
  PAGES = {387--392},
  URL = {http://www.semantic-web-journal.net/content/converting-parole-simple-clips-lexicon-rdf-lemon-0},
  VOLUME = {6},
  DOI = {10.3233/SW-140168},
  PUBLISHER = {IOS Press (Amsterdam, Paesi Bassi)},
  ISSN = {1570-0844},
  JOURNAL = {Semantic web (Print)},
}

@ARTICLE{GOGGI_2015_ARTICLE_GMFBPDBM_334894,
  AUTHOR = {Goggi, S. and Monachini, M. and Frontini, F. and Bartolini, R. and Pardelli, G. and De Mattei, M. and Bustaffa, F. and Manzella, G.},
  TITLE = {Marine Planning and Service Platform ({MAPS}): An Advanced Research Engine for Grey Literature in Marine Science},
  YEAR = {2015},
  ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is a development of the Marine project (Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013) aiming at building a computer platform for supporting a Marine Information and Knowledge System, as part of the data management activities. One of the main objective of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols.
We will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data. The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs/problems and their answers.},
  KEYWORDS = {Marine Science, Search Engine, Source Data, Oceanography},
  PAGES = {171--178},
  URL = {https://publications.cnr.it/doc/334894},
  VOLUME = {11},
  PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)},
  ISSN = {1574-1796},
  JOURNAL = {The Grey journal (Print)},
}

@INPROCEEDINGS{DELGRATTA_2015_INPROCEEDINGS_DFMPRBGKQSC_342213,
  AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Goggi, S. and Khan, F. and Quochi, V. and Soria, C. and Calzolari, N.},
  TITLE = {Visualising {Italian} Language Resources: a Snapshot},
  YEAR = {2015},
  ABSTRACT = {This paper aims to provide a first snapshot of Italian Language Resources (LRs) and their uses by the community, as documented by the papers presented at two different conferences, LREC2014 and CLiC-it 2014. The data of the former were drawn from the LOD version of the LRE Map, while those of the latter come from manually analyzing the proceedings.
The results are presented in the form of visual graphs and confirm the initial hypothesis that Italian LRs require concrete actions to enhance their visibility.},
  KEYWORDS = {Italian Language Resources},
  PAGES = {100--104},
  URL = {https://books.openedition.org/aaccademia/1277?lang=it},
  ISBN = {978-88-99200-62-6},
  CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics CLiC-it 2015},
  CONFERENCE_PLACE = {Trento},
  CONFERENCE_DATE = {3-4 December 2015},
  BOOKTITLE = {Proceedings of the Second Italian Conference on Computational Linguistics CLiC-it 2015},
  EDITOR = {Bosco, C. and Tonelli, S. and Zanzotto, F. M.},
}

@INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FQM_304304,
  AUTHOR = {Frontini, F. and Quochi, V. and Monachini, M.},
  TITLE = {{Generative Lexicon} and polysemy: inducing logical alternations},
  YEAR = {2015},
  ABSTRACT = {The current paper brings together the results of a series of experiments for inducing regular sense alternations, or regular/logical polysemy, from a computational lexicon based on the Generative Lexicon theory. The results are discussed in light of the potential benefits and uses of the amended algorithm.},
  KEYWORDS = {Polysemy, Generative Lexicon, Logical Alternations},
  PAGES = {7},
  URL = {https://publications.cnr.it/doc/304304},
  PUBLISHER = {MAPLEX2015 Multiple Approaches to Lexicon Conference (Yamagata, JPN)},
  CONFERENCE_NAME = {MAPLEX2015 Multiple Approaches to Lexicon Conference},
  CONFERENCE_PLACE = {Yamagata, Japan},
  CONFERENCE_DATE = {February 9-10, 2015},
  EDITOR = {Hsieh, S. and Kanzaki, K.},
}

@INPROCEEDINGS{GOGGI_2015_INPROCEEDINGS_GMFBPDBM_329370,
  AUTHOR = {Goggi, S. and Monachini, M. and Frontini, F. and Bartolini, R. and Pardelli, G. and De Mattei, M. and Bustaffa, F.
and Manzella, G.},
  TITLE = {Marine Planning and Service Platform ({MAPS}): An Advanced Research Engine for Grey Literature in Marine Science},
  YEAR = {2015},
  ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is a development of the Marine project (Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013) aiming at building a computer platform for supporting a Marine Information and Knowledge System, as part of the data management activities. One of the main objective of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. We will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data. The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs/problems and their answers.},
  KEYWORDS = {Marine Science, Search Engine, Source Data, Oceanography},
  PAGES = {108--114},
  URL = {http://www.textrelease.com/gl16program.html},
  VOLUME = {16},
  PUBLISHER = {TextRelease (Amsterdam, NLD)},
  ISBN = {978-90-77484-23-4},
  CONFERENCE_NAME = {Sixteenth International Conference on Grey Literature Grey Literature Lobby: Engines and Requesters for Change},
  CONFERENCE_PLACE = {Library of Congress Washington D. C., USA},
  CONFERENCE_DATE = {December 8-9 2014},
  BOOKTITLE = {Grey Literature Lobby: Engines and Requesters for Change},
  EDITOR = {Farace, D. and Frantzen, J.},
}

@INPROCEEDINGS{RUSSO_2015_INPROCEEDINGS_RCM_332590,
  AUTHOR = {Russo, I. and Caselli, T.
and Monachini, M.},
  TITLE = {Extracting and Visualising Biographical Events from {Wikipedia}},
  YEAR = {2015},
  ABSTRACT = {This work presents a proposal for the development of a natural language processing module for event and temporal analysis of biographies as available in Wikipedia. At the current level of development, we restricted the extraction to temporally anchored events as they represent salient information which can be further used to extract additional events and facilitate their chronological ordering and the representation of a person's timeline. Visualising data about basic facts concerning groups of people helps with historical reasoning and enables comparisons among them.},
  KEYWORDS = {mining biographies for structured information, visualising biographical data, temporal information},
  PAGES = {111--115},
  URL = {http://ceur-ws.org/Vol-1399/paper17.pdf},
  VOLUME = {1399},
  CONFERENCE_NAME = {BD2015 Biographical Data in a Digital World 2015},
  CONFERENCE_PLACE = {Amsterdam},
  CONFERENCE_DATE = {April 9, 2015},
  BOOKTITLE = {BD2015 Biographical Data in a Digital World 2015},
  EDITOR = {ter Braake, S. and Fokkens, A. and Sluijter, R. and Declerck, T. and Wandl Vogt, E.},
}

@INPROCEEDINGS{BOSCHETTI_2015_INPROCEEDINGS_BDDMDN_295474,
  AUTHOR = {Boschetti, F. and Del Gratta, R. and Del Grosso, A. and Monachini, M. and Diakoff, H. and Nahli, O.},
  TITLE = {Collaborative Philology on the way to Web Services: the case of {CoPhiWordnet}},
  YEAR = {2015},
  ABSTRACT = {Starting from previous initiatives of the CoPhiLab, we show how they can be reinterpreted as Web Services, especially when they become part of a wider scenario: Web Services are used to make connections between lexicons, semantic resources and a fine grained text management.
Linked Open Data is chosen to be the paradigm used to link the different resources, but also as the modality of data presentation.},
  KEYWORDS = {Collaborative Philology, Web Services, Linked Open Data, Text Services, Text Interpretation},
  URL = {http://langrid.org/wlsi2015/program.html},
  CONFERENCE_NAME = {The Second International Workshop on Worldwide Language Service Infrastructure, WLSI 2015},
  CONFERENCE_PLACE = {Kyoto},
  CONFERENCE_DATE = {22-23rd January 2015},
}

@INPROCEEDINGS{GOGGI_2015_INPROCEEDINGS_GPBFMMDB_342221,
  AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Frontini, F. and Monachini, M. and Manzella, G. and De Mattei, M. and Bustaffa, F.},
  TITLE = {A semantic engine for grey literature retrieval in the oceanography domain},
  YEAR = {2015},
  ABSTRACT = {Here we present the final results of MAPS (Marine Planning and Service Platform), an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. In previous publications the general architecture of the system as well as the set of metadata (Common Data Index) used to describe the documents were presented [3]; it was shown how individual oceanographic data-sets could be indexed within the MAPS library by types of measure, measurement tools, geographic areas, and also linked to specific textual documentation. Documentation is described using the current international standards: Title, Authors, Publisher, Language, Date of publication, Body/Institution, Abstract, etc.; serial publications are described in terms of ISSN, while books are assigned ISBN; content of various types on electronic networks is described by means of doi and url. Each description is linked to the document. Thanks to this, the MAPS library already enables researchers to go from structured oceanographic data to documents describing it.
But this was not enough: documents may contain important information that has not been encoded in the metadata. Thus an advanced Search Engine was put in place that uses semantic-conceptual technologies in order to extract key concepts from unstructured text such as technical documents (reports and grey literature) and scientific papers and to make them indexable and searchable by the end user in the same way as the structured data (such as oceanographic observations and metadata) is. More specifically once a document is uploaded in the MAPS library, key domain concepts in documents are extracted via a natural language processing pipeline and used as additional information for its indexing. The key term identification algorithm is based on marine concepts that were pre-defined in a domain ontology, but crucially it also allows for the discovery of new related concepts. So for instance starting from the domain term salinity, related terms such as sea salinity and average sea salinity will also be identified as key terms and used for indexing and searching documents. A hybrid search system is then put in place, where users can search the library by metadata or by free text queries. In the latter case, the NLP pipeline performs an analysis of the text of the query, and when key concepts are matched, the relevant documents are presented. The results may be later refined by using other structured information (e.g. date of publication, area, ...). Currently a running system has been put in place, with data from satellites, buoys and sea stations; such data is documented and searchable by its relevant metadata and documentation.
Results of quantitative evaluation in terms of information retrieval measures will be presented in the poster; more specifically, given an evaluation set defined by domain experts and composed of pre-defined queries together with documents that answer such queries, it will be shown how the system is highly accurate in retrieving the correct documents from the library. Though this work focuses on oceanography, its results may be easily extended to other domains; more generally, the possibility of enhancing the visibility and accessibility of grey literature via its connection to the data it describes and to an advanced full text indexing are of great relevance for the topic of this conference.},
  KEYWORDS = {Information Extraction, Search Engine, Oceanography},
  PAGES = {76--77},
  URL = {https://publications.cnr.it/doc/342221},
  VOLUME = {17},
  ISBN = {978-90-77484-26-5},
  CONFERENCE_NAME = {Seventeenth International Conference on Grey Literature. A New Wave of Textual and Non-Textual Grey Literature},
  CONFERENCE_PLACE = {Amsterdam},
  CONFERENCE_DATE = {December 1-2},
  BOOKTITLE = {GL17 Program Book},
  EDITOR = {Farace, D. and Frantzen, J.},
}

@ARTICLE{SORIA_2014_ARTICLE_SCMQBCMOP_285553,
  AUTHOR = {Soria, C. and Calzolari, N. and Monachini, M. and Quochi, V. and Bel, N. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.},
  TITLE = {The language resource Strategic Agenda: the {FLaReNet} synthesis of community recommendations},
  YEAR = {2014},
  ABSTRACT = {The main purpose of this paper is to serve as a landmark for future research and in particular for future strategic, infrastructural and coordination initiatives. It presents a preliminary plan for actions and infrastructures that could become the basis for future initiatives in the sector of Language Resources and Technologies (LRTs).
The FLaReNet Language Resource Strategic Agenda presents a set of recommendations for the development and progress of LRT in Europe, as issued from a three-year consultation of the FLaReNet European project. Recommendations cover a broad range of topics and activities, spanning over production and use of language resources, licensing, maintenance and preservation issues, infrastructures for language resources, resource identification and sharing, evaluation and validation, interoperability and policy issues. The intended recipients belong to a large set of players and stakeholders in LRT, ranging from individuals to research and education institutions, to policy-makers, funding agencies, SMEs and large companies, service and media providers},
  KEYWORDS = {Strategic agenda, Language resources planning, Recommended priority actions},
  PAGES = {753--775},
  URL = {https://publications.cnr.it/doc/285553},
  VOLUME = {48},
  DOI = {10.1007/s10579-014-9279-y},
  PUBLISHER = {Springer (Dordrecht, Paesi Bassi)},
  ISSN = {1574-020X},
  JOURNAL = {Language resources and evaluation (Print)},
}

@INCOLLECTION{CALZOLARI_2014_INCOLLECTION_CNMQST_286868,
  AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.},
  TITLE = {Lexicons, Terminologies, Ontologies: Reflections from Experiences in Resource Construction},
  YEAR = {2014},
  ABSTRACT = {This contribution aims at highlighting the strong interconnection between lexicons, terminologies and ontologies and especially the fundamental role that ontologies and lexica mutually play. Our view is that lexical resources are evolving in nature, from ontologically based lexicons we are going towards lexically based ontologies. We explore different instantiations of the current trend of using formal ontologies as a core module of computational lexicons, presenting the advantages especially in multilingual and terminological contexts.
We present work showing that the lexical knowledge already present in non formal computational lexicons can be exploited to derive or enrich a formal ontology without much manual effort. In the terminology domain, we describe the construction of a resource for biology, directly linked to a parallel domain-ontology, that combines characteristics of both lexicons and terminologies, so that is can allow for intelligent access to content. Finally, we describe our experience in two projects in which formal ontologies play a central role in the context of multilingual computational lexicons, where the ontology is what acts as the glue among the different monolingual lexicons and what provides cross-lingual reasoning capabilities.}, KEYWORDS = {Computational Lexicons, Ontology, Terminology, Interoperability, Standards}, PAGES = {103-121}, URL = {http://www.springer.com/computer/ai/book/978-3-642-45326-7}, VOLUME = {8003}, DOI = {10.1007/978-3-642-45327-4_7}, PUBLISHER = {Springer (Berlin Heidelberg, DEU)}, ISBN = {978-3-642-45326-7}, BOOKTITLE = {Language, Culture, Computation. Computational Linguistics and Linguistics. Essays Dedicated to Yaacov Choueka on the Occasion of His 75th Birthday, Part III}, EDITOR = {Dershowitz, N. and Nissan, E.}, } @INPROCEEDINGS{ANTICO_2014_INPROCEEDINGS_AQMM_286882, AUTHOR = {Antico, G. and Quochi, V. and Monachini, M. and Martinelli, M.}, TITLE = {Marrying Technical Writing with LRT}, YEAR = {2014}, ABSTRACT = {In the last years the Technical Writer operational scenarios and the workflow sensibly changed; specifically,"free style" writing - or manual writing - has become outdated and technical writing is now much more concerned with structured management of content than in the past. Technical writing has become more demanding due to a number of factors among which the rise and spread of mobile devices usage. 
This paper discusses the new needs of technical writing and content management business and how LRT can help it improve quality and productivity.}, KEYWORDS = {controlled language, technical writing, content management systems}, PAGES = {19-25}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation (LREC)}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may 2014}, EDITOR = {Isahara, H. and Lee, K. C. S. and Nam, S.}, } @INPROCEEDINGS{BARTOLINI_2014_INPROCEEDINGS_BQDRM_286944, AUTHOR = {Bartolini, R. and Quochi, V. and De Felice, I. and Russo, I. and Monachini, M.}, TITLE = {From Synsets to Videos: Enriching ItalWordNet Multimodally}, YEAR = {2014}, ABSTRACT = {The paper describes the multimodal enrichment of ItalWordNet action verbs' entries by means of an automatic mapping with a conceptual ontology of action types instantiated by video scenes (ImagAct). The two resources present significative differences as well as interesting complementary features, such that a mapping of these two resources can lead to an enrichment of IWN, through the connection between synsets and videos apt to illustrate the meaning described by glosses. Here, we describe an approach inspired by ontology matching methods for the automatic mapping of ImagAct video scenes onto ItalWordNet. The experiments described in the paper are conducted on Italian, but the same methodology can be extended to other languages for which WordNets have been created, since ImagAct is available also for English, Chinese and Spanish.
This source of multimodal information can be exploited to design second language learning tools, as well as for language grounding in action recognition in video sources and potentially for robotics.}, KEYWORDS = {Action ontology, Multimodality, WordNet}, PAGES = {3110-3117}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{BIZZONI_2014_INPROCEEDINGS_BBDDMC_286958, AUTHOR = {Bizzoni, Y. and Boschetti, F. and Diakoff, H. and Del Gratta, R. and Monachini, M. and Crane, G.}, TITLE = {The Making of Ancient Greek WordNet}, YEAR = {2014}, ABSTRACT = {This paper describes the process of creation and review of a new lexico-semantic resource for the classical studies: AncientGreekWordNet. The candidate sets of synonyms (synsets) are extracted from Greek-English dictionaries, on the assumption that Greek words translated by the same English word or phrase have a high probability of being synonyms or at least semantically closely related. The process of validation and the web interface developed to edit and query the resource are described in detail. The lexical coverage of Ancient Greek WordNet is illustrated and the accuracy is evaluated. Finally, scenarios for exploiting the resource are discussed.}, KEYWORDS = {Ancient Greek, Multilingualism, Classical Philology}, PAGES = {1140-1147}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014},
CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{DEFELICE_2014_INPROCEEDINGS_DBRQM_291282, AUTHOR = {De Felice, I. and Bartolini, R. and Russo, I. and Quochi, V. and Monachini, M.}, TITLE = {Evaluating ImagAct-WordNet mapping for English and Italian through videos}, YEAR = {2014}, ABSTRACT = {In this paper we present the results of the evaluation of an automatic mapping between two lexical resources, WordNet/ItalWordNet and ImagAct, a conceptual ontology of action types instantiated by video scenes. Results are compared with those obtained from a previous experiment performed only on Italian data. Differences between the two evaluation strategies, as well as between the quality of the mappings for the two languages considered in this paper, are discussed.}, KEYWORDS = {Language Resources (LRs)}, PAGES = {128-131}, URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-CLICit-2014.pdf}, DOI = {10.12871/CLICIT2014126}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-88-67-41472-7}, CONFERENCE_NAME = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 \& the Fourth International Workshop EVALITA 2014}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 December 2014, Pisa}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{FRONTINI_2014_INPROCEEDINGS_FQM_291452, AUTHOR = {Frontini, F. and Quochi, V.
and Monachini, M.}, TITLE = {Polysemy alternations extraction using the PAROLE SIMPLE CLIPS Italian lexicon}, YEAR = {2014}, ABSTRACT = {This paper presents the results of an experiment of polysemy alternations induction from a lexicon (Utt and Padó, 2011; Frontini et al., 2014), discussing the results and proposing an amendment in the original algorithm.}, KEYWORDS = {Language Resources and Technologies}, PAGES = {175-179}, URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-CLICit-2014.pdf}, DOI = {10.12871/CLICIT2014134}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-88-67-41472-7}, CONFERENCE_NAME = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 \& the Fourth International Workshop EVALITA 2014}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 December 2014, Pisa}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{FRONTINI_2014_INPROCEEDINGS_FQPUM_286984, AUTHOR = {Frontini, F. and Quochi, V. and Padó, S. and Utt, J. and Monachini, M.}, TITLE = {Polysemy Index for Nouns: an Experiment on Italian using the PAROLE SIMPLE CLIPS Lexical Database}, YEAR = {2014}, ABSTRACT = {An experiment is presented to induce a set of polysemous basic type alternations (such as ANIMAL-FOOD, or BUILDING-INSTITUTION) by deriving them from the sense alternations found in an existing lexical resource. The paper builds on previous work and applies those results to the Italian lexicon PAROLE SIMPLE CLIPS. The new results show how the set of frequent type alternations that can be induced from the lexicon is partly different from the set of polysemy relations selected and explicitly applied by lexicographers when building it.
The analysis of mismatches shows that frequent type alternations do not always correspond to prototypical polysemy relations, nevertheless the proposed methodology represents a useful tool offered to lexicographers to systematically check for possible gaps in their resource.}, KEYWORDS = {Polysemy, lexical resources, semantics}, PAGES = {2955-2963}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, BOOKTITLE = {LREC 2014 Ninth International Conference on Language Resources and Evaluation Proceedings}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{MONEGLIA_2014_INPROCEEDINGS_MBFGKMP_286990, AUTHOR = {Moneglia, M. and Brown, S. and Frontini, F. and Gagliardi, G. and Khan, F. and Monachini, M. and Panunzi, A.}, TITLE = {The IMAGACT Visual Ontology. an Extendable Multilingual Infrastructure for the Representation of Lexical Encoding of Action}, YEAR = {2014}, ABSTRACT = {Action verbs have many meanings, covering actions in different ontological types. Moreover, each language categorizes action in its own way. One verb can refer to many different actions and one action can be identified by more than one verb. The range of variations within and across languages is largely unknown, causing trouble for natural language processing tasks. IMAGACT is a corpus-based ontology of action concepts, derived from English and Italian spontaneous speech corpora, which makes use of the universal language of images to identify the different action types extended by verbs referring to action in English, Italian, Chinese and Spanish. 
This paper presents the infrastructure and the various linguistic information the user can derive from it. IMAGACT makes explicit the variation of meaning of action verbs within one language and allows comparisons of verb variations within and across languages. Because the action concepts are represented with videos, extension into new languages beyond those presently implemented in IMAGACT is done using competence-based judgments by mother-tongue informants without intense lexicographic work involving underdetermined semantic descriptions.}, KEYWORDS = {Lexicon, Lexical Database, Ontologies}, PAGES = {3425-3432}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{PALLOTTI_2014_INPROCEEDINGS_PFAMF_287029, AUTHOR = {Pallotti, G. and Frontini, F. and Affè, F. and Monachini, M. and Ferrari, S.}, TITLE = {Presenting a System of Human-Machine Interaction for Performing Map Tasks}, YEAR = {2014}, ABSTRACT = {A system for human machine interaction is presented, that offers second language learners of Italian the possibility of assessing their competence by performing a map task, namely by guiding a virtual follower through a map with written instructions in natural language.
The underlying natural language processing algorithm is described, and the map authoring infrastructure is presented.}, KEYWORDS = {Language learning, human machine interaction, map tasks}, PAGES = {3963-3966}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{PANUNZI_2014_INPROCEEDINGS_PDGJMMQR_285381, AUTHOR = {Panunzi, A. and De Felice, I. and Gregori, L. and Jacoviello, S. and Monachini, M. and Moneglia, M. and Quochi, V. and Russo, I.}, TITLE = {Translating action verbs using a dictionary of images: the IMAGACT ontology}, YEAR = {2014}, ABSTRACT = {Action verbs have many meanings, covering actions in different ontological types. Moreover, each language categorizes action in its own way. One verb can refer to many different actions and one action can be identified by more than one verb. The range of variations within and across languages is largely unknown, causing trouble in all translation tasks. IMAGACT is a corpus-based ontology of action concepts, derived from English and Italian spontaneous speech corpora, which makes use of the universal language of images to identify the different action types extended by verbs referring to action in English, Italian, Chinese and Spanish. This paper presents the IMAGACT search interface and the various kinds of linguistic information the user can derive from it. IMAGACT makes explicit the variation of meaning of action verbs within one language and allows comparisons of verb variations within and across languages.
Because the action concepts are represented with videos, extension into new languages beyond those presently implemented in IMAGACT is done using competence-based judgments by mother-tongue informants, without intense lexicographic work involving underdetermined semantic descriptions.}, KEYWORDS = {Action verbs, Image ontology, Multilingual dictionary, Computer-aided translation}, PAGES = {1163-1170}, URL = {http://euralex2014.eurac.edu/en/callforpapers/Documents/EURALEX%202014_gesamt.pdf}, DOI = {10.13140/2.1.3719.2320}, PUBLISHER = {EURAC (Bolzano, ITA)}, ISBN = {978-88-88906-97-3}, CONFERENCE_NAME = {XVI EURALEX International Congress: The User in Focus}, CONFERENCE_PLACE = {Bolzano}, CONFERENCE_DATE = {15-19/07/2014}, BOOKTITLE = {Proceedings of the XVI EURALEX International Congress: The User in Focus}, EDITOR = {Abel, A. and Vettori, C. and Ralli, N.}, } @INPROCEEDINGS{RHEM_2014_INPROCEEDINGS_RUCM_287035, AUTHOR = {Rehm, G. and Uszkoreit, H. and Calzolari, N. and Monachini, M.}, TITLE = {The Strategic Impact of META-NET on the Regional, National and International Level}, YEAR = {2014}, ABSTRACT = {This article provides an overview of the dissemination work carried out in META-NET from 2010 until early 2014; we describe its impact on the regional, national and international level, mainly with regard to politics and the situation of funding for LT topics. This paper documents the initiative's work throughout Europe in order to boost progress and innovation in our field.}, KEYWORDS = {LR National/International Projects, Infrastructural/Policy Issues, Multilinguality, Machine Translation}, PAGES = {1517-1524}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N.
and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{GOGGI_2014_INPROCEEDINGS_GMFBPDBM_291816, AUTHOR = {Goggi, S. and Monachini, M. and Frontini, F. and Bartolini, R. and Pardelli, G. and De Mattei, M. and Bustaffa, F. and Manzella, G.}, TITLE = {Marine Planning and Service Platform (MAPS): An Advanced Research Engine for Grey Literature in Marine Science}, YEAR = {2014}, ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is a development of the Marine project (Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013) aiming at building a computer platform for supporting Operative Oceanography in its activities. One of the main objectives of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. Community and Requirements. Operative Oceanography is the branch of marine research which deals with the development of integrated systems for examining and modeling the ocean monitoring and forecast. Experts need access to real-time data on the state of the sea such as forecasts on temperatures, streams, tides and the relevant scientific literature. This finds application in many areas, ranging from civilian and military safety to protection of off-shore and coastal infrastructures. The metadata. The set of metadata associated with marine data is defined in the CDI (Common Data Index) documented standard. They encode: the types of sizes which have been measured; the measurement tools the platform which has been employed; the geographic area where measures have been taken; the environmental matrix; the descriptive documentation.
As concerns the scientific documentation, at the current stage of the CDI standard, a document is shaped around the following metadata: Title, Authors, Version, ISBN/DOI, Topic, Date of publication, Body/Institution, Abstract. The search engine. The query system (which is actually under development) has been designed for operating with structured data - the metadata - and raw data - the associated technical and scientific documentation. Full-text technologies are often unsuccessful when applied to this type of queries since they assume the presence of specific keywords in the text; in order to fix this problem, the MAPS project suggests to use different semantic technologies for retrieving the text and data and thus getting much more complying results. In the Poster we will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data. The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs/problems and their answers.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {93-94}, URL = {http://greyguide.isti.cnr.it/dfdownloadnew.php?ident=GLConference/GL16/2014-G01-015\&langver=en\&scelta=Metadata}, ISBN = {978-90-77484-24-1}, CONFERENCE_NAME = {Sixteenth International Conference on Grey Literature Grey Literature Lobby: Engines and Requesters for Change}, CONFERENCE_PLACE = {Library of Congress Washington D. C., USA}, CONFERENCE_DATE = {December 8-9, 2014}, EDITOR = {Farace, C. B. D. and Frantzen, J.}, } @INPROCEEDINGS{KHAN_2014_INPROCEEDINGS_KFM_291637, AUTHOR = {Khan, F. and Frontini, F.
and Monachini, M.}, TITLE = {A Model for Representing Diachronic Semantic Information in Lexico-Semantic Resources on the Semantic Web}, YEAR = {2014}, ABSTRACT = {The Semantic Web offers a way of publishing structured data online that facilitates the interlinking of different datasets stored at different online locations; indeed one of the main aims of the Semantic Web movement is to actively encourage this enrichment of online datasets with information from other resources, in order to avoid the problem of so called 'data islands'. In contrast to conventional hyperlinks however the links between different resources on the Semantic Web can be given semantic types and classified hierarchically. Data published on the Semantic Web is referred to as Linked Data; if, in addition, this data is available with an open license then it can be referred to as Linked Open Data (Heath 2011).}, KEYWORDS = {Cultural resources, Heritage resources}, PAGES = {1-3}, URL = {http://www.dh.uni-leipzig.de/wo/wp-content/uploads/2014/11/Fahad-Khan-Francesca-Frontini-and-Monica-Monachini-A-Model-for-Representing.pdf}, CONFERENCE_NAME = {Greek and Latin in an age of Open Data. Open Philology Project}, CONFERENCE_PLACE = {University of Leipzig, GERMANY}, CONFERENCE_DATE = {December 1-4, 2014}, } @TECHREPORT{DEMATTEI_2014_TECHREPORT_DMDMBF_335399, AUTHOR = {De Mattei, M. and Medone, D. and D'Angelo, P. and Monachini, M. and Bartolini, R. and Frontini, F.}, TITLE = {MAPS: Architettura del Sistema}, YEAR = {2014}, ABSTRACT = {PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitività Bando DLTM Azione 1.2.2 "Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012. Il presente documento è il deliverable "D3.1 - Architettura del Sistema" del progetto MAPS (Marine Planning and Service Platform). Il progetto MAPS è un'evoluzione del progetto precedente Marine.
Tale evoluzione si articola su tre aspetti diversi: - Un meccanismo di federazione dei dati, che consenta di rendere disponibili ai propri utenti non soltanto i dati prodotti internamente da sistema Marine ma anche quelli resi disponibili da altri sistemi similari, soddisfacendo così un più ampio ambito di esigenze informative. Il deliverable D2.2, Modello della Soluzione specifica in dettaglio queste nuove funzionalità. - Un Catalogo dei Documenti che, conservando la documentazione tecnica e scientifica dei prodotti offerti, possa documentare in modo accurato le modalità di misurazione, elaborazione e controllo dei prodotti forniti e quindi i relativi ambiti di applicabilità. - Un sistema di ricerca capace di selezionare i dati necessari ad uno scopo determinato non soltanto sulla base della loro tipologia, della loro dislocazione territoriale o di altre informazioni simili contenute nei metadati associati come avviene oggi nella maggior parte dei sistemi esistenti, ma anche sulla base delle informazioni contenute nella documentazione tecnica e scientifica. Tali funzionalità sono specificate nel deliverable D1.3 - Modello della Soluzione.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {1-35}, URL = {https://publications.cnr.it/doc/335399}, } @TECHREPORT{DEMATTEI_2014_TECHREPORT_DMMFBM_335403, AUTHOR = {De Mattei, M. and Medone, D. and Maltese, M. and Frontini, F. and Bartolini, R. and Monachini, M.}, TITLE = {META: Report di progettazione degli algoritmi individuati}, YEAR = {2014}, ABSTRACT = {PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitività Bando DLTM Azione 1.2.2 "Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012. Il deliverable definisce l'architettura del Sistema di Estrazione Eventi Meteo realizzato dagli autori nell'ambito del progetto META. 
Il sistema estrae da contenuti online informazione su eventi meteo critici verificatesi in Liguria e nel nord della Toscana.}, KEYWORDS = {Ontology, Information Extraction, Taxonomy}, PAGES = {1-19}, URL = {https://publications.cnr.it/doc/335403}, } @TECHREPORT{FRONTINI_2014_TECHREPORT_FBM_335400, AUTHOR = {Frontini, F. and Bartolini, R. and Monachini, M.}, TITLE = {MAPS: Stato dell'Arte}, YEAR = {2014}, ABSTRACT = {PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitività Bando DLTM Azione 1.2.2 "Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012 Il documento descrive lo stato dell'arte delle tecnologie linguistiche applicate ai sistemi di ricerca semantica.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {1-21}, URL = {https://publications.cnr.it/doc/335400}, } @TECHREPORT{FRONTINI_2014_TECHREPORT_FBM_335402, AUTHOR = {Frontini, F. and Bartolini, R. and Monachini, M.}, TITLE = {META:-Report sui modelli e tecniche linguistiche}, YEAR = {2014}, ABSTRACT = {PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitività Bando DLTM Azione 1.2.2 "Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012. Il deliverable riassume lo stato dell'arte delle tecnologie semantiche che possono essere impiegate nella realizzazione del progetto META. Il progetto META è una progetto di ricerca e sviluppo tecnologico finanziato dalla Regione Liguria con i fondi POR-FESR 2007-2013 della Comunità Europea che mira alla realizzazione di un sistema per l'allerta di eventi meteo critici in Liguria e nel nord della Toscana. 
Nell'ambito del progetto META le tecnologie semantiche sono utilizzate per estrarre eventi meteo di interesse da articoli pubblicati in rete o sui social network.}, KEYWORDS = {Ontology, Information Extraction, Semantic Web, Search Engine}, PAGES = {1-20}, URL = {https://publications.cnr.it/doc/335402}, } @TECHREPORT{FRONTINI_2014_TECHREPORT_FBMPG_287039, AUTHOR = {Frontini, F. and Bartolini, R. and Monachini, M. and Pardelli, G. and Goggi, S.}, TITLE = {Stato dell'arte dei motori semantici. Progetto MAPS, programma operativo regionale POR-FESR (2007-2013)}, YEAR = {2014}, ABSTRACT = {Il presente documento è il deliverable "D1.1 - Stato dell'Arte dei motori semantici" del progetto MAPS (Marine Planning and Service Platform). Il progetto MAPS è una evoluzione del progetto precedente Marine. Tramite il progetto Marine (Bando Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013 - pos n.1) è stata realizzata una piattaforma informatica di supporto all'Oceanografia Operativa capace di raccogliere dati marini per renderli poi disponibili ai ricercatori e alle organizzazioni interessate tramite protocolli standard. Lo scopo del progetto MAPS è quello di realizzare un Catalogo di Documenti contenente informazioni per la piattaforma Marine. Caratteristica di MAPS è di fornire accesso ai dati oceanografici sia attraverso la ricerca per metadati, sia attraverso la ricerca semantica contenuta nella manualistica tecnico scientifica di riferimento.}, PAGES = {1-22}, URL = {https://publications.cnr.it/doc/287039}, } @INCOLLECTION{CALZOLARI_2013_INCOLLECTION_CBLM_231482, AUTHOR = {Calzolari, N. and Bertagna, F. and Lenci, A.
and Monachini, M.}, TITLE = {Boosting Lexical Resources for the Semantic Web: Generative Lexicon and Lexicon Interoperability}, YEAR = {2013}, ABSTRACT = {To make the vision of a European Information Infrastructure and of the Semantic Web a reality, two key issues are tackled: (i) content, which must be dealt with in a multilingual environment; (ii) standards, which are critical to achieve interoperability and integration. In the Semantic Web scenario, ontologies are the key components to manage knowledge, whereas, in Human Language Technology, semantic description is committed to computational lexicons, which have to squarely address the complexity of natural language. Answers to the above issues are found within two frameworks: first, in the framework of Generative Lexicon (GL) theory and GL-based lexicons that account for the complex, multidimensional and multifaceted nature of meaning in lexicon and ontology design; second, in the context of the ISLE enterprise which, with the MILE, represents an essential interface between advanced research in the field of multilingual lexical semantics and the practical task of developing resources for HLT.}, KEYWORDS = {Generative Lexicon, Computational Lexicons, Standards, Semantic Web, Human Language Technology}, PAGES = {415-431}, URL = {https://doi.org/10.1007/978-94-007-5189-7_18}, VOLUME = {46}, DOI = {10.1007/978-94-007-5189-7_18}, PUBLISHER = {Springer (Dordrecht, NLD)}, ISBN = {9789400751880}, BOOKTITLE = {Advances in Generative Lexicon Theory}, SERIES = {Text, Speech and Language Technology}, EDITOR = {Pustejovsky, J. and Bouillon, P. and Isahara, H. and Kanzaki, K. and Lee, C.}, } @INCOLLECTION{CALZOLARI_2013_INCOLLECTION_CMS_280537, AUTHOR = {Calzolari, N. and Monachini, M.
and Soria, C.}, TITLE = {LMF-Historical Context and Perspectives}, YEAR = {2013}, ABSTRACT = {The importance of designing standards for language resources (LR) is firmly established, starting with the Expert Advisory Group for Language Engineering (EAGLES) and International Standards for Language Engineering (ISLE) initiatives. Both EAGLES and ISLE stress the importance of reaching a consensus on (linguistic and nonlinguistic) "content", in addition to agreement on formats and encoding issues, and also address the needs of content processing and Semantic Web technologies. The recommendations for standards and best practices issued within the projects became, through the INTERA and mainly the LIRICS project, the International Organization for Standardization (ISO) within the ISO TC37/SC4 committee, where Lexical Markup Framework (LMF) was developed. Standards are fundamental to exchange, preserve, maintain and integrate data and LRs, to achieve interoperability in general, and they are an essential basis of any LR infrastructure.}, KEYWORDS = {EAGLES, international standards for language engineering, interoperability, lexical markup framework (LMF)}, PAGES = {1-18}, URL = {http://dx.doi.org/10.1002/9781118712696.ch1}, DOI = {10.1002/9781118712696.ch1}, PUBLISHER = {John Wiley \& Sons, Inc (Hoboken, USA)}, ISBN = {978-1-118-71259-7}, BOOKTITLE = {LMF Lexical Markup Framework}, EDITOR = {Gil, F. and Patrick, P.}, } @INCOLLECTION{HAYASHI_2013_INCOLLECTION_HMSSC_285427, AUTHOR = {Hayashi, Y. and Monachini, M. and Savas, B. and Soria, C. and Calzolari, N.}, TITLE = {LMF as a Foundation for Servicized Lexical Resources}, YEAR = {2013}, ABSTRACT = {This chapter argues that the lexical markup framework (LMF) can play a significant role in realizing servicized lexical resources on the Web. 
To accomplish this goal, it begins with a brief introduction of the notion of servicized resources, and then presents a technical architecture of, what is called, LMF-aware lexicon access services. It presents two implementation showcases to demonstrate the applicability of the LMF and to discuss its possible extensions. The first example deals with WordNet-type computational semantic lexicons, while the other takes up a machine-readable bilingual dictionary primarily compiled for human usage. To conclude the chapter, the final sections summarize the results while reviewing related work.}, KEYWORDS = {lexical markup framework (LMF), LMF-aware lexicon access services, servicized lexical resources}, PAGES = {201-213}, URL = {http://onlinelibrary.wiley.com/doi/10.1002/9781118712696.ch14/references}, DOI = {10.1002/9781118712696.ch14}, PUBLISHER = {Wiley-ISTE (Hoboken, USA)}, ISBN = {9781118712696}, BOOKTITLE = {LMF-Lexical Markup Framework}, EDITOR = {Francopoulo, G.}, } @INCOLLECTION{VOSSEN_2013_INCOLLECTION_VSM_285402, AUTHOR = {Vossen, P. and Soria, C. and Monachini, M.}, TITLE = {Wordnet-LMF: A Standard Representation for Multilingual Wordnets}, YEAR = {2013}, ABSTRACT = {Wordnet-lexical markup framework (LMF) is an instantiation of LMF for representing Wordnet-like semantic dictionaries. Wordnet is a widely accepted resource and thus provides a good case for testing the viability of a representation in LMF and the acceptance by a wide range of users. Wordnet-LMF was developed in the framework of the EU project KYOTO for the specific purpose of endowing a set of wordnets with a standardized interoperability format allowing the interchange of semantic information. This chapter explains the choices that were made to model the wordnet information in LMF. 
It provides a preliminary assessment of LMF, by large-scale application to real lexical resources, endowing wordnet with a format representation that allows easier integration among resources sharing the same structure and, more importantly, across resources with different theoretical and implementation approaches.}, KEYWORDS = {KYOTO project, multilingual wordnets, Wordnet-lexical markup framework}, PAGES = {51-66}, URL = {http://dx.doi.org/10.1002/9781118712696.ch4}, DOI = {10.1002/9781118712696.ch4}, PUBLISHER = {Wiley-ISTE (Hoboken, USA)}, ISBN = {9781118712696}, BOOKTITLE = {LMF-Lexical Markup Framework}, EDITOR = {Francopoulo, G.}, } @EDITORIAL{SAUR_2013_EDITORIAL_SCHLMP_288143, AUTHOR = {Saurí, R. and Calzolari, N. and Huang, C. R. and Lenci, A. and Monachini, M. and Pustejovsky, J.}, TITLE = {Proceedings of the 6th International Conference on Generative Approaches to the Lexicon Generative Lexicon and Distributional Semantics}, YEAR = {2013}, ABSTRACT = {The papers in this volume represent some of the most recent and exciting work being carried out both within the framework of Generative Lexicon and related approaches to the lexicon and lexical resources. With the recent emphasis in natural language processing on the development of machine learning algorithms, it has become even more important for computational linguists to work on the development of linguistically informed lexical resources, for use in the annotation of corpora and creation of gold standard data for training, as well as the collation of larger theoretical datasets for investigating linguistic phenomena in greater detail and sophistication. 
These works contribute to this trend as well as to the further development of the mechanisms within GL for describing and explaining semantic and lexical phenomena in language}, KEYWORDS = {Generative Lexicon, Language Resources}, PAGES = {i-126}, URL = {https://aclweb.org/anthology/W/W13/W13-5400.pdf}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-937284-98-5}, } @INPROCEEDINGS{FRONTINI_2013_INPROCEEDINGS_FDM_287280, AUTHOR = {Frontini, F. and Del Gratta, R. and Monachini, M.}, TITLE = {Linking the Geonames ontology to WordNet}, YEAR = {2013}, ABSTRACT = {This paper illustrates the transformation of the GeoNames ontology concepts, with their English labels and glosses, into a GeoDomain WordNet-like resource in English, its translation into Italian, and its linking to the existing generic WordNets of both languages.}, KEYWORDS = {GeoNames, WordNet, lemon}, PAGES = {263-267}, URL = {http://hnk.ffzg.hr/bibl/ltc2013/book/papers/OWN-2.pdf}, PUBLISHER = {Fundacja Uniwersytetu im A. Mickiewicza (Poznan, POL)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {6th Language \& Technology Conference: Human Language Technologies as a Challenge for Computer Science and Linguistics}, CONFERENCE_PLACE = {Poznan, Poland}, CONFERENCE_DATE = {December 7-9, 2013}, BOOKTITLE = {Human Language Technologies as a Challenge for Computer Science and Linguistics. Proceedings, 6th Language \& Technology Conference, December 7-9, 2013, Poznań, Poland}, EDITOR = {Vetulani, Z. and Uszkoreit, H.}, } @INPROCEEDINGS{MARCHETTI_2013_INPROCEEDINGS_MTALDFM_287331, AUTHOR = {Marchetti, A. and Tesconi, M. and Abbate, S. and Lo Duca, A. and D'Errico, A. and Frontini, F. 
and Monachini, M.}, TITLE = {Tour-pedia: a web application for the analysis and visualization of opinions for tourism domain}, YEAR = {2013}, ABSTRACT = {We present Tour-pedia an interactive web application that extracts opinions from reviews of accommodations from different sources available on-line. Polarity markers display on a map the different opinions. This tool is intended to help business operators to manage reputation on-line.}, KEYWORDS = {Visualization tools, opinion mining, NLP on social media, tourism reviews}, PAGES = {594-595}, URL = {http://www.iit.cnr.it/sites/default/files/ltc2013_opener_demo.pdf}, PUBLISHER = {Fundacja Uniwersytetu im A. Mickiewicza (Poznan, POL)}, ISBN = {978-83-932640-4-9}, CONFERENCE_NAME = {6th Language \& Technology Conference: Human Language Technologies as a Challenge for Computer Science and Linguistics}, CONFERENCE_PLACE = {Poznan, Poland}, CONFERENCE_DATE = {December 7-9, 2013}, EDITOR = {Vetulani, Z. and Uszkoreit, H.}, } @INPROCEEDINGS{MONEGLIA_2013_INPROCEEDINGS_MPGMRDKF_287346, AUTHOR = {Moneglia, M. and Panunzi, A. and Gagliardi, G. and Monachini, M. and Russo, I. and De Felice, I. and Khan, F. and Frontini, F.}, TITLE = {IMAGACT E-learning Platform for Basic Action Types. In: Pixel (ed.), Proceedings of the 6th International Conference ICT for Language Learning}, YEAR = {2013}, ABSTRACT = {Action verbs express important information in a sentence and they are the most frequent elements in speech, but they are also one of the most difficult part of the lexicon to learn for L2 language learners, because languages segment these concepts in very different ways. The two sentences "Mary folds her shirt" and "Mary folds her arms" refer to two completely different types of action, as becomes evident when they are translated into another language (e.g., in Italian they would be translated as "Maria piega la camicia" and "Maria incrocia le braccia" respectively). 
IMAGACT e-learning platform aims to make these differences evident by creating a cross-linguistic ontology of action types, whose nodes consist of 3D scenes, each of which relates to one action type. In order to identify these types, contexts of use have been extracted from English and Italian spontaneous speech corpora for around 600 high frequency action verbs (for each language). All instances that refer to similar events (e.g., fold the shirt/ the blanket) are grouped under one single action type: each one of these types is then represented by a linguistic best example and a short video that represents simple actions (e.g. a man taking a glass from a table). The action types extracted for Italian and English are compared and merged into one cross-linguistic ontology of action. IMAGACT has provided an internet based annotation infrastructure to derive this information from corpora. The project is now completed for the Italian and English lexicon, data extraction for Chinese and Spanish is ongoing. Reference to prototypical imagery is crucial in order to bootstrap the learning process. By selecting the set of 3D scenes referred to by a verb in one language and viewing the type of activity represented therein learners can directly understand the range of applicability of each verb. Thanks to an easy interface, a user can access the English/Italian/Chinese lexicon by lemma or directly by 3D scenes. For example, searching for the verb "to turn", s/he will be presented with a number of scenes, showing the various action types associated to that verb. Clicking on a scene s/he will know how this type of action is referred to in the other languages}, KEYWORDS = {Ontology}, PAGES = {85-89}, URL = {https://publications.cnr.it/doc/287346},
PUBLISHER = {libreriauniversitaria.it (Limena, ITA)}, ISBN = {978-88-6292-423-8}, CONFERENCE_NAME = {International Conference "ICT for Language Learning", 6th edition}, CONFERENCE_PLACE = {Florence, Italy}, CONFERENCE_DATE = {14-15 november 2013}, BOOKTITLE = {Conference Proceedings. ICT for Language Learning}, EDITOR = {Pixel}, } @INPROCEEDINGS{RUSSO_2013_INPROCEEDINGS_RDFKM_285373, AUTHOR = {Russo, I. and De Felice, I. and Frontini, F. and Khan, F. and Monachini, M.}, TITLE = {(Fore)seeing actions in objects. Acquiring distinctive affordances from language}, YEAR = {2013}, ABSTRACT = {In this paper we investigate if conceptual information concerning objects' affordances as possibilities for actions anchored to an object can be at least partially acquired through language. Considering verb-noun pairs as the linguistic realizations of relations between actions performed by an agent and objects we collect this information from the ImagAct dataset, a linguistic resource obtained from manual annotation of basic action verbs, and from a web corpus (itTenTen). The notion of affordance verb as the most distinctive verb in ImagAct enables a comparison with distributional data that reveal how lemmas ranking based on a semantic association measure that mirror that of affordances as the most distinctive actions an object can be involved in.}, PAGES = {151-161}, URL = {https://docs.google.com/viewer?a=v\&pid=sites\&srcid=ZGVmYXVsdGRvbWFpbnxubHBjczIwMTN8Z3g6MTI0ZGMzYWYwYmMxNjY1Mg}, CONFERENCE_NAME = {NLPCS 2013-10th International Workshop on Natural Language Processing and Cognitive Science}, CONFERENCE_PLACE = {Marseille}, CONFERENCE_DATE = {15-17/10/2013}, BOOKTITLE = {Proceedings of NLPCS 2013-10th International Workshop on Natural Language Processing and Cognitive Science}, EDITOR = {Sharp, B. and Zock, M.}, } @INPROCEEDINGS{RUSSO_2013_INPROCEEDINGS_RFDKM_287456, AUTHOR = {Russo, I. and Frontini, F. and De Felice, I. and Khan, F. 
and Monachini, M.}, TITLE = {Disambiguation of Basic Action Types through Nouns' Telic Qualia}, YEAR = {2013}, ABSTRACT = {Knowledge about semantic associations between words is effective to disambiguate word senses. The aim of this paper is to investigate the role and the relevance of telic information from SIMPLE in the disambiguation of basic action types of Italian HOLD verbs ( prendere, 'to take', raccogliere, 'to pick up', pigliare 'to grab' etc.). We propose an experiment to compare the results obtained with telic information from SIMPLE with basic co-occurrence information extracted from corpora (most salient verbs modifying nouns) classified in terms of general semantic classes to avoid data sparseness.}, PAGES = {70-75}, URL = {http://www.aclweb.org/anthology/W13-5410}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-937284-98-5}, CONFERENCE_NAME = {6th International Conference on Generative Approaches to the Lexicon Generative Lexicon and Distributional Semantics}, CONFERENCE_PLACE = {Pisa, Italy}, CONFERENCE_DATE = {24-25/09/2013}, BOOKTITLE = {Proceedings of the 6th International Conference on Generative Approaches to the Lexicon. Generative Lexicon and Distributional Semantics}, EDITOR = {Saurí, R. and Calzolari, N. and Huang, C. and Lenci, A. and Monachini, M. and Pustejovsky, J.}, } @ARTICLE{HAYASHI_2012_ARTICLE_HSMSC_218777, AUTHOR = {Hayashi, Y. and Savas, B. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {LMF-aware Web services for accessing semantic lexicons}, YEAR = {2012}, ABSTRACT = {This paper demonstrates that Wordnet-LMF, a version of ISO LMF, allows us to effectively design and implement Web services for accessing WordNettype semantic lexicons that conform to the REST Web service architecture. The implemented prototype service currently provides access to native wordnets as well as to a bilingual concept dictionary. 
This paper thus describes slight revisions that were made to the Wordnet-LMF specifications to model and accommodate a nonwordnet-native bilingual concept dictionary.}, KEYWORDS = {Lexical markup framework Semantic lexicons Wordnets Language services RESTful Web service design}, PAGES = {253-264}, URL = {http://link.springer.com/content/pdf/10.1007%2Fs10579-012-9181-4.pdf}, VOLUME = {46}, DOI = {10.1007/s10579-012-9181-4}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @ARTICLE{TORAL_2012_ARTICLE_TFMM_218786, AUTHOR = {Toral, A. and Ferrández, S. and Monachini, M. and Munoz, R.}, TITLE = {Web 2.0, Language Resources and standards to automatically build a multilingual Named Entity Lexicon}, YEAR = {2012}, ABSTRACT = {This paper proposes to advance in the current state-of-the-art of automatic Language Resource (LR) building by taking into consideration three elements: (1) the knowledge available in existing LRs, (2) the vast amount of information available from the collaborative paradigm that has emerged from the Web 2.0 and (3) the use of standards to improve interoperability. We present a case study in which a set of LRs for different languages (WordNet for English and Spanish and Parole-Simple-Clips for Italian) are extended with Named Entities (NE) by exploiting Wikipedia and the aforementioned LRs. The practical result is a multilingual NE lexicon connected to these LRs and to two ontologies: SUMO and SIMPLE. Furthermore, the paper addresses an important problem which affects the Computational Linguistics area in the present, interoperability, by making use of the ISO LMF standard to encode this lexicon. The different steps of the procedure (mapping, disambiguation, extraction, NE identification and postprocessing) are comprehensively explained and evaluated. The resulting resource contains 974,567, 137,583 and 125,806 NEs for English, Spanish and Italian respectively. 
Finally, in order to check the usefulness of the constructed resource, we apply it into a state-of-the-art Question Answering system and evaluate its impact; the NE lexicon improves the system's accuracy by 28.1\%. Compared to previous approaches to build NE repositories, the current proposal represents a step forward in terms of automation, language independence, amount of NEs acquired and richness of the information represented.}, KEYWORDS = {Language Resources Named Entities Web 2.0 Standards}, PAGES = {383-419}, URL = {http://link.springer.com/content/pdf/10.1007%2Fs10579-011-9148-x.pdf}, VOLUME = {46}, DOI = {10.1007/s10579-011-9148-x}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @INPROCEEDINGS{DELGRATTA_2012_INPROCEEDINGS_DFMQRAL_223098, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Quochi, V. and Rubino, F. and Abrate, M. and Lo Duca, A.}, TITLE = {L-LEME: an Automatic Lexical Merger based on the LMF Standard}, YEAR = {2012}, ABSTRACT = {The present paper describes LMF LExical MErger (L-LEME), an architecture to combine two lexicons in order to obtain new resource(s). L-LEME relies on standards, thus exploiting the benefits of the ISO Lexical Markup Framework (LMF) to ensure interoperability. L-LEME is meant to be dynamic and heavily adaptable: it allows the users to configure it to meet their specific needs. The L-LEME architecture is composed of two main modules: the Mapper, which takes in input two lexicons A and B and a set of user-defined rules and instructions to guide the mapping process (Directives D) and gives in output all matching entries. The algorithm also calculates a cosine similarity score. The Builder takes in input the previous results, a set of Directives D1 and produces a new LMF lexicon C. The Directives allow the user to define its own building rules and different merging scenarios. 
L-LEME is applied to a specific concrete task within the PANACEA project, namely the merging of two Italian SubCategorization Frame (SCF) lexicons. The experiment is interesting in that A and B have different philosophies behind, being A built by human introspection and B automatically extracted. Ultimately, L-LEME has interesting repercussions in many language technology applications}, KEYWORDS = {LMF, Lexicon mapping, similarity score}, PAGES = {31-40}, URL = {https://publications.cnr.it/doc/223098}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {The Eight International Conference on Language Resources and Evaluation (LREC) 2012}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {2012}, BOOKTITLE = {Proceedings of the LREC 2012 Workshop on Language Resource Merging}, EDITOR = {Bel, N. and Gavrilidou, M. and Monachini, M. and Quochi, V. and Rimell, L.}, } @INPROCEEDINGS{GAVRILIDOU_2012_INPROCEEDINGS_GLDPPMFDFAM_219704, AUTHOR = {Gavrilidou, M. and Labropoulou, P. and Desipri, E. and Piperidis, S. and Papageorgiou, H. and Monachini, M. and Frontini, F. and Declerck, T. and Francopoulo, G. and Arranz, V. and Mapelli, V.}, TITLE = {The META-SHARE Metadata Schema for the Description of Language Resources}, YEAR = {2012}, ABSTRACT = {This paper presents a metadata model for the description of language resources proposed in the framework of the META-SHARE infrastructure, aiming to cover both datasets and tools/technologies used for their processing. 
It places the model in the overall framework of metadata models, describes the basic principles and features of the model, elaborates on the distinction between minimal and maximal versions thereof, briefly presents the integrated environment supporting the LRs description and search and retrieval processes and concludes with work to be done in the future for the improvement of the model.}, KEYWORDS = {metadata, META-SHARE, LRs description}, PAGES = {1090-1097}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/index.html}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {The Eighth International Conference on Language Resources and Evaluation (LREC'12)}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {23-25 may 2012}, } @INPROCEEDINGS{MONACHINI_2012_INPROCEEDINGS_MFDRKGP_220211, AUTHOR = {Monachini, M. and Frontini, F. and De Felice, I. and Russo, I. and Khan, F. and Gagliardi, G. and Panunzi, A.}, TITLE = {Verb interpretation for basic action types: annotation, ontology induction and creation of prototypical scenes}, YEAR = {2012}, ABSTRACT = {In the last 20 years dictionaries and lexicographic resources such as WordNet have started to be enriched with multimodal content. Short videos depicting basic actions support the user's need (especially in second language acquisition) to fully understand the range of applicability of verbs. The IMAGACT project has among its results a repository of action verbs ontologically organised around prototypical action scenes in the form of both video recordings and 3D animations. The creation of the IMAGACT ontology, which consists in deriving action types from corpus instances of action verbs, intra and cross linguistically validating them and producing the prototypical scenes thereof, is the preliminary step for the creation of a resource that users can browse by verb, learning how to match different action prototypes with the correct verbs in the target language. 
The mapping of IMAGACT types onto WordNet synsets allows for a mutual enrichment of both resources.}, KEYWORDS = {ontology of actions, lexical resource, 3D animations}, PAGES = {69-80}, URL = {https://publications.cnr.it/doc/220211}, CONFERENCE_NAME = {COLING 2012-3rd Workshop on Cognitive Aspects of the Lexicon (CogALex-III)}, CONFERENCE_PLACE = {Mumbai, India}, CONFERENCE_DATE = {15 December 2012}, } @INPROCEEDINGS{MONEGLIA_2012_INPROCEEDINGS_MGPFRM_220262, AUTHOR = {Moneglia, M. and Gagliardi, G. and Panunzi, A. and Frontini, F. and Russo, I. and Monachini, M.}, TITLE = {IMAGACT: Deriving an Action Ontology from Spoken Corpora}, YEAR = {2012}, ABSTRACT = {This paper presents the IMAGACT annotation infrastructure which uses both corpus-based and competence-based methods for the simultaneous extraction of a language independent Action ontology from English and Italian spontaneous speech corpora. The infrastructure relies on an innovative methodology based on images of prototypical scenes and will identify high frequency action concepts in everyday life, suitable for the implementation of an open set of languages.}, KEYWORDS = {Action verbs Ontology imagery}, PAGES = {42-47}, URL = {https://publications.cnr.it/doc/220262}, ISBN = {978-90-74029-00-1}, CONFERENCE_NAME = {Eighth Joint ISO-ACL SIGSEM Workshop on Interoperable Semantic Annotation (ISA-8)}, CONFERENCE_PLACE = {Pisa, Italy}, CONFERENCE_DATE = {3-5 October 2012}, BOOKTITLE = {Proceedings of the Eighth Joint ISO-ACL SIGSEM Workshop on Interoperable Semantic Annotation ISA-8}, EDITOR = {Bunt, H.}, } @INPROCEEDINGS{MONEGLIA_2012_INPROCEEDINGS_MMCPFGR_219656, AUTHOR = {Moneglia, M. and Monachini, M. and Calabrese, O. and Panunzi, A. and Frontini, F. and Gagliardi, G. and Russo, I.}, TITLE = {The IMAGACT Cross-linguistic Ontology of Action. 
A new infrastructure for natural language disambiguation}, YEAR = {2012}, ABSTRACT = {Action verbs, which are highly frequent in speech, cause disambiguation problems that are relevant to Language Technologies. This is a consequence of the peculiar way each natural language categorizes Action i.e. it is a consequence of semantic factors. Action verbs are frequently "general", since they extend productively to actions belonging to different ontological types. Moreover, each language categorizes action in its own way and therefore the cross-linguistic reference to everyday activities is puzzling. This paper briefly sketches the IMAGACT project, which aims at setting up a cross-linguistic Ontology of Action for grounding disambiguation tasks in this crucial area of the lexicon. The project derives information on the actual variation of action verbs in English and Italian from spontaneous speech corpora, where references to action are high in frequency. Crucially it makes use of the universal language of images to identify action types, avoiding the underdeterminacy of semantic definitions. Action concept entries are implemented as prototypic scenes; this will make it easier to extend the Ontology to other languages.}, KEYWORDS = {Action verbs, Ontology, Imagery}, PAGES = {2606-2613}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/428_Paper.pdf}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {The Eighth International Conference on Language Resources and Evaluation (LREC'12)}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {23-25 may 2012}, } @INPROCEEDINGS{MONEGLIA_2012_INPROCEEDINGS_MMPFGR_220270, AUTHOR = {Moneglia, M. and Monachini, M. and Panunzi, A. and Frontini, F. and Gagliardi, G. and Russo, I.}, TITLE = {Mapping a corpus-induced ontology of action verbs on ItalWordNet}, YEAR = {2012}, ABSTRACT = {Action verbs are the least predictable linguistic type for bilingual dictionaries and they cause major problems for NLP technologies. 
This is not only because of language specific phraseology, but it is rather a consequence of the peculiar way each language categorizes events. In ordinary languages the most frequent action verbs are "general", since they extend productively to actions belonging to different ontological types. Moreover, each language categorizes actions in its own way and therefore the cross-linguistic reference to everyday activities is puzzling. A cross-linguistic stable ontology of actions is difficult to achieve because our knowledge on the actual variation of verbs across types of actions is largely unknown. This paper briefly presents the problems and the building strategies of the IMAGACT Ontology, which aims at filling this gap, and compares some early results on a set of Italian verbs with the information contained in ItalWordNet.}, KEYWORDS = {action verbs ontology image}, PAGES = {219-226}, URL = {https://publications.cnr.it/doc/220270}, ISBN = {978-80-263-0244-5}, CONFERENCE_NAME = {Global Wordnet Conference (GWC2012)}, CONFERENCE_PLACE = {Matsue, Japan}, CONFERENCE_DATE = {9-13 January 2012}, BOOKTITLE = {Proceedings of the 6th Global WordNet Conference (GWC2012)}, EDITOR = {Fellbaum, C. and Vossen, P.}, } @INPROCEEDINGS{SORIA_2012_INPROCEEDINGS_SBCMMOPQC_219679, AUTHOR = {Soria, C. and Bel, N. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Calzolari, N.}, TITLE = {The FLaReNet Strategic Language Resource Agenda}, YEAR = {2012}, ABSTRACT = {The FLaReNet Strategic Agenda highlights the most pressing needs for the sector of Language Resources and Technologies and presents a set of recommendations for its development and progress in Europe, as issued from a three-year consultation of the FLaReNet European project. 
The FLaReNet recommendations are organised around nine dimensions: a) documentation b) interoperability c) availability, sharing and distribution d) coverage, quality and adequacy e) sustainability f) recognition g) development h) infrastructure and i) international cooperation. As such, they cover a broad range of topics and activities, spanning over production and use of language resources, licensing, maintenance and preservation issues, infrastructures for language resources, resource identification and sharing, evaluation and validation, interoperability and policy issues. The intended recipients belong to a large set of players and stakeholders in Language Resources and Technology, ranging from individuals to research and education institutions, to policy-makers, funding agencies, SMEs and large companies, service and media providers. The main goal of these recommendations is to serve as an instrument to support stakeholders in planning for and addressing the urgencies of the Language Resources and Technologies of the future.}, KEYWORDS = {strategic agenda, language resources planning, recommended priority actions}, PAGES = {1379-1386}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/index.html}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {The Eight International Conference on Language Resources and Evaluation (LREC'12)}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {23-25 may 2012}, BOOKTITLE = {Proceedings of the 8th international conference on Language Resources and Evaluation (LREC2012)}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Dogan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{ABRATE_2012_INPROCEEDINGS_ABFLMM_220733, AUTHOR = {Abrate, M. and Bacciu, C. and Frontini, F. and Lapolla, M. N. and Marchetti, A. 
and Monachini, M.}, TITLE = {Web Language Identification Testing Tool}, YEAR = {2012}, ABSTRACT = {Nowadays a variety of tools for automatic language identification are available. Regardless of the approach used, at least two features can be identified as crucial to evaluate the performances of such tools: the precision of the presented results and the range of languages that can be detected. In this work we shall focus on a subtask of written language identification that is important to preserve and enhance multilinguality in the Web, i.e. detecting the language of a Web page given its URL. Most specifically, the final aim is to verify to which extent under-represented languages are recognized by available tools. The main specificity of Web Language Identification (WLI) lies in the fact that often an HTML page can provide interesting extralinguistic clues (URL domain name, metadata, encoding, etc) that can enhance accuracy. We shall first provide some data and statistics on the presence of languages on the web, secondly discuss existing practices and tools for language identification according to different metrics - for instance the approaches used and the number of supported languages - and finally make some proposals on how to improve current Web Language Identifiers. We shall also present a preliminary WLI service that builds on the Google Chromium Compact Language Detector; the WLI tool allows us to test the Google n-gram based algorithm against an adhoc gold standard of pages in various languages. 
The gold standard, based on a selection of Wikipedia projects, contains samples in languages for which no automatic recognition has been attempted; it can thus be used by specialists to develop and evaluate WLI systems.}, KEYWORDS = {Multilingual Web}, URL = {https://publications.cnr.it/doc/220733}, CONFERENCE_NAME = {The Multilingual Web-the Way Ahead}, CONFERENCE_PLACE = {Luxembourg}, CONFERENCE_DATE = {15-16 March 2012}, } @INPROCEEDINGS{FRONTINI_2012_INPROCEEDINGS_FMLMAB_348940, AUTHOR = {Frontini, F. and Monachini, M. and Lapolla, M. N. and Marchetti, A. and Abrate, M. and Bacciu, C.}, TITLE = {Web Language Identification Testing Tool}, YEAR = {2012}, ABSTRACT = {Nowadays a variety of tools for automatic language identification are available. Regardless of the approach used, at least two features can be identified as crucial to evaluate the performances of such tools: the precision of the presented results and the range of languages that can be detected. In this work we shall focus on a subtask of written language identification that is important to preserve and enhance multilinguality in the Web, i.e. detecting the language of a Web page given its URL. Most specifically, the final aim is to verify to which extent under-represented languages are recognized by available tools. The main specificity of Web Language Identification (WLI) lies in the fact that often an HTML page can provide interesting extralinguistic clues (URL domain name, metadata, encoding, etc) that can enhance accuracy. We shall first provide some data and statistics on the presence of languages on the web, secondly discuss existing practices and tools for language identification according to different metrics - for instance the approaches used and the number of supported languages - and finally make some proposals on how to improve current Web Language Identifiers. 
We shall also present a preliminary WLI service that builds on the Google Chromium Compact Language Detector; the WLI tool allows us to test the Google n-gram based algorithm against an ad-hoc gold standard of pages in various languages. The gold standard, based on a selection of Wikipedia projects, contains samples in languages for which no automatic recognition has been attempted; it can thus be used by specialists to develop and evaluate WLI systems.}, KEYWORDS = {Language Identification Tools, Multilingual Web}, PAGES = {1-1}, URL = {https://publications.cnr.it/doc/348940}, CONFERENCE_NAME = {W3C Workshop, Call for Participation: The Multilingual Web-The Way Ahead}, CONFERENCE_PLACE = {Luxembourg}, CONFERENCE_DATE = {15-16/03/2012}, } @TECHREPORT{DELGRATTA_2012_TECHREPORT_DMTALRBP_484488, AUTHOR = {Del Gratta, R. and Monachini, M. and Tesconi, M. and Abrate, M. and Lo Duca, A. and Rimell, L. and Bel, N. and Padró, M.}, TITLE = {D6.4 Lexical Merger}, YEAR = {2012}, ABSTRACT = {This document describes the experiments on the merging of lexical resources performed during the project and the development of two merging components for LMF lexicons}, KEYWORDS = {LMF, Lexical Merger}, PAGES = {1-39}, URL = {http://www.panacea-lr.eu/system/deliverables/PANACEA_D6.4.pdf}, } @TECHREPORT{RIMELL_2012_TECHREPORT_RBPFMQ_221631, AUTHOR = {Rimell, L. and Bel, N. and Padró, M. and Frontini, F. and Monachini, M. and Quochi, V.}, TITLE = {D6.2 Integrated Final Version of the Components for Lexical Acquisition}, YEAR = {2012}, ABSTRACT = {The PANACEA project has addressed one of the most critical bottlenecks that threaten the development of technologies to support multilingualism in Europe, and to process the huge quantity of multilingual data produced annually. Any attempt at automated language processing, particularly Machine Translation (MT), depends on the availability of language-specific resources. 
Such Language Resources (LR) contain information about the language's lexicon, i.e. the words of the language and the characteristics of their use. In Natural Language Processing (NLP), LRs contribute information about the syntactic and semantic behaviour of words - i.e. their grammar and their meaning - which inform downstream applications such as MT. To date, many LRs have been generated by hand, requiring significant manual labour from linguistic experts. However, proceeding manually, it is impossible to supply LRs for every possible pair of European languages, textual domain, and genre, which are needed by MT developers. Moreover, an LR for a given language can never be considered complete nor final because of the characteristics of natural language, which continually undergoes changes, especially spurred on by the emergence of new knowledge domains and new technologies. PANACEA has addressed this challenge by building a factory of LRs that progressively automates the stages involved in the acquisition, production, updating and maintenance of LRs required by MT systems. The existence of such a factory will significantly cut down the cost, time and human effort required to build LRs. WP6 has addressed the lexical acquisition component of the LR factory, that is, the techniques for automated extraction of key lexical information from texts, and the automatic collation of lexical information into LRs in a standardized format. The goal of WP6 has been to take existing techniques capable of acquiring syntactic and semantic information from corpus data, improving upon them, adapting and applying them to multiple languages, and turning them into powerful and flexible techniques capable of supporting massive applications. One focus for improving the scalability and portability of lexical acquisition techniques has been to extend exiting techniques with more powerful, less "supervised" methods. 
In NLP, the amount of supervision refers to the amount of manual annotation which must be applied to a text corpus before machine learning or other techniques are applied to the data to compile a lexicon. More manual annotation means more accurate training data, and thus a more accurate LR. However, given that it is impractical from a cost and time perspective to manually annotate the vast amounts of data required for multilingual MT across domains, it is important to develop techniques which can learn from corpora with less supervision. Less supervised methods are capable of supporting both large-scale acquisition and efficient domain adaptation, even in the domains where data is scarce. Another focus of lexical acquisition in PANACEA has been the need of LR users to tune the accuracy level of LRs. Some applications may require increased precision, or accuracy, where the application requires a high degree of confidence in the lexical information used. At other times a greater level of coverage may be required, with information about more words at the expense of some degree of accuracy. Lexical acquisition in PANACEA has investigated confidence thresholds for lexical acquisition to ensure that the ultimate users of LRs can generate lexical data from the PANACEA factory at the desired level of accuracy.}, KEYWORDS = {Lexical Acquisition}, URL = {http://www.panacea-lr.eu/system/deliverables/PANACEA_D6.2.pdf}, } @TECHREPORT{RIMELL_2012_TECHREPORT_RBPFMQD_221650, AUTHOR = {Rimell, L. and Bel, N. and Padró, M. and Frontini, F. and Monachini, M. and Quochi, V. and Del Gratta, R.}, TITLE = {D6.5 Merged dictionaries}, YEAR = {2012}, ABSTRACT = {This document presents the merged dictionaries delivered in PANACEA. Those dictionaries result from merging already existing lexica, generally for general domain, with domain specific lexica acquired using PANACEA platform. 
The domain specific lexica are presented and delivered in D6.3 and the merging repository that allowed the multilevel merging in D6.4.}, KEYWORDS = {merged dictionaries, computational lexicon}, URL = {http://www.panacea-lr.eu/en/deliverables/list}, } @TECHREPORT{RIMELL_2012_TECHREPORT_RBPFMQD_221755, AUTHOR = {Rimell, L. and Bel, N. and Padró, M. and Frontini, F. and Monachini, M. and Quochi, V. and Del Gratta, R.}, TITLE = {D6.3 Monolingual lexica for English, Spanish and Italian tuned for a particular domain (LAB and ENV)}, YEAR = {2012}, ABSTRACT = {This document presents the lexica acquired using PANACEA platform for Labour and Environment domains. The languages of the lexica are English, Spanish and Italian. The lexical information acquired depends on the language, according to the available tools in the platform.}, KEYWORDS = {Lexicon Acquisition}, URL = {http://www.panacea-lr.eu/system/deliverables/PANACEA_D6.3.pdf}, } @ARTICLE{THOMPSON_2011_ARTICLE_TMMCDLMMPQRSVRA_205232, AUTHOR = {Thompson, P. and McNaught, J. and Montemagni, S. and Calzolari, N. and Del Gratta, R. and Lee, V. and Marchi, S. and Monachini, M. and Pezik, P. and Quochi, V. and Rupp, C. and Sasaki, Y. and Venturi, G. and Rebholz-Schuhmann, D. and Ananiadou, S.}, TITLE = {The BioLexicon: a large-scale terminological resource for biomedical text mining}, YEAR = {2011}, ABSTRACT = {Background Due to the rapidly expanding body of biomedical literature, biologists require increasingly sophisticated and efficient systems to help them to search for relevant information. Such systems should account for the multiple written variants used to represent biomedical concepts, and allow the user to search for specific pieces of knowledge (or events) involving these concepts, e.g., protein-protein interactions. Such functionality requires access to detailed information about words used in the biomedical literature. 
Existing databases and ontologies often have a specific focus and are oriented towards human use. Consequently, biological knowledge is dispersed amongst many resources, which often do not attempt to account for the large and frequently changing set of variants that appear in the literature. Additionally, such resources typically do not provide information about how terms relate to each other in texts to describe events. Results This article provides an overview of the design, construction and evaluation of a large-scale lexical and conceptual resource for the biomedical domain, the BioLexicon. The resource can be exploited by text mining tools at several levels, e.g., part-of-speech tagging, recognition of biomedical entities, and the extraction of events in which they are involved. As such, the BioLexicon must account for real usage of words in biomedical texts. In particular, the BioLexicon gathers together different types of terms from several existing data resources into a single, unified repository, and augments them with new term variants automatically extracted from biomedical literature. Extraction of events is facilitated through the inclusion of biologically pertinent verbs (around which events are typically organized) together with information about typical patterns of grammatical and semantic behaviour, which are acquired from domain-specific texts. In order to foster interoperability, the BioLexicon is modelled using the Lexical Markup Framework, an ISO standard. Conclusions The BioLexicon contains over 2.2 M lexical entries and over 1.8 M terminological variants, as well as over 3.3 M semantic relations, including over 2 M synonymy relations. Its exploitation can benefit both application developers and users. 
We demonstrate some such benefits by describing integration of the resource into a number of different tools, and evaluating improvements in performance that this can bring.}, KEYWORDS = {Text Mining, Information Extraction, Computational Lexicon}, PAGES = {1-29}, URL = {http://www.biomedcentral.com/1471-2105/12/397}, VOLUME = {12}, DOI = {10.1186/1471-2105-12-397}, PUBLISHER = {BioMed Central ([London], Regno Unito)}, ISSN = {1471-2105}, JOURNAL = {BMC Bioinformatics}, } @INCOLLECTION{HAYASHI_2011_INCOLLECTION_HDCMSB_205409, AUTHOR = {Hayashi, Y. and Declerck, T. and Calzolari, N. and Monachini, M. and Soria, C. and Buitelaar, P.}, TITLE = {Language Service Ontology}, YEAR = {2011}, ABSTRACT = {The Language Grid is a distinctive language service infrastructure in the sense that it accommodates a wide variety of user needs, ranging from technical novices to experts; language resource consumers to language resource providers. As these language services are various in type and each of them can be idiosyncratic in many aspects, the service infrastructure has to address the issue of interoperability. A key to solve this issue is not only to build the services around standardized resources and interfaces, but also to establish a knowledge structure that copes effectively with a range of language services. Given this knowledge structure, referred to as a service ontology, each language service can be systematically classified and its usage specified by a corresponding API. 
This not only enables the utilization of existing language resources but facilitates the dissemination of newly created language resources as services.}, KEYWORDS = {Language grid, ontology}, PAGES = {85-100}, URL = {https://publications.cnr.it/doc/205409}, DOI = {10.1007/978-3-642-21178-2_6}, PUBLISHER = {Springer-Verlag (Berlin/Heidelberg, DEU)}, ISBN = {978-3-642-21177-5}, BOOKTITLE = {The Language Grid}, EDITOR = {Ishida, T.}, } @EDITORIAL{CALZOLARI_2011_EDITORIAL_CBSGMQ_206410, AUTHOR = {Calzolari, N. and Baroni, P. and Soria, C. and Goggi, S. and Monachini, M. and Quochi, V.}, TITLE = {Proceedings of the 3rd European Language Resources and Technologies Forum: Language Resources in the Sharing Age-the Strategic Agenda}, YEAR = {2011}, ABSTRACT = {Proceedings of the third FLaReNet forum on the European Language Resources and Technologies, held in Venezia, at the Auditorium Santa Margherita of the Università Ca' Foscari, on 26-27 May 2011.}, KEYWORDS = {Language Resources, Language Technologies}, PAGES = {86}, URL = {http://www.flarenet.eu/sites/default/files/FLaReNet_Forum_2011_Proceedings.pdf}, } @INPROCEEDINGS{CALZOLARI_2011_INPROCEEDINGS_CMQ_205719, AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V.}, TITLE = {Interoperability Framework: The FLaReNet action plan proposal}, YEAR = {2011}, ABSTRACT = {Standards are fundamental to exchange, preserve, maintain and integrate data and language resources, and as an essential basis of any language resource infrastructure. This paper promotes an Interoperability Framework as a dynamic environment of standards and guidelines, also intended to support the provision of language-(web)service interoperability. In the past two decades, the need to define common practices and formats for linguistic resources has been increasingly recognized and sought. Today open, collaborative, shared data is at the core of a sound language strategy, and standardisation is actively on the move. 
This paper first describes the current landscape of standards, and presents the major barriers to their adoption; then, it describes those scenarios that critically involve the use of standards and provide a strong motivation for their adoption; lastly, a series of actions and steps needed to operationalise standards and achieve a full interoperability for Language Resources and Technologies are proposed.}, KEYWORDS = {Language Resources, standards}, PAGES = {41-49}, URL = {https://publications.cnr.it/doc/205719}, ISBN = {978-974-466-564-5}, CONFERENCE_NAME = {Workshop on Language Resources, Technology and Services in the Sharing Paradigm}, CONFERENCE_PLACE = {Chiang Mai}, CONFERENCE_DATE = {12 Novembre 2011}, } @INPROCEEDINGS{FRONTINI_2011_INPROCEEDINGS_FMGLPFAM_205601, AUTHOR = {Frontini, F. and Monachini, M. and Gavrilidou, M. and Labropoulou, P. and Piperidis, S. and Francopoulo, G. and Arranz, V. and Mapelli, V.}, TITLE = {A Metadata Schema for the Description of Language Resources (LRs)}, YEAR = {2011}, ABSTRACT = {This paper presents the metadata schema for describing language resources (LRs) currently under development for the needs of META-SHARE, an open distributed facility for the exchange and sharing of LRs. An essential ingredient in its setup is the existence of formal and standardized LR descriptions, cornerstone of the interoperability layer of any such initiative. The description of LRs is granular and abstractive, combining the taxonomy of LRs with an inventory of a structured set of descriptive elements, of which only a minimal subset is obligatory; the schema additionally proposes recommended and optional elements. Moreover, the schema includes a set of relations catering for the appropriate inter-linking of resources. 
The current paper presents the main principles and features of the metadata schema, focusing on the description of text corpora and lexical / conceptual resources.}, KEYWORDS = {metadata, language resources}, PAGES = {84-92}, URL = {https://publications.cnr.it/doc/205601}, ISBN = {978-974-466-564-5}, CONFERENCE_NAME = {Workshop on Language Resources, Technology and Services in the Sharing Paradigm}, CONFERENCE_PLACE = {Chiang Mai}, CONFERENCE_DATE = {12 Novembre 2011}, } @INPROCEEDINGS{FRONTINI_2011_INPROCEEDINGS_FM_205738, AUTHOR = {Frontini, F. and Monachini, M.}, TITLE = {Towards interfacing lexical and ontological resources}, YEAR = {2011}, ABSTRACT = {During the last two decades, the Computational Linguistics community has dedicated considerable effort to the research and development of Lexical Resources (LRs), especially Computational Lexicons. These LRs, even though belonging to different linguistic approaches and theories, share a common element; all of them contain, explicitly or implicitly, an ontology as the means of organizing their structure.}, KEYWORDS = {language resources, ontologies}, PAGES = {26}, URL = {https://publications.cnr.it/doc/205738}, CONFERENCE_NAME = {ONTOLOGIES AND LEXICAL SEMANTICS}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {01 Ottobre 2011}, } @TECHREPORT{ARRANZ_2011_TECHREPORT_ABBCCDFGMQRR_290606, AUTHOR = {Arranz, V. and Bel, N. and Budin, G. and Caselli, T. and Choukri, K. and Del Gratta, R. and Frontini, F. and Goggi, S. and Monachini, M. and Quochi, V. and Rubino, F. and Russo, I.}, TITLE = {The FLaReNet Databook}, YEAR = {2011}, ABSTRACT = {The FLaReNet Databook is not only the collection of all the factual material collected during the activities of the project, but also a set of innovative initiatives and instruments that will remain in place for the continuous collection of such "facts". 
The purpose of the Databook is in fact, on one side, to consolidate the analyses carried out in the project and, at the same time, to set up the proper mechanisms that will enable the provision of a continuous stream of relevant factual material, also after the end of the project.}, KEYWORDS = {Language Resources (LRs)}, PAGES = {1-8}, URL = {http://www.flarenet.eu/?q=FLaReNet_Databook}, } @TECHREPORT{CALZOLARI_2011_TECHREPORT_CBCMMOPQS_206397, AUTHOR = {Calzolari, N. and Bel, N. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C.}, TITLE = {Final FLaReNet deliverable: Language Resources for the Future-The Future of Language Resources}, YEAR = {2011}, ABSTRACT = {Language Technologies (LT), together with their backbone, Language Resources (LR), provide an essential support to the challenge of Multilingualism and ICT of the future. The main task of language technologies is to bridge language barriers and to help creating a new environment where information flows smoothly across frontiers and languages, no matter the country, and the language, of origin. To achieve this goal, all players involved need to act as a community able to join forces on a set of shared priorities. However, until now the field of Language Resources and Technology has long suffered from an excess of individuality and fragmentation, with a lack of coherence concerning the priorities for the field, the direction to move, not to mention a common timeframe. The context encountered by the FLaReNet project was thus represented by an active field needing a coherence that can only be given by sharing common priorities and endeavours. 
FLaReNet has contributed to the creation of this coherence by gathering a wide community of experts and making them participate in the definition of an exhaustive set of recommendations.}, KEYWORDS = {language resources and technologies, infrastructures}, PAGES = {97}, URL = {https://publications.cnr.it/doc/206397}, } @TECHREPORT{DESIPRI_2011_TECHREPORT_DGLPFMAMFD_206406, AUTHOR = {Desipri, E. and Gavrilidou, M. and Labropoulou, P. and Piperidis, S. and Frontini, F. and Monachini, M. and Arranz, V. and Mapelli, V. and Francopoulo, G. and Declerck, T.}, TITLE = {Documentation and User Manual of the META-SHARE Metadata Model}, YEAR = {2011}, ABSTRACT = {The current deliverable presents the META-SHARE metadata schema v1.0, as implemented in the META-SHARE XSD's v1.0 released to (META-NET and PSP partners) in July 2011 for text corpora and lexical/conceptual resources and its supplement for audio corpora, tools and language descriptions (simplified/refactored version) as implemented in November. It is meant to act as a user manual, providing explanations on the model contents for LRs providers and LRs curators that wish to describe their resources in accordance to it. Work on the schema is ongoing and changes/updates to the model are constantly being made; where appropriate, some changes that are already under way are documented in this deliverable.}, KEYWORDS = {Language resources, metadata, standards}, PAGES = {150}, URL = {https://publications.cnr.it/doc/206406}, } @TECHREPORT{MONACHINI_2011_TECHREPORT_MFS_206457, AUTHOR = {Monachini, M. and Frontini, F. and Soria, C.}, TITLE = {KYOTO-LMF WordNet Representation Format}, YEAR = {2011}, ABSTRACT = {The format described in the following pages is the final revised proposal for representing wordnets inside the Kyoto project (henceforth "Kyoto-LMF wordnet format"). The reference model is Lexical Markup Framework (LMF), version 16, probably one of the most widely recognized standards for the representation of NLP lexicons. 
The goals of LMF are to provide a common model for the creation and use of such lexical resources, to manage the exchange of data between and among them, and to enable the merging of a large number of individual resources to form extensive global electronic resources. LMF was specifically designed to accommodate as many models of lexical representations as possible. Purposefully, it is designed as a meta-model, i.e. a high-level specification for lexical resources defining the structural constraints of a lexicon.}, KEYWORDS = {Wordnets, LMF, ISO, Representation formats, standards}, PAGES = {32}, URL = {https://publications.cnr.it/doc/206457}, } @TECHREPORT{MONACHINI_2011_TECHREPORT_MQCBBCCFHKLMOPPRSUW_206507, AUTHOR = {Monachini, M. and Quochi, V. and Calzolari, N. and Bel, N. and Budin, G. and Caselli, T. and Choukri, K. and Francopoulo, G. and Hinrichs, E. and Krauwer, S. and Lemnitzer, L. and Mariani, J. and Odijk, J. and Piperidis, S. and Przepiorkowski, A. and Romary, L. and Schmidt, H. and Uszkoreit, H. and Wittenburg, P.}, TITLE = {The Standards' Landscape Towards an Interoperability Framework}, YEAR = {2011}, ABSTRACT = {This document proposes an overview of the current scene towards an Interoperability Framework and acts as a reference point for the current standards that the community fosters and encourages to adopt/improve. This initiative is in close synchronization with other relevant initiatives such as CLARIN, ELRA, ISO and TEI and META-Share. The document builds on the CLARIN Standardisation Action Plan and adapts and extends it to the needs of the broader LT Community, beyond the SSH research areas including the industry. The main goal of this document is to give a practical orientation for various LT players, both commercial and academic; the main message being that a harmonized domain of language resources and technology can be achieved stepwise, but that an effort to adopt standards is necessary to overcome fragmentation. 
NB: This is to be intended by no means as a static, closed document, rather a dynamic one which needs to be constantly/periodically revised and updated by the community itself.}, KEYWORDS = {Standards, interoperability}, PAGES = {23}, URL = {https://publications.cnr.it/doc/206507}, } @TECHREPORT{VOSSEN_2011_TECHREPORT_VBRASADHMBF_206329, AUTHOR = {Vossen, P. and Bosma, W. and Rigau, G. and Agirre, E. and Soroa, A. and Aliprandi, C. and De Jonge, J. and Hielkema, F. and Monachini, M. and Bartolini, R. and Frontini, F.}, TITLE = {KyotoCore: integrated system for knowledge mining from text}, YEAR = {2011}, ABSTRACT = {In this deliverable, we describe KyotoCore, an integrated system for applying text mining. We describe the software architecture of KyotoCore, the single modules and the process flows. Finally, we describe a use case where we apply the complete process to an English database on estuaries.}, KEYWORDS = {Knowledge and text mining software}, PAGES = {56}, URL = {https://publications.cnr.it/doc/206329}, } @INCOLLECTION{RONZANO_2010_INCOLLECTION_RMMTC_169862, AUTHOR = {Ronzano, F. and Monachini, M. and Marchetti, A. and Tesconi, M. and Calzolari, N.}, TITLE = {Bootstrapping and Collaboratively Enriching the Italian Domain WordNet through the WiKyoto Knowledge Editor}, YEAR = {2010}, ABSTRACT = {Enhancing the development of multilingual resources is of utmost importance for use in computer applications. The need of ever growing resources for effective multilingual content processing has given impulse to a radical change in the perspective of language resource (LR) creation, structuring, exploitation and maintenance. The Web has played a key role in this process: indeed the possibility to access growing amounts of structured and unstructured data as well as the ease of creating and sharing contents between distributed communities of users have strongly affected the methodologies and techniques to bootstrap, enrich and access LRs. 
From static knowledge bases usually created and maintained by groups of experts and tailored to the specific exploitation contexts, LRs have turned into dynamic repositories of linguistic knowledge. Their content is usually easily accessible over the Web and often exploited aggregated and optimized on-the-fly by on-line information mining services. In this context, the adoption of standardized data formats to facilitate interoperability and data exchange is essential. Moreover, the creation and maintenance of these resources has taken great advantage from the possibility to harvest Web data in order to bootstrap or enrich them. Several new frameworks have been proposed to support access, search, integration and interoperability of "new generation" LRs. Wide distributed communities of Web users are more and more directly or indirectly involved in keeping language resources updated or in extending them. After a brief description of modern LRs, we focus our attention on two essential issues involving them: the need for standard formats that support interoperability in a distributed Web context and the possibility for the Web communities to collaboratively maintain and enrich these resources. In particular, we present the Italian WordNet (IWN) and its exploitation in the context of the KYOTO Project, as a real-world scenario where standardization, interlinking, enrichment as well as collaborative editing are put into practice.}, KEYWORDS = {NLP, collaborative editing, wordnet, knowledge representation, wiki}, PAGES = {181-208}, URL = {http://www.racai.ro/Multilinguality%20and%20Interoperability/TOC.html}, PUBLISHER = {Romanian Academy Publishing House (Bucharest, ROU)}, ISBN = {978-973-27-1972-5}, BOOKTITLE = {Multilinguality and Interoperability in Language Processing with Emphasis on Romanian}, EDITOR = {Tufis, D. and Forascu, I.}, } @EDITORIAL{CALZOLARI_2010_EDITORIAL_CBMS_136417, AUTHOR = {Calzolari, N. and Baroni, P. and Monachini, M. 
and Soria, C.}, TITLE = {Proceedings of the 2nd European Language Resources and Technologies Forum: Language Resources of the future-the future of Language Resources}, YEAR = {2010}, ABSTRACT = {Proceedings of the second FLaReNet forum on the European Language Resources and Technologies, held in Barcelona, at the Institut d'Estudis Catalans, on 11-12 February 2010.}, KEYWORDS = {Language Resources, Language Technologies, Future}, PAGES = {120}, URL = {http://www.flarenet.eu/sites/default/files/FLaReNet_Forum_2010_Proceedings.pdf}, } @INPROCEEDINGS{AGIRRE_2010_INPROCEEDINGS_ALFHTMVVS_172865, AUTHOR = {Agirre, E. and {López de Lacalle}, O. and Fellbaum, C. and Hsieh, S. and Tesconi, M. and Monachini, M. and Vossen, P. and Segers, R.}, TITLE = {SemEval-2010 task 17: All-words word sense disambiguation on a specific domain}, YEAR = {2010}, ABSTRACT = {Domain portability and adaptation of NLP components and Word Sense Disambiguation systems present new challenges. The difficulties found by supervised systems to adapt might change the way we assess the strengths and weaknesses of supervised and knowledge-based WSD systems. Unfortunately, all existing evaluation datasets for specific domains are lexical-sample corpora. This task presented all-words datasets on the environment domain for WSD in four languages (Chinese, Dutch, English, Italian). 11 teams participated, with supervised and knowledge-based systems, mainly in the English dataset. The results show that in all languages the participants were able to beat the most frequent sense heuristic as estimated from general corpora. The most successful approaches used some sort of supervision in the form of hand-tagged examples from the domain.}, KEYWORDS = {I. 2. 
7 Natural Language Processing, Word Sense Disambiguation systems, Semantic Annotation, Word-sense disambiguation}, PAGES = {75-80}, URL = {https://publications.cnr.it/doc/172865}, ISBN = {978-1-932432-70-1}, CONFERENCE_NAME = {ACL 2010-SemEval 2010: 5th International Workshop on Semantic Evaluation}, CONFERENCE_PLACE = {Uppsala, Sweden}, CONFERENCE_DATE = {15-16 Luglio 2010}, EDITOR = {Erk, K. and Strapparava, C.}, } @INPROCEEDINGS{ATTIA_2010_INPROCEEDINGS_ATTMV_84787, AUTHOR = {Attia, M. and Toral, A. and Tounsi, L. and Monachini, M. and van Genabith, J.}, TITLE = {An Automatically Built Named Entity Lexicon for Arabic}, YEAR = {2010}, ABSTRACT = {We have successfully adapted and extended the automatic Multilingual, Interoperable Named Entity Lexicon approach to Arabic, using Arabic WordNet (AWN) and Arabic Wikipedia (AWK). First, we extract AWN's instantiable nouns and identify the corresponding categories and hyponym subcategories in AWK. Then, we exploit Wikipedia inter-lingual links to locate correspondences between articles in ten different languages in order to identify Named Entities (NEs). We apply keyword search on AWK abstracts to provide for Arabic articles that do not have a correspondence in any of the other languages. In addition, we perform a post-processing step to fetch further NEs from AWK not reachable through AWN. Finally, we investigate diacritization using matching with geonames databases, MADA-TOKAN tools and different heuristics for restoring vowel marks of Arabic NEs. 
Using this methodology, we have extracted approximately 45,000 Arabic NEs and built, to the best of our knowledge, the largest Named Entity repository available.}, KEYWORDS = {Acquisition, Lexicon, database, Named Entity recognition}, URL = {https://publications.cnr.it/doc/84787}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{DELGRATTA_2010_INPROCEEDINGS_DDBCEMQSTC_84782, AUTHOR = {Del Gratta, R. and D'Onofrio, L. and Bartolini, R. and Caselli, T. and Enea, A. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A. and Calzolari, N.}, TITLE = {A Web-based Architecture for Interoperability of Lexical Resources}, YEAR = {2010}, ABSTRACT = {In this paper we present a Web Service Architecture for managing high level interoperability of Language Resources (LRs) by means of a Service Oriented Architecture (SOA) and the use of ISO standards, such as ISO LMF. We propose a layered architecture which separates the management of legacy resources (data collection) from data aggregation (workflow) and data access (user requests). 
We provide a case study to demonstrate how the proposed architecture is capable of managing data exchange among different lexical services in a coherent way and show how the use of a lexical standard becomes of primary importance when a protocol of interoperability is defined.}, KEYWORDS = {Interoperability, Web services, Lexical resources}, PAGES = {53-62}, URL = {http://weblab.iit.cnr.it/kyoto/www2.let.vu.nl/twiki/pub/Kyoto/Publications/icgl2010_DOnofrioetal.pdf}, PUBLISHER = {City University of Hong Kong Press (Hong Kong, CHN)}, ISBN = {978-962-442-323-5}, CONFERENCE_NAME = {2nd International Conference on Global Interoperability for Language Resources}, CONFERENCE_PLACE = {Hong Kong}, CONFERENCE_DATE = {18-20 January 2010}, BOOKTITLE = {2nd International Conference on Global Interoperability for Language Resources, ICGL 2010}, EDITOR = {Fang, A. C. and Ide, N. and Webster, J.}, } @INPROCEEDINGS{SAVAS_2010_INPROCEEDINGS_SHMSC_84807, AUTHOR = {Savas, B. and Hayashi, Y. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {An LMF-based Web Service for Accessing WordNet-type Semantic Lexicons}, YEAR = {2010}, ABSTRACT = {This paper describes a Web service for accessing WordNet-type semantic lexicons. The central idea behind the service design is: given a query, the primary functionality of lexicon access is to present a partial lexicon by extracting the relevant part of the target lexicon. Based on this idea, we implemented the system as a RESTful Web service whose input query is specified by the access URI and whose output is presented in a standardized XML data format. LMF, an ISO standard for modeling lexicons, plays the most prominent role: the access URI pattern basically reflects the lexicon structure as defined by LMF; the access results are rendered based on Wordnet-LMF, which is a version of LMF XML-serialization. The Web service currently provides accesses to Princeton WordNet, Japanese WordNet, as well as the EDR Electronic Dictionary as a trial. 
To accommodate the EDR dictionary within the same framework, we modeled it also as a WordNet-type semantic lexicon. This paper thus proposes modifications to LMF.}, KEYWORDS = {Standards for LRs, Lexicon, Lexical database, Web Services}, URL = {https://publications.cnr.it/doc/84807}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {17-21/05/2010}, } @INPROCEEDINGS{SOROA_2010_INPROCEEDINGS_SALBVMLH_84769, AUTHOR = {Soroa, A. and Agirre, E. and {López de Lacalle}, O. and Bosma, W. and Vossen, P. and Monachini, M. and Lo, J. and Hsieh, S.}, TITLE = {Kyoto: An Integrated System for Specific Domain WSD}, YEAR = {2010}, ABSTRACT = {This document describes the preliminary release of the integrated Kyoto system for specific domain WSD. The system uses concept miners (Tybots) to extract domain-related terms and produces a domain-related thesaurus, followed by knowledge-based WSD based on wordnet graphs (UKB). The resulting system can be applied to any language with a lexical knowledge base, and is based on publicly available software and resources. Our participation in Semeval task \#17 focused on producing running systems for all languages in the task, and we attained good results in all except Chinese. Due to the pressure of the time-constraints in the competition, the system is still under development, and we expect results to improve in the near future.}, KEYWORDS = {Semantic Annotation, Word-sense disambiguation}, PAGES = {417-420}, URL = {https://publications.cnr.it/doc/84769}, ISBN = {978-1-932432-70-1}, CONFERENCE_NAME = {SemEval2010-5th International Workshop on Semantic Evaluation}, CONFERENCE_PLACE = {Uppsala, Sweden}, CONFERENCE_DATE = {15-16 Luglio 2010}, EDITOR = {Erk, K. and Strapparava, C.}, } @INPROCEEDINGS{TORAL_2010_INPROCEEDINGS_TBMS_84773, AUTHOR = {Toral, A. and Bracale, S. and Monachini, M. 
and Soria, C.}, TITLE = {Rejuvenating the Italian WordNet: upgrading, standardising, extending}, YEAR = {2010}, ABSTRACT = {This paper reports on recent activities carried out within the KYOTO project aimed at enhancing the Italian WordNet Language Resource. On the one hand we study the formalisation of this lexicon according to the LMF ISO standard and explore its application into a real-world scenario by means of representing it in the WN-LMF dialect. On the other hand, we report on a semiautomatic procedure to upgrade the connections of the lexicon to WordNet, which obtains over 98\% accuracy.}, KEYWORDS = {Lexical Resources, Standards for LRs}, URL = {http://www.globalwordnet-iitb2010.in/proceedings.php}, CONFERENCE_NAME = {5th Global Wordnet Conference}, CONFERENCE_PLACE = {Mumbai (India)}, CONFERENCE_DATE = {31/01-4/02-2010}, EDITOR = {Bhattacharyya, P. and Fellbaum, C. and Vossen, P.}, } @INPROCEEDINGS{TORAL_2010_INPROCEEDINGS_TMSCRBV_84801, AUTHOR = {Toral, A. and Monachini, M. and Soria, C. and Cuadros, M. and Rigau, G. and Bosma, W. and Vossen, P.}, TITLE = {Linking a domain thesaurus to WordNet and conversion to WordNet-LMF}, YEAR = {2010}, ABSTRACT = {We present a methodology to link domain thesauri to general-domain lexica. This is applied in the framework of the KYOTO project to link the Species2000 thesaurus to the synsets of the English WordNet. Moreover, we study the formalisation of this thesaurus according to the ISO LMF standard and its dialect WordNet-LMF. 
This conversion will allow Species2000 to communicate with the other resources available in the KYOTO architecture.},
  KEYWORDS = {Lexical Resources, Thesaurus},
  PAGES = {157-165},
  URL = {https://publications.cnr.it/doc/84801},
  ISBN = {978-962-442-323-5},
  CONFERENCE_NAME = {ICGL 2010-The Second International Conference on Global Interoperability for Language Resources},
  CONFERENCE_PLACE = {Hong Kong},
  CONFERENCE_DATE = {18-20 January 2010},
  BOOKTITLE = {ICGL2010-Proceedings of the Second International Conference on Global Interoperability for Language Resources-5th Joint ISO-ACL/SIGSEM Workshop on Interoperable Semantic Annotation},
  EDITOR = {Fang, A. C. and Ide, N. and Webster, J.},
}
@INPROCEEDINGS{VOSSEN_2010_INPROCEEDINGS_VRASMB_184375,
  AUTHOR = {Vossen, P. and Rigau, G. and Agirre, E. and Soroa, A. and Monachini, M. and Bartolini, R.},
  TITLE = {KYOTO: an Open Platform for Mining Facts},
  YEAR = {2010},
  ABSTRACT = {This paper describes an open text-mining system that was developed for the Asian-European project KYOTO. The KYOTO system uses an open text representation format and a central ontology to enable extraction of knowledge and facts from large volumes of text in many different languages. We implemented a semantic tagging approach that performs off-line reasoning. Mining of facts and knowledge is achieved through a flexible pattern matching module that can work in much the same way for different languages, can handle efficiently large volumes of documents and is not restricted to a specific domain. We applied the system to an English database on estuaries},
  URL = {https://publications.cnr.it/doc/184375},
  ISBN = {978-7-900268-00-6},
  CONFERENCE_NAME = {OntoLex 2010},
  CONFERENCE_PLACE = {Beijing},
  CONFERENCE_DATE = {2010},
  BOOKTITLE = {ONTOLEX-COLING 2010},
}
@TECHREPORT{VOSSEN_2010_TECHREPORT_VHARFMIBHJD_157490,
  AUTHOR = {Vossen, P. and Hielkema, F. and Aliprandi, C. and Rigau, G. and Fellbaum, C. and Monachini, M. and Isahara, H. and Bond, F. and Hsieh, S.
and Jones Walters, L. and De Boom, K.},
  TITLE = {Exploitation and Dissemination Plan},
  YEAR = {2010},
  KEYWORDS = {Ontologie},
  URL = {https://publications.cnr.it/doc/157490},
}
@TECHREPORT{VOSSEN_2010_TECHREPORT_VSHHRAECLKM_157489,
  AUTHOR = {Vossen, P. and Segers, R. and Hicks, A. and Herold, A. and Rigau, G. and Agirre, E. and Estarrona, A. and Cuadros, M. and Laparra, E. and Kanzaki, K. and Monachini, M.},
  TITLE = {Wordnets mapped to central ontology-revised},
  YEAR = {2010},
  KEYWORDS = {Ontologie},
  URL = {https://publications.cnr.it/doc/157489},
}
@ARTICLE{FRANCOPOULO_2009_ARTICLE_FBGCMPS_30882,
  AUTHOR = {Francopoulo, G. and Bel, N. and George, M. and Calzolari, N. and Monachini, M. and Pet, M. and Soria, C.},
  TITLE = {Multilingual resources for NLP in the Lexical Markup Framework (LMF)},
  YEAR = {2009},
  ABSTRACT = {Optimizing the production, maintenance and extension of lexical resources is one of the crucial aspects impacting Natural Language Processing (NLP). A second aspect involves optimizing the process leading to their integration in applications. With this respect, we believe that a consensual specification on monolingual, bilingual and multilingual lexicons can be a useful aid for the various NLP actors. Within ISO, one purpose of Lexical Markup Framework (LMF, ISO-24613) is to define a standard for lexicons that covers multilingual lexical data.},
  KEYWORDS = {LMF, Standardization, ISO-TC37},
  PAGES = {57-70},
  URL = {https://publications.cnr.it/doc/30882},
  VOLUME = {43},
  DOI = {10.1007/s10579-008-9077-5},
  PUBLISHER = {Springer (Dordrecht, Paesi Bassi)},
  ISSN = {1574-020X},
  JOURNAL = {Language resources and evaluation (Print)},
}
@ARTICLE{QUOCHI_2009_ARTICLE_QDSBMC_30876,
  AUTHOR = {Quochi, V. and Del Gratta, R. and Sassolini, E. and Bartolini, R. and Monachini, M.
and Calzolari, N.},
  TITLE = {A Standard Lexical-Terminological Resource for the Bio Domain},
  YEAR = {2009},
  ABSTRACT = {The present paper describes a large-scale lexical resource for the biology domain designed both for human and for machine use. This lexicon aims at semantic interoperability and extendability, through the adoption of ISO-LMF standard for lexical representation and through a granular and distributed encoding of relevant information. The first part of this contribution focuses on three aspects of the model that are of particular interest to the biology community: the treatment of term variants, the representation of bio events and the alignment with a domain ontology. The second part of the paper describes the physical implementation of the model: a relational database equipped with a set of automatic uploading procedures. Peculiarity of the BioLexicon is that it combines features of both terminologies and lexicons. A set of verbs relevant for the domain is also represented with full details on their syntactic and semantic argument structure.},
  KEYWORDS = {Lexical representation model, Lexical Database, Computational Lexicography, Special Domains, Standards},
  PAGES = {325-335},
  URL = {https://publications.cnr.it/doc/30876},
  VOLUME = {5603},
  DOI = {10.1007/978-3-642-04235-5_28},
  PUBLISHER = {Springer (Berlin, Germania)},
  ISSN = {0302-9743},
  JOURNAL = {Lecture notes in computer science},
}
@ARTICLE{SORIA_2009_ARTICLE_SMBCHHMT_170611,
  AUTHOR = {Soria, C. and Monachini, M. and Bertagna, F. and Calzolari, N. and Huang, C. and Hsieh, S. and Marchetti, A.
and Tesconi, M.},
  TITLE = {Exploring Interoperability of Language Resources: the Case of Cross-lingual Semi-automatic Enrichment of Wordnets},
  YEAR = {2009},
  ABSTRACT = {In this paper we present an application fostering the integration and interoperability of computational lexicons, focusing on the particular case of mutual linking and cross-lingual enrichment of two wordnets, the ItalWordNet and Sinica BOW lexicons. This is intended as a case study investigating the needs and requirements of semi-automatic integration and interoperability of lexical resources, in the view of developing a prototype web application to support the GlobalWordNet Grid Initiative.},
  KEYWORDS = {H. 3 INFORMATION STORAGE AND RETRIEVAL. Linguistic processing, Distributed language resources, Interoperable lexical resources, Language services},
  PAGES = {87-96},
  URL = {https://publications.cnr.it/doc/170611},
  VOLUME = {43},
  DOI = {10.1007/s10579-009-9082-3},
  PUBLISHER = {Springer (Dordrecht, Paesi Bassi)},
  ISSN = {1574-020X},
  JOURNAL = {Language resources and evaluation (Print)},
}
@EDITORIAL{CALZOLARI_2009_EDITORIAL_CBBBCGMMOPQST_183877,
  AUTHOR = {Calzolari, N. and Baroni, P. and Bel, N. and Budin, G. and Choukri, K. and Goggi, S. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C. and Toral, A.},
  TITLE = {Proceedings of the 1st European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe},
  YEAR = {2009},
  ABSTRACT = {Proceedings of the first FLaReNet Forum on the European Language Resources and Technologies, held in Vienna, at the Austrian Academy of Science, on 12-13 February 2009.},
  KEYWORDS = {Language Resources, Language Technologies, Multilingual, Digital},
  PAGES = {105},
  URL = {http://www.flarenet.eu/sites/default/files/Vienna09_Proceedings.pdf},
}
@INPROCEEDINGS{BOSMA_2009_INPROCEEDINGS_BVSRTMMA_173501,
  AUTHOR = {Bosma, W. and Vossen, P. and Soroa, A. and Rigau, G. and Tesconi, M. and Marchetti, A.
and Monachini, M. and Aliprandi, C.},
  TITLE = {KAF: a generic semantic annotation format},
  YEAR = {2009},
  ABSTRACT = {We present KAF, the KYOTO Annotation Format. KAF is a layered and extendible linguistic annotation format that is specifically developed to arrive at semantic interoperability. KAF is used in seven languages in several applications throughout the KYOTO (Knowledge Yielding Ontologies for Transition-based Organization) project. The goal of these applications is to derive semantic data from linguistically processed text. Separate annotation layers are defined for each annotation process but these can be combined to arrive at a higher level of semantic representation. This paper gives an outline of KAF and a description of how it is applied in the KYOTO project.},
  KEYWORDS = {I. 2. 7 Natural Language Processing, Natural language processing, Semantic annotation, Text mining, Semantic Annotation, Standard Formats},
  PAGES = {145-152},
  URL = {https://publications.cnr.it/doc/173501},
  CONFERENCE_NAME = {5th International Conference on Generative Approaches to the Lexicon},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {17-19 September 2009},
}
@INPROCEEDINGS{SORIA_2009_INPROCEEDINGS_SMV_84764,
  AUTHOR = {Soria, C. and Monachini, M. and Vossen, P.},
  TITLE = {Wordnet-LMF: Fleshing out a Standardized Format for Wordnet Interoperability},
  YEAR = {2009},
  ABSTRACT = {In this paper we present Wordnet-LMF, a dialect of ISO Lexical Markup Framework that instantiates LMF for representing wordnets. Wordnet-LMF was developed in the framework of the EU KYOTO project for the specific purpose of endowing a set of wordnets with a standardized interoperability format allowing the interchange of lexicosemantic information encoded in each of them.
The aim of this format is twofold a) to give a preliminary assessment of LMF, by large-scale application to real lexical resources; b) to endow WordNet with a format representation that will allow easier integration among resources sharing the same structure (i.e. other wordnets) and, more importantly, across resources with different theoretical and implementation approaches.},
  KEYWORDS = {Standards, Lexical Markup Framework, Lexical resources, Wordnets, Intercultural collaboration},
  PAGES = {139-146},
  URL = {https://publications.cnr.it/doc/84764},
  PUBLISHER = {ACM, Association for computing machinery (New York, USA)},
  ISBN = {978-1-60558-198-9},
  CONFERENCE_NAME = {International Workshop on Intercultural Collaboration},
  CONFERENCE_PLACE = {Palo Alto, California, USA},
  CONFERENCE_DATE = {20-21 February 2009},
  BOOKTITLE = {Proceedings of the International Workshop on Intercultural Collaboration},
}
@INPROCEEDINGS{TOKUNAGA_2009_INPROCEEDINGS_TKCMSSCTXHHK_84751,
  AUTHOR = {Tokunaga, T. and Kaplan, D. and Calzolari, N. and Monachini, M. and Soria, C. and Sornlertlamvanich, V. and Charoenporn, T. and Tesconi, M. and Xia, Y. and Huang, C. and Hsieh, S. and Shirai, K.},
  TITLE = {Query Expansion using LMF-Compliant Lexical Resources},
  YEAR = {2009},
  ABSTRACT = {This paper reports prototype multilingual query expansion system relying on LMF compliant lexical resources. The system is one of the deliverables of a three-year project aiming at establishing an international standard for language resources which is applicable to Asian languages.
Our important contributions to ISO 24613, standard Lexical Markup Framework (LMF) include its robustness to deal with Asian languages, and its applicability to cross-lingual query tasks, as illustrated by the prototype introduced in this paper.},
  KEYWORDS = {Lexical resources, Lexical Markup Framework (LMF), Standards},
  PAGES = {145-152},
  URL = {https://publications.cnr.it/doc/84751},
  ISBN = {978-1-932432-56-5},
  CONFERENCE_NAME = {ACL-IJCNLP 2009-7th Workshop on Asian Language Resources},
  CONFERENCE_PLACE = {Singapore},
  CONFERENCE_DATE = {6-7 August 2009},
}
@INPROCEEDINGS{TORAL_2009_INPROCEEDINGS_TMSR_84754,
  AUTHOR = {Toral, A. and Monachini, M. and Soroa, A. and Rigau, G.},
  TITLE = {Studying the role of Qualia Relations for Word Sense Disambiguation},
  YEAR = {2009},
  KEYWORDS = {Generative Lexicon, Semantic Information Extraction, Word Sense Disambiguation},
  URL = {https://publications.cnr.it/doc/84754},
  CONFERENCE_NAME = {5th International Conference on Generative Approaches to the Lexicon},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {2009},
}
@INPROCEEDINGS{MONACHINI_2009_INPROCEEDINGS_M_112943,
  AUTHOR = {Monachini, M.},
  TITLE = {A 15-year journey in Standards for Lexical Resources},
  YEAR = {2009},
  KEYWORDS = {Standards, Lexicon},
  URL = {https://publications.cnr.it/doc/112943},
  CONFERENCE_NAME = {New horizons for Linguistic Resources in a Global Context},
  CONFERENCE_PLACE = {Barcelona, Spain},
  CONFERENCE_DATE = {2009},
}
@INPROCEEDINGS{MONACHINI_2009_INPROCEEDINGS_M_112947,
  AUTHOR = {Monachini, M.},
  TITLE = {The WordNet-LMF Representation Format},
  YEAR = {2009},
  KEYWORDS = {Standards, Lexicon},
  URL = {https://publications.cnr.it/doc/112947},
  CONFERENCE_NAME = {The First KYOTO Workshop Environmental Knowledge Transition and Exchange},
  CONFERENCE_PLACE = {Amsterdam},
  CONFERENCE_DATE = {2009},
}
@TECHREPORT{ALIPRANDI_2009_TECHREPORT_ANMRTSMVBAADRS_262195,
  AUTHOR = {Aliprandi, C. and Neri, F. and Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C.
and Monachini, M. and Vossen, P. and Bosma, W. and Agirre, E. and Artola, X. and Díaz de Ilarraza, A. and Rigau, G. and Soroa, A.},
  TITLE = {Database Models and Data Formats DELIVERABLE NR. 1/WP NR. 2},
  YEAR = {2009},
  ABSTRACT = {This deliverable describes data structure and XML formats that have been investigated and defined for data representation of linguistic and semantic resources underlying the KYOTO system.},
  URL = {https://publications.cnr.it/doc/262195},
}
@TECHREPORT{ALIPRANDI_2009_TECHREPORT_ANMRTSMVBAADRS_157459,
  AUTHOR = {Aliprandi, C. and Neri, F. and Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C. and Monachini, M. and Vossen, P. and Bosma, W. and Agirre, E. and Artola, X. and Díaz de Ilarraza, A. and Rigau, G. and Soroa, A.},
  TITLE = {Database Models and Data Formats},
  YEAR = {2009},
  KEYWORDS = {XML data format, TMF, SEMAF, OWL/KIF, FACTAF},
  URL = {https://publications.cnr.it/doc/157459},
}
@TECHREPORT{CALZOLARI_2009_TECHREPORT_CBGMQST_157465,
  AUTHOR = {Calzolari, N. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Dissemination Plan},
  YEAR = {2009},
  KEYWORDS = {Language Resources},
  URL = {https://publications.cnr.it/doc/157465},
}
@TECHREPORT{CALZOLARI_2009_TECHREPORT_CBGMQST_157468,
  AUTHOR = {Calzolari, N. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 1},
  YEAR = {2009},
  KEYWORDS = {Language Resources},
  URL = {https://publications.cnr.it/doc/157468},
}
@TECHREPORT{CALZOLARI_2009_TECHREPORT_CMSBGQT_157467,
  AUTHOR = {Calzolari, N. and Monachini, M. and Soria, C. and Baroni, P. and Goggi, S. and Quochi, V. and Toral, A.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No.
2},
  YEAR = {2009},
  KEYWORDS = {Language Resources},
  URL = {https://publications.cnr.it/doc/157467},
}
@TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBCGMQTBBCMOP_157462,
  AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Caselli, T. and Goggi, S. and Monachini, M. and Quochi, V. and Toral, A. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Action Plan},
  YEAR = {2009},
  KEYWORDS = {Language Resources},
  URL = {https://publications.cnr.it/doc/157462},
}
@TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBGMQT_157466,
  AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Toral, A.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Evaluation Plan for the functioning of the Network},
  YEAR = {2009},
  KEYWORDS = {Language Resources},
  URL = {https://publications.cnr.it/doc/157466},
}
@TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBMQ_157463,
  AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Monachini, M. and Quochi, V.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Annual Report No. 1},
  YEAR = {2009},
  KEYWORDS = {Language Resources},
  URL = {https://publications.cnr.it/doc/157463},
}
@TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBMQT_157469,
  AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Monachini, M. and Quochi, V. and Toral, A.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Project Presentation},
  YEAR = {2009},
  KEYWORDS = {Language Resources},
  URL = {https://publications.cnr.it/doc/157469},
}
@TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBBCCMMOPQT_157464,
  AUTHOR = {Calzolari, N. and Soria, C. and Bel, N. and Budin, G. and Caselli, T. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Toral, A.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Blueprint of actions and infrastructures No.
1},
  YEAR = {2009},
  KEYWORDS = {Language Resources, Infrastructures, Recommendations},
  URL = {https://publications.cnr.it/doc/157464},
}
@TECHREPORT{HEROLD_2009_TECHREPORT_HHSVRALMTS_157472,
  AUTHOR = {Herold, A. and Hicks, A. and Segers, R. and Vossen, P. and Rigau, G. and Agirre, E. and Laparra, E. and Monachini, M. and Toral, A. and Soria, C.},
  TITLE = {WordNets mapped to Central Ontology},
  YEAR = {2009},
  KEYWORDS = {Ontology},
  URL = {https://publications.cnr.it/doc/157472},
}
@MISC{CALZOLARI_2009_MISC_CBBBCGMMOPQST_157471,
  AUTHOR = {Calzolari, N. and Baroni, P. and Bel, N. and Budin, G. and Choukri, K. and Goggi, S. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C. and Toral, A.},
  TITLE = {The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe},
  YEAR = {2009},
  KEYWORDS = {Language Resources, Language Technologies},
  URL = {https://publications.cnr.it/doc/157471},
}
@MISC{CALZOLARI_2009_MISC_CBBCMOPBGMQST_157457,
  AUTHOR = {Calzolari, N. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.},
  TITLE = {Extended Report of: The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe},
  YEAR = {2009},
  KEYWORDS = {Language Resources, Language Technologies},
  URL = {https://publications.cnr.it/doc/157457},
}
@MISC{CALZOLARI_2009_MISC_CBBCMOPBGMQST_157460,
  AUTHOR = {Calzolari, N. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C.
and Toral, A.},
  TITLE = {Short Report of The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe},
  YEAR = {2009},
  KEYWORDS = {Language Resources, Language Technologies},
  URL = {https://publications.cnr.it/doc/157460},
}
@INPROCEEDINGS{ANANIADOU_2008_INPROCEEDINGS_AMNS_84713,
  AUTHOR = {Ananiadou, S. and Monachini, M. and Nenadic, G. and Su, J.},
  TITLE = {Foreword},
  YEAR = {2008},
  ABSTRACT = {Key resources of interest in biomedical text mining are lexical and knowledge repositories (controlled vocabularies, terminologies, thesauri, ontologies) and annotated corpora, including both task-specific resources and repositories reengineered from biomedical or general language resources. Of particular interest is the process of building annotated resources, including designing guidelines and annotation schemas (aiming at both syntactic and semantic interoperability) and relying on language engineering standards. Challenging aspects are updates and evolution management of resources, as well as their documentation, dissemination and evaluation.},
  KEYWORDS = {Biomedical text mining, Corpora annotated},
  URL = {https://publications.cnr.it/doc/84713},
  ISBN = {2-9517408-4-0},
  CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation-Workshop},
  CONFERENCE_PLACE = {Marrakech, Morocco},
  CONFERENCE_DATE = {26-05/1-06-2008},
}
@INPROCEEDINGS{DELGRATTA_2008_INPROCEEDINGS_DBCMSC_84729,
  AUTHOR = {Del Gratta, R. and Bartolini, R. and Caselli, T. and Monachini, M. and Soria, C. and Calzolari, N.},
  TITLE = {UFRA: a UIMA-based Approach to Federated Language Resource Architecture},
  YEAR = {2008},
  ABSTRACT = {In this paper we address the issue of developing an interoperable infrastructure for language resources and technologies. In our approach, called UFRA, we extend the Federate Database Architecture System adding typical functionalities coming from UIMA.
In this way, we capitalize the advantages of a federated architecture, such as autonomy, heterogeneity and distribution of components, monitored by a central authority responsible for checking both the integration of components and user rights on performing different tasks. We use the UIMA approach to manage and define one common front-end, enabling users and clients to query, retrieve and use language resources and technologies. The purpose of this paper is to show how UIMA leads from a Federated Database Architecture to a Federated Resource Architecture, adding to a registry of available components both static resources such as lexicons and corpora and dynamic ones such as tools and general purpose language technologies. At the end of the paper, we present a case-study that adopts this framework to integrate the SIMPLE lexicon and TIMEML annotation guidelines to tag natural language texts.},
  KEYWORDS = {LR Infrastructures and Architectures, LR web services, Lexicon, Lexical database},
  PAGES = {2634-2639},
  URL = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/656_paper.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {2-9517408-4-0},
  CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Marrakech, Morocco},
  CONFERENCE_DATE = {26 May-1 June 2008},
}
@INPROCEEDINGS{FRANCOPOULO_2008_INPROCEEDINGS_FDSDM_84708,
  AUTHOR = {Francopoulo, G. and Declerck, T. and Sornlertlamvanich, V. and De La Clergerie, E. and Monachini, M.},
  TITLE = {Data Category Registry: Morpho-syntactic and Syntactic Profiles},
  YEAR = {2008},
  ABSTRACT = {After a brief presentation of the data model, we describe a work in progress to define an initial set of morpho-syntactic and syntactic data categories dedicated to NLP applications. The aim is to improve interoperability among language resources and to optimize the process leading to their integration in applications.
The main point is to be sure that when a language resource makes use of a value, the other language resources and programs have the same interpretation for this given value. From a practical point of view, these values are collected from existing lists, discussed, extended, and then recorded within a freely accessible data base: the ISO Data Category Registry.},
  KEYWORDS = {Data Category, Language resources, ISO},
  PAGES = {31-39},
  URL = {https://publications.cnr.it/doc/84708},
  ISBN = {2-9517408-4-0},
  CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation-Workshop: Use and usage of language resource-related standards},
  CONFERENCE_PLACE = {Marrakech, Morocco},
  CONFERENCE_DATE = {26-05/1-06-2008},
}
@INPROCEEDINGS{HAYASHI_2008_INPROCEEDINGS_HDBM_84723,
  AUTHOR = {Hayashi, Y. and Declerck, T. and Buitelaar, P. and Monachini, M.},
  TITLE = {Ontologies for a Global Language Infrastructure},
  YEAR = {2008},
  ABSTRACT = {Given a situation where human language technologies have been maturing considerably and a rapidly growing range of language data resources being now available, together with natural language processing (NLP) tools/systems, a strong need for a global language infrastructure (GLI) is becoming more and more evident, if one wants to ensure re-usability of the resources. A GLI is essentially an open and web-based software platform on which tailored language services can be efficiently composed, disseminated and consumed. An infrastructure of this sort is also expected to facilitate further development of language data resources and NLP functionalities. The aims of this paper are twofold: (1) to discuss necessity of ontologies for a GLI, and (2) to draw a high-level configuration of the ontologies, which are integrated into a comprehensive language service ontology.
To these ends, this paper first explores dimensions of GLI, and then draws a triangular view of a language service, from which necessary ontologies are derived. This paper also examines relevant ongoing international standardization efforts such as LAF, MAF, SynAF, DCR and LMF, and discusses how these frameworks are incorporated into our comprehensive language service ontology. The paper concludes in stressing the need for an international collaboration on the development of a standardized language service ontology.},
  KEYWORDS = {Ontology, Global language infrastructure (GLI)},
  PAGES = {105-112},
  URL = {https://publications.cnr.it/doc/84723},
  CONFERENCE_NAME = {ICGL 2008-The First International Conference on Global Interoperability for Language Resources},
  CONFERENCE_PLACE = {Hong Kong},
  CONFERENCE_DATE = {9-11/01/2008},
  EDITOR = {Webster, J. and Ide, N. and Fang, A.},
}
@INPROCEEDINGS{HAYASHI_2008_INPROCEEDINGS_HNMSC_84725,
  AUTHOR = {Hayashi, Y. and Narawa, C. and Monachini, M. and Soria, C. and Calzolari, N.},
  TITLE = {Ontologizing Lexicon Access Functions based on an LMF-based Lexicon Taxonomy},
  YEAR = {2008},
  ABSTRACT = {This paper discusses ontologization of lexicon access functions in the context of a service-oriented language infrastructure, such as the Language Grid. In such a language infrastructure, an access function to a lexical resource, embodied as an atomic Web service, plays a crucially important role in composing a composite Web service tailored to a user's specific requirement. To facilitate the composition process involving service discovery, planning and invocation, the language infrastructure should be ontology-based; hence the ontologization of a range of lexicon functions is highly required. In a service-oriented environment, lexical resources however can be classified from a service-oriented perspective rather than from a lexicographically motivated standard.
Hence to address the issue of interoperability, the taxonomy for lexical resources should be ground to principled and shared lexicon ontology. To do this, we have ontologized the standardized lexicon modeling framework LMF, and utilized it as a foundation to stipulate the service-oriented lexicon taxonomy and the corresponding ontology for lexicon access functions. This paper also examines a possible solution to fill the gap between the ontological descriptions and the actual Web service API by adopting a W3C recommendation SAWSDL, with which Web service descriptions can be linked with the domain ontology.},
  KEYWORDS = {Lexicon, LR web services, Standards for LRs, Lexical database},
  PAGES = {916-922},
  URL = {https://publications.cnr.it/doc/84725},
  ISBN = {2-9517408-4-0},
  CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Marrakech, Morocco},
  CONFERENCE_DATE = {26-05/1-06-2008},
}
@INPROCEEDINGS{MARCHETTI_2008_INPROCEEDINGS_MTVARCMFHIKNRV_173483,
  AUTHOR = {Marchetti, A. and Tesconi, M. and Vossen, P. and Agirre, E. and Rigau, G. and Calzolari, N. and Monachini, M. and Fellbaum, C. and Hsieh, S. and Isahara, H. and Kanzaki, K. and Neri, F. and Raffaelli, R. and Vangent, J.},
  TITLE = {KYOTO: A System for Mining, Structuring, and Distributing Knowledge Across Languages and Cultures},
  YEAR = {2008},
  ABSTRACT = {We outline work to be carried out within the framework of an impending EC project. The goal is to construct a language-independent information system for a specific domain (environment/ecology) anchored in a language-independent ontology that is linked to WordNets in several languages. For each language, information extraction and identification of lexicalized concepts with ontological entries will be done by text miners ("Kybots"). The mapping of language-specific lexemes to the ontology allows for crosslinguistic identification and translation of equivalent terms.
The infrastructure developed within this project will enable long-range knowledge sharing and transfer to many languages and cultures, addressing the need for global and uniform transition of knowledge beyond the domain of ecology and environment addressed here.},
  KEYWORDS = {Global WordNet Grid, Ontologies and WordNets, Multilinguality, Semantic indexing and search, Text mining},
  PAGES = {474-484},
  URL = {https://publications.cnr.it/doc/173483},
  ISBN = {978-963-482-854-9},
  CONFERENCE_NAME = {GWC2008-The Fourth Global WordNet Conference},
  CONFERENCE_PLACE = {Szeged, Hungary},
  CONFERENCE_DATE = {22-25 January 2008},
  EDITOR = {Tanács, A. and Csendes, D. and Vincze, V. and Fellbaum, C. and Vossen, P.},
}
@INPROCEEDINGS{MONACHINI_2008_INPROCEEDINGS_MQDC_84731,
  AUTHOR = {Monachini, M. and Quochi, V. and Del Gratta, R. and Calzolari, N.},
  TITLE = {Using LMF to Shape a Lexicon for the Biomedical Domain},
  YEAR = {2008},
  ABSTRACT = {This paper describes the design, implementation and population of the BioLexicon in the framework of BootStrep, an FP6 project. The BioLexicon (BL) is a lexical resource designed for text mining in the bio-domain. It has been conceived to meet both domain requirements and upcoming ISO standards for lexical representation. The data model and data categories are compliant to the ISO Lexical Markup Framework and the Data Category Registry. The BioLexicon integrates features of lexicons and terminologies: term entries (and variants) derived from existing resources are enriched with linguistic features, including sub-categorization and predicate-argument information, extracted from texts. Thus, it is an extendable resource. Furthermore, the lexical entries will be aligned to concepts in the BioOntology, the ontological resource of the project. The BL implementation is an extensible relational database with automatic population procedures.
Population relies on a dedicated input data structure allowing to upload terms and their linguistic properties and "pull-and-push" them in the database. The BioLexicon teaches that the state-of-the-art is mature enough to aim at setting up a standard in this domain. Being conformant to lexical standards, the BioLexicon is interoperable and portable to other areas.},
  KEYWORDS = {Domain terminologies, Computational lexicons, Lexical standards, Lexical architectures},
  PAGES = {153-157},
  URL = {https://publications.cnr.it/doc/84731},
  CONFERENCE_NAME = {LangTech 2008-Tecnologia applicata alla linguistica},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {28-29 February 2008},
  EDITOR = {Delogu, C. and Falcone, M.},
}
@INPROCEEDINGS{QUOCHI_2008_INPROCEEDINGS_QMDC_84700,
  AUTHOR = {Quochi, V. and Monachini, M. and Del Gratta, R. and Calzolari, N.},
  TITLE = {A lexicon for biology and bioinformatics: the BOOTStrep experience},
  YEAR = {2008},
  KEYWORDS = {Lexicon, Ontologies, Lexical database},
  PAGES = {2285-2292},
  URL = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/576_paper.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {2-9517408-4-0},
  CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Marrakech, Morocco},
  CONFERENCE_DATE = {26-05/1-06-2008},
}
@INPROCEEDINGS{TAKENOBU_2008_INPROCEEDINGS_TKHHCMSSSCY_84701,
  AUTHOR = {Tokunaga, T. and Kaplan, D. and Huang, C. and Hsieh, S. and Calzolari, N. and Monachini, M. and Soria, C. and Shirai, K. and Sornlertlamvanich, V. and Charoenporn, T. and Xia, Y.},
  TITLE = {Adapting International Standard for Asian Language Technologies},
  YEAR = {2008},
  ABSTRACT = {Corpus-based approaches and statistical approaches have been the main stream of natural language processing research for the past two decades.
Language resources play a key role in such approaches, but there is an insufficient amount of language resources in many Asian languages. In this situation, standardisation of language resources would be of great help in developing resources in new languages. This paper presents the latest development efforts of our project which aims at creating a common standard for Asian language resources that is compatible with an international standard. In particular, the paper focuses on i) lexical specification and data categories relevant for building multilingual lexical resources for Asian languages; ii) a core upper-layer ontology needed for ensuring multilingual interoperability and iii) the evaluation platform used to test the entire architectural framework.},
  KEYWORDS = {LR national/international projects, Organizational/policy issues, LR Infrastructures and Architectures, Lexicon, Lexical database},
  PAGES = {1663},
  URL = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/422_paper.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {2-9517408-4-0},
  CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Marrakech, Morocco},
  CONFERENCE_DATE = {26-05/1-06-2008},
}
@INPROCEEDINGS{TORAL_2008_INPROCEEDINGS_TMM_84722,
  AUTHOR = {Toral, A. and Muñoz, R. and Monachini, M.},
  TITLE = {Named Entity WordNet},
  YEAR = {2008},
  ABSTRACT = {This paper presents the automatic extension of Princeton WordNet with Named Entities (NEs). This new resource is called Named Entity WordNet. Our method maps the noun is-a hierarchy of WordNet to Wikipedia categories, identifies the NEs present in the latter and extracts different information from them such as written variants, definitions, etc. This information is inserted into a NE repository. A module that converts from this generic repository to the WordNet specific format has been developed.
The paper explores different aspects of our methodology such as the treatment of polysemous terms, the identification of hyponyms within the Wikipedia categorization system, the identification of Wikipedia articles which are NEs and the design of a NE repository compliant with the LMF ISO standard. So far, this procedure enriches WordNet with 310,742 NEs and 381,043 "instance of" relations.},
  KEYWORDS = {Lexicon, Named Entity recognition, Ontologies, Lexical database},
  PAGES = {741--747},
  URL = {https://publications.cnr.it/doc/84722},
  ISBN = {2-9517408-4-0},
  CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Marrakech, Morocco},
  CONFERENCE_DATE = {26-05/1-06-2008},
}

@INPROCEEDINGS{TORAL_2008_INPROCEEDINGS_TQDMSC_84714,
  AUTHOR = {Toral, R. A. and Quochi, V. and Del Gratta, R. and Monachini, M. and Soria, C. and Calzolari, N.},
  TITLE = {Lexically-based Ontologies and Ontologically Based Lexicons},
  YEAR = {2008},
  ABSTRACT = {This paper deals with the relations between ontologies and lexicons. We study the role of these two components and their evolution during the last years in the field of Computational Linguistics. Subsequently, we survey the current lines of research at ILC-CNR which tackle this topic. They involve (I) the reuse of already existing Lexical Resources to derive formal ontologies, (II) the conversion and combination of terminologies into rich and formal Lexical Resources and (III) the use of formal ontologies as the backbone of multilingual Lexical Resources.},
  KEYWORDS = {Resource Infrastructure, UIMA, Clarin},
  PAGES = {49--59},
  URL = {https://publications.cnr.it/doc/84714},
  CONFERENCE_NAME = {AI*IA 2008-10th Congress of Italian Association for Artificial Intelligence},
  CONFERENCE_PLACE = {Cagliari},
  CONFERENCE_DATE = {11-13 September 2008},
}

@INPROCEEDINGS{VOSSEN_2008_INPROCEEDINGS_VACFHHIKMMNRRTV_84716,
  AUTHOR = {Vossen, P. and Agirre, E. and Calzolari, N. and Fellbaum, C.
and Hsieh, S. and Huang, C. and Isahara, H. and Kanzaki, K. and Marchetti, A. and Monachini, M. and Neri, F. and Raffaelli, R. and Rigau, G. and Tesconi, M. and Vangent, J.},
  TITLE = {KYOTO: A System for Mining, Structuring, and Distributing Knowledge Across Languages and Cultures},
  YEAR = {2008},
  ABSTRACT = {We outline work performed within the framework of a current EC project. The goal is to construct a language-independent information system for a specific domain (environment/ecology/biodiversity) anchored in a language-independent ontology that is linked to wordnets in seven languages. For each language, information extraction and identification of lexicalized concepts with ontological entries is carried out by text miners ("Kybots"). The mapping of language-specific lexemes to the ontology allows for crosslinguistic identification and translation of equivalent terms. The infrastructure developed within this project enables long-range knowledge sharing and transfer across many languages and cultures, addressing the need for global and uniform transition of knowledge beyond the specific domains addressed here.},
  KEYWORDS = {Information Extraction, Information Retrieval, Digital libraries, Lexicon, Lexical database},
  URL = {https://publications.cnr.it/doc/84716},
  ISBN = {2-9517408-4-0},
  CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Marrakech, Morocco},
  CONFERENCE_DATE = {26-05/1-06-2008},
}

@INPROCEEDINGS{REBHOLZSCHUHMANN_2008_INPROCEEDINGS_RPLDKSMMMCA_112935,
  AUTHOR = {Rebholz Schuhmann, D. and Pezik, P. and Lee, V. and Del Gratta, R. and Kim, J. and Sasaki, Y. and McNaught, J. and Montemagni, S. and Monachini, M. and Calzolari, N.
and Ananiadou, S.},
  TITLE = {BioLexicon: Towards a reference terminological resource in the biomedical domain},
  YEAR = {2008},
  ABSTRACT = {The BioLexicon is a publicly available large-scale terminological resource which brings together potential terms from several resources representing selected semantic types (genes, proteins, chemicals, species, enzymes, selected ontological terms). The schema of the BioLexicon enables improved resolution of term ambiguity and follows lexical standards for terminological resources.},
  KEYWORDS = {BioLexicon},
  URL = {https://publications.cnr.it/doc/112935},
  ISBN = {978-1-61567-371-1},
  CONFERENCE_NAME = {16th Annual International Conference on Intelligent Systems for Molecular Biology},
  CONFERENCE_PLACE = {Toronto, Canada},
  CONFERENCE_DATE = {19-23 July 2008},
}

@TECHREPORT{ALIPRANDI_2008_TECHREPORT_ANMRTSMVBAAARS_157449,
  AUTHOR = {Aliprandi, C. and Neri, F. and Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C. and Monachini, M. and Vossen, P. and Bosma, W. and Agirre, E. and Artola, X. and Arantza, D. and Rigau, G. and Soroa, A.},
  TITLE = {Database models and data formats},
  YEAR = {2008},
  KEYWORDS = {XML data format, TMF, SEMAF, OWL/KIF, FACTAF},
  URL = {https://publications.cnr.it/doc/157449},
}

@TECHREPORT{MARCHETTI_2008_TECHREPORT_MRTSMVB_262193,
  AUTHOR = {Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C. and Monachini, M. and Vossen, P. and Bosma, W.},
  TITLE = {XML Schema for Wordnet and Ontology: DELIVERABLE NR. 1 /WP NR. 7},
  YEAR = {2008},
  ABSTRACT = {This deliverable describes the XML schemata adopted to represent all the data related to the management of the multi-language wordnets and the ontology; they constitute the set of linguistic and semantic resources of KYOTO system.},
  URL = {http://www2.let.vu.nl/twiki/pub/Kyoto/WP07:DatabaseSystemsAndWiki/D7.1_XML_Schema_for_Wordnet_and_Ontology_v2.0.pdf},
}

@TECHREPORT{MARCHETTI_2008_TECHREPORT_MRTSMVB_157455,
  AUTHOR = {Marchetti, A. and Ronzano, F.
and Tesconi, M. and Soria, C. and Monachini, M. and Vossen, P. and Bosma, W.},
  TITLE = {XML Schema for Wordnet and Ontology},
  YEAR = {2008},
  ABSTRACT = {This deliverable describes the XML schema adopted to represent all the data related to the management of the multi-language wordnets and the ontology; they constitute the set of linguistic and semantic resources of KYOTO system.},
  KEYWORDS = {XML Schema, Wordnet, Ontology, LMF, TMF},
  URL = {https://publications.cnr.it/doc/157455},
}

@TECHREPORT{MONACHINI_2008_TECHREPORT_MS_157451,
  AUTHOR = {Monachini, M. and Soria, C.},
  TITLE = {Report on use of LMF for representing WordNets},
  YEAR = {2008},
  KEYWORDS = {WordNets},
  URL = {https://publications.cnr.it/doc/157451},
}

@TECHREPORT{MONACHINI_2008_TECHREPORT_MS_157452,
  AUTHOR = {Monachini, M. and Soria, C.},
  TITLE = {Report on use of TMF and LMF for representing raw terms},
  YEAR = {2008},
  KEYWORDS = {Terminological Data Collection, Terminological Markup Framework, Terminological Markup Language},
  URL = {https://publications.cnr.it/doc/157452},
}

@TECHREPORT{MONACHINI_2008_TECHREPORT_MSC_157454,
  AUTHOR = {Monachini, M. and Soria, C. and Calzolari, N.},
  TITLE = {The Lexical Grid: Lexical Resources in Language Infrastructures},
  YEAR = {2008},
  ABSTRACT = {Language Resources are recognized as central and strategic for the development of any Human Language Technology system and application product. They play a critical role as horizontal technology and have been recognized in many occasions as a priority also by national and supra-national funding a number of initiatives (such as EAGLES, ISLE, ELRA) to establish some sort of coordination of LR activities, and a number of large LR creation projects, both in the written and in the speech areas.},
  KEYWORDS = {Human Language Technology, Language Resources},
  URL = {https://publications.cnr.it/doc/157454},
}

@TECHREPORT{SORIA_2008_TECHREPORT_SM_157450,
  AUTHOR = {Soria, C.
and Monachini, M.},
  TITLE = {KYOTO-LMF WordNet Representation Format},
  YEAR = {2008},
  KEYWORDS = {Ontology linked to wordnets},
  URL = {https://publications.cnr.it/doc/157450},
}

@TECHREPORT{TOKUNAGA_2008_TECHREPORT_TCHKSYCCHKMPS_157453,
  AUTHOR = {Tokunaga, T. and Calzolari, N. and Huang, C. and Shirai, K. and Sornlertlamvanich, V. and Yingju, X. and Charoenporn, T. and Chung, S. and Hsieh, S. and Kaplan, D. and Monachini, M. and Prévot, L. and Soria, C.},
  TITLE = {Developing International Standards of Language Resources for Semantic Web Applications-Research Report of the International Joint Research Program NEDO},
  YEAR = {2008},
  ABSTRACT = {This report describes a three-year project aiming at an international standard for language resources that includes Asian languages. We summarise our contribution to an international standard of lexical markup framework (LMF) and introduce a prototype query expansion system using LMF-compliant lexical resources. Since ISO 24613 was in the FDIS stage and fairly stable, we built sample lexicons in Chinese, English, Italian, Japanese, and Thai based on ISO 24613. At the same time, we implemented a query expansion system utilising rich linguistic resources including lexicons described in the ISO 24613 framework. We confirmed that a system was feasible which worked on the tested languages (including both Western and Eastern languages) when given lexicons are compliant with the framework.},
  KEYWORDS = {International standards, Language resources, Semantic web applications},
  URL = {https://publications.cnr.it/doc/157453},
}

@ARTICLE{BERTAGNA_2007_ARTICLE_BMSCHHMT_30874,
  AUTHOR = {Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N. and Huang, C. and Hsieh, S. and Marchetti, A.
and Tesconi, M.},
  TITLE = {Fostering Intercultural Collaboration: a Web Service Architecture for Cross-Fertilization of Distributed Wordnets},
  YEAR = {2007},
  ABSTRACT = {Enhancing the development of multilingual lexicons is of foremost importance for intercultural collaboration to take place, as multilingual lexicons are the cornerstone of several multilingual applications. However, the development and maintenance of large-scale, robust multilingual dictionaries is a tantalizing task. In this paper we present a tool, based on a web service architecture, enabling semi-automatic generation of bilingual lexicons through linking of distributed monolingual lexical resources. In addition to lexicon development, the architecture also allows enrichment of monolingual source lexicons through exploitation of the semantic information encoded in corresponding entries. In the paper we describe our case study applied to the Italian and Chinese wordnets, and we illustrate how the architecture can be extended to access distributed multilingual WordNets over the Internet, paving the way to exploitation in a cross-lingual framework of the wealth of information built over the last decade.},
  PAGES = {146--158},
  URL = {https://publications.cnr.it/doc/30874},
  VOLUME = {4568},
  PUBLISHER = {Springer (Berlin, DEU)},
  ISSN = {0302-9743},
  JOURNAL = {Lecture Notes in Computer Science},
}

@INPROCEEDINGS{BERTAGNA_2007_INPROCEEDINGS_BMSCRTM_172595,
  AUTHOR = {Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N. and Ronzano, F. and Tesconi, M. and Marchetti, A.},
  TITLE = {Cooperative Building of Semantic Resources},
  YEAR = {2007},
  ABSTRACT = {In this paper we present LexFlow, a framework for the automatic and cooperative enrichment, integration and exploitation of semantic resources.
Borrowing from techniques used in the domain of document workflows, we model the activity of lexicon management as a particular case of workflow instance, where lexical entries move across agents and become dynamically updated. We also give an important exploitation example of the semantic resources managed or built thanks to LexFlow, describing its integration with SemKey, a system for semantic collaborative tagging.},
  KEYWORDS = {semantic resources, cooperative knowledge definition, semantic tagging},
  URL = {https://publications.cnr.it/doc/172595},
  ISBN = {3-540-74781-8},
  CONFERENCE_NAME = {10th Congress of Italian Association for Artificial Intelligence-Cooperative construction of linguistic knowledge bases Workshop},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {10-13 September 2007},
}

@INPROCEEDINGS{BERTAGNA_2007_INPROCEEDINGS_BMSMTHH_173656,
  AUTHOR = {Bertagna, F. and Monachini, M. and Soria, C. and Marchetti, A. and Tesconi, M. and Huang, C. and Hsieh, S.},
  TITLE = {Fostering Intercultural Collaboration: a Web Service Architecture for Cross-Fertilization of Distributed Wordnets},
  YEAR = {2007},
  ABSTRACT = {Enhancing the development of multilingual lexicons is of foremost importance for intercultural collaboration to take place, as multilingual lexicons are the cornerstone of several multilingual applications. However, the development and maintenance of large-scale, robust multilingual dictionaries is a tantalizing task. In this paper we present a tool, based on a web service architecture, enabling semi-automatic generation of bilingual lexicons through linking of distributed monolingual lexical resources. In addition to lexicon development, the architecture also allows enrichment of monolingual source lexicons through exploitation of the semantic information encoded in corresponding entries.
In the paper we describe our case study applied to the Italian and Chinese wordnets, and we illustrate how the architecture can be extended to access distributed multilingual WordNets over the Internet, paving the way to exploitation in a cross-lingual framework of the wealth of information built over the last decade.},
  KEYWORDS = {distributed language resources, interoperable lexical resources, integration of WordNets},
  PAGES = {185--198},
  URL = {https://publications.cnr.it/doc/173656},
  VOLUME = {4568},
  DOI = {10.1007/978-3-540-74000-1_11},
  PUBLISHER = {Springer (Berlin, DEU)},
  ISBN = {978-1-60558-198-9},
  CONFERENCE_NAME = {IWIC 2007-The First International Workshop on Intercultural Collaboration},
  CONFERENCE_PLACE = {Kyoto, Japan},
  CONFERENCE_DATE = {25-26 January 2007},
}

@INPROCEEDINGS{FRANCOPOULO_2007_INPROCEEDINGS_FBGCMPS_84673,
  AUTHOR = {Francopoulo, G. and Bel, N. and George, M. and Calzolari, N. and Monachini, M. and Pet, M. and Soria, C.},
  TITLE = {Lexical Markup Framework: an ISO Standard for Semantic Information in NLP Lexicons},
  YEAR = {2007},
  ABSTRACT = {Lexical Markup Framework (LMF) is a model that provides a common standardized framework for Natural Language Processing (NLP) lexicons. The goals of LMF are to provide a common model for the creation and use of such lexical resources to manage the exchange of data between and among these resources, and to enable the merging of a large number of individual resources to form extensive global electronic resources.},
  URL = {https://publications.cnr.it/doc/84673},
  ISBN = {978-3-8233-6314-9},
  CONFERENCE_NAME = {GLDV2007-Lexical-Semantic and Ontological Resources of the GLDV Working Group on Lexicography at the Biennal Spring Conference},
  CONFERENCE_PLACE = {Tubingen},
  CONFERENCE_DATE = {13-14/04/2007},
}

@INPROCEEDINGS{MONACHINI_2007_INPROCEEDINGS_MQRC_84676,
  AUTHOR = {Monachini, M. and Quochi, V. and Ruimy, N.
and Calzolari, N.},
  TITLE = {Lexical Relations and Domain Knowledge: The BioLexicon Meets the Qualia Structure},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/84676},
  CONFERENCE_NAME = {GL2007: Fourth International Conference on Generative Approaches to the Lexicon},
  CONFERENCE_PLACE = {Paris},
  CONFERENCE_DATE = {10-11 May 2007},
  EDITOR = {Bouillon, P. and Danlos, L. and Kanzaki, K.},
}

@INPROCEEDINGS{QUOCHI_2007_INPROCEEDINGS_QDSMC_84735,
  AUTHOR = {Quochi, V. and Del Gratta, R. and Sassolini, E. and Monachini, M. and Calzolari, N.},
  TITLE = {Toward a Standard Lexical Resource in the Bio Domain},
  YEAR = {2007},
  ABSTRACT = {The present paper describes a large-scale lexical resource for the biology domain designed both for human and for machine use. This lexicon aims at semantic interoperability and extendability, through the adoption of ISO-LMF standard for lexical representation and through a granular and distributed encoding of relevant information. The first part of this contribution focuses on three aspects of the model that are of particular interest to the biology community: the treatment of term variants, the representation on bio events and the alignment with a domain ontology. The second part of the paper describes the physical implementation of the model: a relational database equipped with a set of automatic uploading procedures. Peculiarity of the BioLexicon is that it combines features of both terminologies and lexicons. A set of verbs relevant for the domain is also represented with full details on their syntactic and semantic argument structure.},
  KEYWORDS = {Lexical representation model, Lexical Database, Computational Lexicography, Special Domains, Standards},
  PAGES = {295--299},
  URL = {https://publications.cnr.it/doc/84735},
  PUBLISHER = {Fundacja Uniwersytetu im A. Mickiewicza (Poznan, POL)},
  ISBN = {978-83-7177-413-3},
  CONFERENCE_NAME = {LTC07-3rd Language and Technology Conference: Human Language Technology.
Challenges of the Information Society},
  CONFERENCE_PLACE = {Poznan, Poland},
  CONFERENCE_DATE = {5-7 October 2007},
}

@INPROCEEDINGS{TORAL_2007_INPROCEEDINGS_TM_84684,
  AUTHOR = {Toral, A. and Monachini, M.},
  TITLE = {Formalising and bottom-up enriching the ontology of a Generative Lexicon},
  YEAR = {2007},
  ABSTRACT = {This paper presents on-going research to formalise the ontology of a computational lexicon in OWL (W3C standard) as well as to enrich it by applying a bottom-up approach that extracts semantic information from the lexicon. The resource used follows the Generative Lexicon (GL) theory and therefore (1) puts a challenge to ontology design as its semantic types are multidimensional and (2) enables the acquisition of further knowledge on concepts from semantic units. The formalisation allows the ontology to be processed by Description Logics reasoners as well as to be employed in Semantic Web applications. Moreover, the lexicon-driven enrichment increases the semantic information present in the ontology making it appropriate for ontology-driven Natural Language Processing. Finally, the paper studies the application of these procedures to a subsequent GL-based biological resource.},
  KEYWORDS = {Ontologies, Generative Lexicon, Qualia Structure, Semantic Web},
  PAGES = {599--603},
  URL = {https://publications.cnr.it/doc/84684},
  PUBLISHER = {INCOMA Ltd (Shoumen, BGR)},
  ISBN = {978-954-91743-7-3},
  CONFERENCE_NAME = {RANLP-2007-International Conference on Recent Advances in Natural Language Processing},
  CONFERENCE_PLACE = {Borovets, Bulgaria},
  CONFERENCE_DATE = {27-29 September 2007},
  EDITOR = {Angelova, G. and Bontcheva, K. and Mitkov, R. and Nicolov, N. and Nikolov, N.},
}

@INPROCEEDINGS{TORAL_2007_INPROCEEDINGS_TM_84685,
  AUTHOR = {Toral, A.
and Monachini, M.},
  TITLE = {SIMPLE-OWL: a Generative Lexicon Ontology for NLP and the Semantic Web},
  YEAR = {2007},
  ABSTRACT = {This research deals with the modelling of a Generative Lexicon based ontology to be used in the Semantic Web and Natural Language Processing semantic tasks. This ontology is imported from an existing computational Lexical Resource and is converted to the W3C standard Web Ontology Language. This presents some challenges, as for example the multidimensionality of the original ontology, which are covered in the current paper. The result of this research is an OWL compliant semantically rich and linguistically-based ontology, thus useful to the automatic processing of text within the Semantic Web paradigm.},
  KEYWORDS = {Owl, Ontologies, Generative Lexicon, Semantic Web},
  URL = {https://publications.cnr.it/doc/84685},
  ISBN = {3-540-74781-8},
  CONFERENCE_NAME = {10th Congress of Italian Association for Artificial Intelligence-Senso Comune Workshop},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {10-13 September 2007},
}

@INPROCEEDINGS{TORAL_2007_INPROCEEDINGS_TMM_84686,
  AUTHOR = {Toral, A. and Monachini, M. and Muñoz, R.},
  TITLE = {Automatically converting and enriching a computational lexicon Ontology for NLP semantic tasks},
  YEAR = {2007},
  ABSTRACT = {This paper describes the automatic transformation of a Generative Lexicon (GL) based Ontology into OWL, the Semantic Web ontology language. Furthermore, the OWL ontology is automatically enriched by means of a bottom-up procedure that extracts additional semantic information (relationships, features, predicates and quantifier restrictions) from the lexicon. The contribution of this research is two-fold. On one hand, we introduce a methodology for the formalisation of GL ontologies.
On the other, we have developed automatic procedures that bring out a formalised, reasoning-capable, and semantically rich ontology, thus suitable for Natural Language Processing semantic tasks.},
  PAGES = {216--220},
  URL = {https://publications.cnr.it/doc/84686},
  PUBLISHER = {Fundacja Uniwersytetu im A. Mickiewicza (Poznan, POL)},
  ISBN = {978-83-7177-413-3},
  CONFERENCE_NAME = {LTC07-3rd Language \& Technology Conference: Human Language Technologies as a Challenge for Computer Science and Linguistics},
  CONFERENCE_PLACE = {Poznan},
  CONFERENCE_DATE = {5-7 October 2007},
}

@TECHREPORT{CALZOLARI_2007_TECHREPORT_CMQSGB_157444,
  AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V. and Soria, C. and Goggi, S. and Baroni, P.},
  TITLE = {FLaReNet: Fostering Language Resources Network. Grant Agreement n° 617001, eContentPlus},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157444},
}

@TECHREPORT{DELGRATTA_2007_TECHREPORT_DBCEMQS_157442,
  AUTHOR = {Del Gratta, R. and Bartolini, R. and Caselli, T. and Enea, A. and Monachini, M. and Quochi, V. and Sassolini, E.},
  TITLE = {TimeML: An Ontological Mapping onto the UIMA Type Systems},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157442},
}

@TECHREPORT{DELGRATTA_2007_TECHREPORT_DMQSC_157425,
  AUTHOR = {Del Gratta, R. and Monachini, M. and Quochi, V. and Sassolini, E. and Calzolari, N.},
  TITLE = {Bio-Lexicon DataBase: Architecture, Concepts and Loading Software},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157425},
}

@TECHREPORT{DELGRATTA_2007_TECHREPORT_DTQM_157441,
  AUTHOR = {Del Gratta, R. and Toral, A. and Quochi, V. and Monachini, M.},
  TITLE = {LocalBioLex: A database framework for biolinguistic research on integrated databases},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157441},
}

@TECHREPORT{FRANCOPOULO_2007_TECHREPORT_FMC_157433,
  AUTHOR = {Francopoulo, G. and Monachini, M.
and Calzolari, N.},
  TITLE = {Lexical Markup Framework: an ISO Standard for Semantic Information in NLP Lexicons},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157433},
}

@TECHREPORT{FRANCOPOULO_2007_TECHREPORT_FMC_157435,
  AUTHOR = {Francopoulo, G. and Monachini, M. and Calzolari, N.},
  TITLE = {Lexical Standards for ISO ballot},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157435},
}

@TECHREPORT{MONACHINI_2007_TECHREPORT_M_157434,
  AUTHOR = {Monachini, M.},
  TITLE = {Test-suites of ISO conformant lexical entries},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157434},
}

@TECHREPORT{ROMARY_2007_TECHREPORT_RFMDBWFG_157438,
  AUTHOR = {Romary, L. and Francopoulo, G. and Monachini, M. and Declerck, T. and Bunt, H. and Wittenburg, P. and Funk, A. and Gillam, L.},
  TITLE = {LIRICS-Final Public Report},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157438},
}

@MISC{TESCONI_2007_MISC_TMBMSC_157409,
  AUTHOR = {Tesconi, M. and Marchetti, A. and Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N.},
  TITLE = {LeXFlow: a Prototype Supporting Collaborative Lexicon Development and Cross-fertilization},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157409},
}

@INPROCEEDINGS{CALZOLARI_2006_INPROCEEDINGS_CSSCPBEMSC_84625,
  AUTHOR = {Calzolari, F. and Sassolini, E. and Sassi, M. and Cucurullo, S. and Picchi, E. and Bertagna, F. and Enea, A. and Monachini, M. and Soria, C. and Calzolari, N.},
  TITLE = {Next Generation Language Resources using Grid},
  YEAR = {2006},
  ABSTRACT = {This paper presents a case study concerning the challenges and requirements posed by next generation language resources, realized as an overall model of open, distributed and collaborative language infrastructure. If a sort of "new paradigm" for language resource sharing is required, we think that the emerging and still evolving technology connected to Grid computing is a very interesting and suitable one for a concrete realization of this vision.
Given the current limitations of Grid computing, it is very important to test the new environment on basic language analysis tools, in order to get the feeling of what are the potentialities and possible limitations connected to its use in NLP. For this reason, we have done some experiments on a module of the Linguistic Miner, i.e. the extraction of linguistic patterns from restricted domain corpora. The Grid environment has produced the expected results (reduction of the processing time, huge storage capacity, data redundancy) without any additional cost for the final user.},
  KEYWORDS = {grid, acquisition, topic classification},
  PAGES = {1858--1861},
  URL = {https://publications.cnr.it/doc/84625},
  ISBN = {2-9517408-2-4},
  CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Genova},
  CONFERENCE_DATE = {24-26 May 2006},
}

@INPROCEEDINGS{FRANCOPOULO_2006_INPROCEEDINGS_FBGCMPS_84649,
  AUTHOR = {Francopoulo, G. and Bel, N. and George, M. and Calzolari, N. and Monachini, M. and Pet, M. and Soria, C.},
  TITLE = {Lexical markup framework (LMF) for NLP multilingual resources},
  YEAR = {2006},
  ABSTRACT = {Optimizing the production, maintenance and extension of lexical resources is one of the crucial aspects impacting Natural Language Processing (NLP). A second aspect involves optimizing the process leading to their integration into applications. In this respect, we believe that the production of a consensual specification on multilingual lexicons can be a useful aid for the various NLP actors.
Within ISO, one purpose of LMF (ISO-24613) is to define a standard for lexicons that covers multilingual data.},
  PAGES = {1--8},
  URL = {https://publications.cnr.it/doc/84649},
  ISBN = {1-932432-69-8},
  CONFERENCE_NAME = {COLING-ACL Workshop on Multilingual Lexical Resources and Interoperability},
  CONFERENCE_PLACE = {Sydney (Australia)},
  CONFERENCE_DATE = {2006},
  BOOKTITLE = {Proceedings of the Workshop on Multilingual Language Resources and Interoperability},
}

@INPROCEEDINGS{FRANCOPOULO_2006_INPROCEEDINGS_FDMR_84631,
  AUTHOR = {Francopoulo, G. and Declerck, T. and Monachini, M. and Romary, L.},
  TITLE = {The relevance of standards for research infrastructures},
  YEAR = {2006},
  URL = {https://publications.cnr.it/doc/84631},
  CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Genoa},
  CONFERENCE_DATE = {2006},
}

@INPROCEEDINGS{FRANCOPOULO_2006_INPROCEEDINGS_FGCMBPS_84632,
  AUTHOR = {Francopoulo, G. and George, M. and Calzolari, N. and Monachini, M. and Bel, N. and Pet, M. and Soria, C.},
  TITLE = {LMF for multilingual, specialized lexicons},
  YEAR = {2006},
  PAGES = {27--32},
  URL = {https://publications.cnr.it/doc/84632},
  ISBN = {2-9517408-2-4},
  CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Genova},
  CONFERENCE_DATE = {24-26 May 2006},
}

@INPROCEEDINGS{FRANCOPOULO_2006_INPROCEEDINGS_FGCMBPS_84633,
  AUTHOR = {Francopoulo, G. and George, M. and Calzolari, N. and Monachini, M. and Bel, N. and Pet, M. and Soria, C.},
  TITLE = {Lexical Markup Framework (LMF)},
  YEAR = {2006},
  ABSTRACT = {Optimizing the production, maintenance and extension of lexical resources is one of the crucial aspects impacting Natural Language Processing (NLP). A second aspect involves optimizing the process leading to their integration in applications.
With this respect, we believe that the production of a consensual specification on lexicons can be a useful aid for the various NLP actors. Within ISO, the purpose of LMF is to define a standard for lexicons. LMF is a model that provides a common standardized framework for the construction of NLP lexicons. The goals of LMF are to provide a common model for the creation and use of lexical resources, to manage the exchange of data between and among these resources, and to enable the merging of a large number of individual electronic resources to form extensive global electronic resources. In this paper, we describe the work in progress within the sub-group ISO-TC37/SC4/WG4. Various experts from a lot of countries have been consulted in order to take into account best practices in a lot of languages for (we hope) all kinds of NLP lexicons.},
  PAGES = {233--236},
  URL = {https://publications.cnr.it/doc/84633},
  PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)},
  ISBN = {2-9517408-2-4},
  CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Genoa},
  CONFERENCE_DATE = {2006},
}

@INPROCEEDINGS{FRANCOPOULO_2006_INPROCEEDINGS_FMRS_84650,
  AUTHOR = {Francopoulo, G. and Monachini, M. and Romary, L. and Salmon Alt, S.},
  TITLE = {Lexical Markup Framework: Working to Reach a Consensual ISO Standard on Lexicons},
  YEAR = {2006},
  URL = {https://publications.cnr.it/doc/84650},
  CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Genova},
  CONFERENCE_DATE = {22 May 2006},
  BOOKTITLE = {Lexical Markup Framework: Working to Reach a Consensual ISO Standard on Lexicons-Tutorial},
}

@INPROCEEDINGS{GIOULI_2006_INPROCEEDINGS_GLGPMSCC_84634,
  AUTHOR = {Giouli, V. and Labropoulou, P. and Gavrilidou, M. and Piperidis, S. and Monachini, M. and Soria, C. and Calzolari, N.
and Choukri, K.},
  TITLE = {Language Resources Production Models: the Case of the INTERA Multilingual Corpus and Terminology},
  YEAR = {2006},
  ABSTRACT = {This paper reports on the multilingual Language Resources (MLRs), i.e. parallel corpora and terminological lexicons for less widely digitally available languages, that have been developed in the INTERA project and the methodology adopted for their production. Special emphasis is given to the reality factors that have influenced the MLRs development approach and their final constitution. Building on the experience gained in the project, a production model has been elaborated, suggesting ways and techniques that can be exploited in order to improve LRs production taking into account realistic issues.},
  KEYWORDS = {multilingual parallel corpora, language resources production models, less widely digitally available languages},
  PAGES = {609--614},
  URL = {https://publications.cnr.it/doc/84634},
  ISBN = {2-9517408-2-4},
  CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Genova},
  CONFERENCE_DATE = {24-26 May 2006},
}

@INPROCEEDINGS{MARCHETTI_2006_INPROCEEDINGS_MTRRBMSCHH_84652,
  AUTHOR = {Marchetti, A. and Tesconi, M. and Ronzano, F. and Rosella, M. and Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N. and Huang, C. R. and Hsieh, S. K.},
  TITLE = {Towards an Architecture for the GlobalWordNet Initiative},
  YEAR = {2006},
  URL = {https://publications.cnr.it/doc/84652},
  CONFERENCE_NAME = {SWAP-06, the 3rd Italian Semantic Web Workshop},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {2006},
}

@INPROCEEDINGS{MARCHETTI_2006_INPROCEEDINGS_MTRRBMSCHH_263653,
  AUTHOR = {Marchetti, A. and Tesconi, M. and Ronzano, F. and Rosella, M. and Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N. and Huang, C.
and Hsieh, S.},
  TITLE = {Toward an Architecture for the Global Wordnet Initiative},
  YEAR = {2006},
  ABSTRACT = {Enhancing the development of multilingual lexicons is of foremost importance for intercultural collaboration to take place, as multilingual lexicons are the cornerstone of several multilingual applications. However, the development and maintenance of large-scale, robust multilingual dictionaries is a tantalizing task. Moreover, Semantic Web's growing interest towards the availability of high-quality lexical resources and their multilingual interoperability, is focusing more and more attention on this topic. In this paper we present a tool, based on a web service architecture, enabling semi-automatic generation of bilingual lexicons through linking of distributed monolingual lexical resources. In addition to lexicon development, the architecture also allows enrichment of monolingual source lexicons through exploitation of the semantic information encoded in corresponding entries.
In the paper we describe our case study applied to the Italian and Chinese wordnets, and we illustrate how the architecture can be extended to access distributed multilingual WordNets over the Internet, paving the way to exploitation in a cross-lingual framework of the wealth of information built over the last decade.}, KEYWORDS = {Lexical resource, wordnet, multilingual interoperability, semantic web}, PAGES = {7-35}, URL = {http://ceur-ws.org/Vol-201/35.pdf}, CONFERENCE_NAME = {SWAP 2006-Semantic Web Applications and Perspectives}, CONFERENCE_PLACE = {Pisa, Italy}, CONFERENCE_DATE = {18-20 December, 2006}, } @INPROCEEDINGS{MONACHINI_2006_INPROCEEDINGS_M_84638, AUTHOR = {Monachini, M.}, TITLE = {LMF semantic package and mapping of existing semantic lexicons}, YEAR = {2006}, ABSTRACT = {The definition of a standard for the representation of lexical data has progressively become mandatory in the linguistic and computational linguistic community to cope with the ever increasing number of digital lexical data that are gathered and disseminated worldwide. LMF should be seen by the community as a tool for modelling one's own lexical data, with the possible result that people will provide useful feedback on the usability and needed evolution of the standard project.}, PAGES = {29}, URL = {https://publications.cnr.it/doc/84638}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {22 Maggio 2006}, BOOKTITLE = {Lexical Markup Framework: Working to Reach a Consensual ISO Standard on Lexicons-Tutorial}, EDITOR = {Francopoulo, G. and Monachini, M. and Romary, L. and Salmon Alt, S.}, } @INPROCEEDINGS{MONACHINI_2006_INPROCEEDINGS_MCCFMMOU_84639, AUTHOR = {Monachini, M. and Calzolari, N. and Choukri, K. and Friedrich, J. and Maltese, G. and Mammini, M. and Odijk, J. 
and Ulivieri, M.}, TITLE = {Unified Lexicon and Unified Morphosyntactic Specifications for Written and Spoken Italian}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84639}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{SORIA_2006_INPROCEEDINGS_STBCMM_171465, AUTHOR = {Soria, C. and Tesconi, M. and Bertagna, F. and Calzolari, N. and Marchetti, A. and Monachini, M.}, TITLE = {Moving to dynamic computational lexicons with LeXFlow}, YEAR = {2006}, ABSTRACT = {LeXFlow is a framework for semi-automatic integration of lexicons, already expressed in standardized format. LeXFlow is intended as a tool for, on the one hand, paving the way to the development of dynamic multi-source lexicons; and on the other, for fostering the adoption of standards. Borrowing from techniques used in the domain of document workflows, we model the activity of lexicon management as a particular case of workflow instance, where lexical entries move across agents and become dynamically updated. To this end, we have designed a lexical flow (LF) corresponding to the scenario where an entry of a lexicon A becomes enriched via basically two steps. First, by virtue of being mapped onto a corresponding entry belonging to a lexicon B, the entry(LA) inherits the semantic relations available in B. Second, by resorting to an automatic application that acquires information about semantic relations from corpora, the relations acquired are integrated into the entry and proposed to the human encoder. 
As a result of the lexical flow, in addition, for each starting lexical entry(LA) mapped onto a corresponding entry(LB) the flow produces a new entry representing the merging of the original two entries.}, KEYWORDS = {computational lexicons, collaborative authoring}, PAGES = {12}, URL = {https://publications.cnr.it/doc/171465}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {24-26 Maggio 2006}, } @INPROCEEDINGS{TESCONI_2006_INPROCEEDINGS_TMBMHCS_173931, AUTHOR = {Tesconi, M. and Marchetti, A. and Bertagna, F. and Monachini, M. and Huang, C. and Calzolari, N. and Soria, C.}, TITLE = {Towards agent-based cross-lingual interoperability of distributed lexical resources}, YEAR = {2006}, ABSTRACT = {In this paper we present an application fostering the integration and interoperability of computational lexicons, focusing on the particular case of mutual linking and cross-lingual enrichment of two wordnets, ItalWordNet and Sinica-BOW lexicons. This is intended as a case-study investigating the needs and requirements of semi-automatic integration and interoperability of lexical resources.}, KEYWORDS = {wordnet, multilingual computational lexicons, collaborative authoring}, PAGES = {17-24}, URL = {https://publications.cnr.it/doc/173931}, ISBN = {1-932432-69-8}, CONFERENCE_NAME = {ACL Workshop on Multilingual Lexical Resources and Interoperability}, CONFERENCE_PLACE = {Sydney (Australia)}, CONFERENCE_DATE = {15-23 Luglio 2006}, BOOKTITLE = {Proceedings of the Workshop on Multilingual Language Resources and Interoperability}, } @INPROCEEDINGS{TESCONI_2006_INPROCEEDINGS_TMBMSC_83590, AUTHOR = {Tesconi, M. and Marchetti, A. and Bertagna, F. and Monachini, M. and Soria, C. 
and Calzolari, N.}, TITLE = {LeXFlow: a system for cross-fertilization of computational lexicons}, YEAR = {2006}, ABSTRACT = {This demo presents LeXFlow, a workflow management system for cross-fertilization of computational lexicons. Borrowing from techniques used in the domain of document workflows, we model the activity of lexicon management as a set of workflow types, where lexical entries move across agents in the process of being dynamically updated. A prototype of LeXFlow has been implemented with extensive use of XML technologies (XSLT, XPath, XForms, SVG) and open-source tools (Cocoon, Tomcat, MySQL). LeXFlow is a web-based application that enables the cooperative and distributed management of computational lexicons.}, KEYWORDS = {computational lexicons, collaborative authoring}, URL = {https://publications.cnr.it/doc/83590}, DOI = {10.3115/1225403.1225406}, CONFERENCE_NAME = {COLING-ACL '06 Proceedings of the COLING/ACL on Interactive presentation sessions}, CONFERENCE_PLACE = {Sydney (Australia)}, CONFERENCE_DATE = {16-23 luglio 2006}, } @INPROCEEDINGS{TESCONI_2006_INPROCEEDINGS_TMBMSC_84656, AUTHOR = {Tesconi, M. and Marchetti, A. and Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {LeXFlow: a framework for cross-fertilization of computational lexicons}, YEAR = {2006}, ABSTRACT = {This demo presents LeXFlow, a workflow management system for cross-fertilization of computational lexicons. Borrowing from techniques used in the domain of document workflows, we model the activity of lexicon management as a set of workflow types, where lexical entries move across agents in the process of being dynamically updated. A prototype of LeXFlow has been implemented with extensive use of XML technologies (XSLT, XPath, XForms, SVG) and open-source tools (Cocoon, Tomcat, MySQL). 
LeXFlow is a web-based application that enables the cooperative and distributed management of computational lexicons.}, PAGES = {9-12}, URL = {https://publications.cnr.it/doc/84656}, ISBN = {1-932432-69-8}, CONFERENCE_NAME = {COLING/ACL 2006}, CONFERENCE_PLACE = {Sydney (Australia)}, CONFERENCE_DATE = {15-23 Luglio 2006}, BOOKTITLE = {Proceedings of the COLING/ACL 2006 Interactive Presentation Sessions}, } @INPROCEEDINGS{TOKUNAGA_2006_INPROCEEDINGS_TSCCMSHPXYK_84657, AUTHOR = {Tokunaga, T. and Sornlertlamvanich, V. and Charoenporn, T. and Calzolari, N. and Monachini, M. and Soria, C. and Huang, C. and Prevot, L. and Xia, Y. and Yu, H. and Kiyoaki, S.}, TITLE = {Infrastructure for standardization of Asian language resources}, YEAR = {2006}, ABSTRACT = {As an area of great linguistic and cultural diversity, Asian language resources have received much less attention than their western counterparts. Creating a common standard for Asian language resources that is compatible with an international standard has at least three strong advantages: to increase the competitive edge of Asian countries, to bring Asian countries to closer to their western counterparts, and to bring more cohesion among Asian countries. To achieve this goal, we have launched a two year project to create a common standard for Asian language resources. The project is comprised of four research items, (1) building a description framework of lexical entries, (2) building sample lexicons, (3) building an upper-layer ontology and (4) evaluating the proposed framework through an application. 
This paper outlines the project in terms of its aim and approach.}, PAGES = {827-834}, URL = {https://publications.cnr.it/doc/84657}, ISBN = {1-932432-69-8}, CONFERENCE_NAME = {COLING/ACL 2006}, CONFERENCE_PLACE = {Sydney (Australia)}, CONFERENCE_DATE = {15-26 luglio 2006}, BOOKTITLE = {Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions}, } @TECHREPORT{MONACHINI_2006_TECHREPORT_MSCFB_157402, AUTHOR = {Monachini, M. and Soria, C. and Calzolari, N. and Francopoulo, G. and Bel, N.}, TITLE = {WD of Lexica standard for CD ballot}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157402}, } @TECHREPORT{QUOCHI_2006_TECHREPORT_QMCDS_157403, AUTHOR = {Quochi, V. and Monachini, M. and Calzolari, N. and Del Gratta, R. and Sassolini, E.}, TITLE = {Bio-Lexicon Model and Preliminary ISO Conformant Data Categories}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157403}, } @TECHREPORT{RUIMY_2006_TECHREPORT_RMC_157404, AUTHOR = {Ruimy, N. and Monachini, M. and Calzolari, N.}, TITLE = {Lessico Computazionale Multilivello dell'Italiano PAROLE-SIMPLE-CLIPS}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157404}, } @ARTICLE{MONACHINI_2005_ARTICLE_MS_64515, AUTHOR = {Monachini, M. and Soria, C.}, TITLE = {Building Multilingual Terminological Lexicons for Less Widely Available Languages}, YEAR = {2005}, ABSTRACT = {Availability of Linguistic Resources for the development of Human Language Technology applications is nowadays recognized as a critical issue with both political and economic impact and implications on the sphere of cultural identity. This paper reports about the experience gained during the INTERA European project for the production of multilingual terminological lexicons for less widely available languages, i.e. those languages that suffer from poor representation over the net and from scarce computational resources, but yet are requested by the market. 
It discusses the procedure followed within the project, focuses on the problems faced which had an impact on the initial goals, presents the necessary modifications that resulted from these problems, evaluates the market needs as attested by various surveys, and describes the methodology that is proposed for the efficient production of Multilingual Terminological Lexicons.}, PAGES = {251-261}, URL = {https://publications.cnr.it/doc/64515}, VOLUME = {15}, PUBLISHER = {Polish Scientific Publishers PWN (Warszawa, Polonia)}, ISSN = {1230-2384}, JOURNAL = {Archives of Control Sciences}, } @INPROCEEDINGS{GAVRILIDOU_2005_INPROCEEDINGS_GLMPS_84580, AUTHOR = {Gavrilidou, M. and Labropoulou, P. and Monachini, M. and Piperidis, S. and Soria, C.}, TITLE = {Building Multilingual Language Resources}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84580}, ISBN = {954-91743-2-8}, CONFERENCE_NAME = {Language and Speech Infrastructure for Information Access in the Balkan Countries}, CONFERENCE_PLACE = {Borovets, Bulgaria}, CONFERENCE_DATE = {25/2/2005}, } @INPROCEEDINGS{MONACHINI_2005_INPROCEEDINGS_MC_84585, AUTHOR = {Monachini, M. and Calzolari, N.}, TITLE = {Initiatives towards the integration of Lexicons: MILE is taking steps forward}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84585}, CONFERENCE_NAME = {Machine Translation}, CONFERENCE_PLACE = {Kothen, Germany}, CONFERENCE_DATE = {2005}, } @INPROCEEDINGS{MONACHINI_2005_INPROCEEDINGS_MS_84597, AUTHOR = {Monachini, M. and Soria, C.}, TITLE = {Building Multilingual Terminological Lexicons for Less Widely Available Languages}, YEAR = {2005}, ABSTRACT = {Availability of Linguistic Resources for the development of Human Language Technology applications is nowadays recognized as a critical issue with both political and economic impact and implications on the sphere of cultural identity. 
This paper reports about the experience gained during the INTERA European project for the production of multilingual terminological lexicons for less widely available languages, i.e. those languages that suffer from poor representation over the net and from scarce computational resources, but yet are requested by the market. It discusses the procedure followed within the project, focuses on the problems faced which had an impact on the initial goals, presents the necessary modifications that resulted from these problems, evaluates the market needs as attested by various surveys, and describes the methodology that is proposed for the efficient production of Multilingual Terminological Lexicons.}, PAGES = {129-133}, URL = {https://publications.cnr.it/doc/84597}, PUBLISHER = {IMPRESJA Wydawnictwa Elektroniczne S. A (Poznan, POL)}, ISBN = {83-7111-341-2}, CONFERENCE_NAME = {2nd Language \& Technology Conference Human Language Technologies as a Challenge for Computer Science and Linguistics}, CONFERENCE_PLACE = {Poznan}, CONFERENCE_DATE = {21-23 Aprile 2005}, BOOKTITLE = {2nd Language \& Technology Conference Human Language Technologies as a Challenge for Computer Science and Linguistics Poznan}, EDITOR = {Vetulani, Z.}, } @INPROCEEDINGS{SORIA_2005_INPROCEEDINGS_SM_84601, AUTHOR = {Soria, C. and Monachini, M.}, TITLE = {Methods, Models and Standardization Issues for the Creation of Linguistic Resources: the Case of Under-Represented Languages}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84601}, CONFERENCE_NAME = {TALN \& RECITAL 2005: 12ème conférence annuelle sur le Traitement Automatique des Langues Naturelles}, CONFERENCE_PLACE = {Dourdan-France}, CONFERENCE_DATE = {6-10 Giugno 2005}, } @INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_CBLM_157387, AUTHOR = {Calzolari, N. and Bertagna, F. and Lenci, A. and Monachini, M.}, TITLE = {Boosting Lexical Resources for the Semantic Web. 
Generative Lexicon and Lexicon Interoperability}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157387}, CONFERENCE_NAME = {GL2005-3rd International Workshop on Generative Approaches to the Lexicon}, CONFERENCE_PLACE = {Genève, Switzerland}, CONFERENCE_DATE = {19-21 Maggio 2005}, } @TECHREPORT{DECLERCK_2005_TECHREPORT_DKBM_157374, AUTHOR = {Declerck, T. and Kessler, M. and Bel, N. and Monachini, M.}, TITLE = {Evaluation of initiatives for morpho-syntactic and syntactic annotation}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157374}, } @TECHREPORT{FRANCOPOULO_2005_TECHREPORT_FBBDMBSG_157377, AUTHOR = {Francopoulo, G. and Bontcheva, K. and Bunt, H. and Declerck, T. and Monachini, M. and Budin, G. and Schiffrin, A. and Gillam, L.}, TITLE = {Periodic Progress Report}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157377}, } @TECHREPORT{FRANCOPOULO_2005_TECHREPORT_FBBDMBSG_157378, AUTHOR = {Francopoulo, G. and Bontcheva, K. and Bunt, H. and Declerck, T. and Monachini, M. and Budin, G. and Schiffrin, A. and Gillam, L.}, TITLE = {Annual Progress Report}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157378}, } @TECHREPORT{FRANCOPOULO_2005_TECHREPORT_FBMN_157376, AUTHOR = {Francopoulo, G. and Bunt, H. and Monachini, M. and Nioche, J.}, TITLE = {Risk Management Plan}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157376}, } @TECHREPORT{FRANCOPOULO_2005_TECHREPORT_FGCMBPS_157379, AUTHOR = {Francopoulo, G. and George, M. and Calzolari, N. and Monachini, M. and Bel, N. and Pet, M. and Soria, C.}, TITLE = {Language Resource Management – Lexical Markup Framework}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157379}, } @TECHREPORT{MAMMINI_2005_TECHREPORT_MUM_157383, AUTHOR = {Mammini, M. and Ulivieri, M. 
and Monachini, M.}, TITLE = {Unified Lexica: Common sample lexicon and harmonized morpho-syntactic specifications between PAROLE and LCStar}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157383}, } @TECHREPORT{MONACHINI_2005_TECHREPORT_MSCDW_157385, AUTHOR = {Monachini, M. and Soria, C. and Choukri, K. and Declerck, T. and Wittenburg, P.}, TITLE = {Final Evaluation Report}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157385}, } @TECHREPORT{MONACHINI_2005_TECHREPORT_MSUCDM_157386, AUTHOR = {Monachini, M. and Soria, C. and Ulivieri, M. and Calzolari, N. and Declerck, T. and Mammini, M.}, TITLE = {Evaluation of existing standards for NLP Lexica: Proposal for Candidate Data Categories}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157386}, } @MISC{FRANCOPOULO_2005_MISC_FGCMBPS_151541, AUTHOR = {Francopoulo, G. and George, M. and Calzolari, N. and Monachini, M. and Bel, N. and Pet, M. and Soria, C.}, TITLE = {Lexical Markup Framework}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151541}, } @MISC{GAVRILIDOU_2005_MISC_GLMPS_151543, AUTHOR = {Gavrilidou, M. and Labropoulou, P. and Monachini, M. and Piperidis, S. and Soria, C.}, TITLE = {INTERA Business model}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151543}, } @MISC{MAMMINI_2005_MISC_MUM_151544, AUTHOR = {Mammini, M. and Ulivieri, M. and Monachini, M.}, TITLE = {Lessici Unificati “su richiesta”}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151544}, } @MISC{MAMMINI_2005_MISC_MUM_151545, AUTHOR = {Mammini, M. and Ulivieri, M. and Monachini, M.}, TITLE = {Lessico Unificato}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151545}, } @MISC{MAMMINI_2005_MISC_MUM_151546, AUTHOR = {Mammini, M. and Ulivieri, M. and Monachini, M.}, TITLE = {Specifiche Lessicali Morfo-sintattiche Unificate}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151546}, } @MISC{MONACHINI_2005_MISC_MS_151528, AUTHOR = {Monachini, M. 
and Soria, C.}, TITLE = {Terminologia Multilingue (inglese-greco-serbo-sloveno-bulgaro)}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151528}, } @MISC{MONACHINI_2005_MISC_MSPSR_151547, AUTHOR = {Monachini, M. and Soria, C. and Picchi, E. and Sassolini, E. and Ruffolo, P.}, TITLE = {Procedure e tecniche di acquisizione semi-automatica di terminologie da testi paralleli}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151547}, } @MISC{MONACHINI_2005_MISC_MSUCDM_151542, AUTHOR = {Monachini, M. and Soria, C. and Ulivieri, M. and Calzolari, N. and Declerck, T. and Mammini, M.}, TITLE = {Data Category Registry}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151542}, } @MISC{RUIMY_2005_MISC_RMC_151535, AUTHOR = {Ruimy, N. and Monachini, M. and Calzolari, N.}, TITLE = {Lessico elettronico multi-livello dell'italiano: PAROLE-SIMPLE-CLIPS}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151535}, } @MISC{SORIA_2005_MISC_SM_151538, AUTHOR = {Soria, C. and Monachini, M.}, TITLE = {MILE-OWL}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151538}, } @INPROCEEDINGS{BERTAGNA_2004_INPROCEEDINGS_BCMSU_84572, AUTHOR = {Bertagna, F. and Calzolari, N. and Monachini, M. and Soria, C. and Ulivieri, M.}, TITLE = {Report on the interlingual annotation experience at ILC-CNR}, YEAR = {2004}, URL = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.92.5078\&rep=rep1\&type=pdf}, CONFERENCE_NAME = {Seventh Interlingua Workshop on Determining Interlingua Utility for Machine Translation}, CONFERENCE_PLACE = {Washington DC}, CONFERENCE_DATE = {2 Ottobre 2004}, BOOKTITLE = {Biennial Conference of the AMTA-Determining Interlingua Utility for Machine Translation}, EDITOR = {Habash, N. and Dorr, B. and Hovy, E. 
and Reeder, F.}, } @INPROCEEDINGS{BERTAGNA_2004_INPROCEEDINGS_BLMC_84573, AUTHOR = {Bertagna, F. and Lenci, A. and Monachini, M. and Calzolari, N.}, TITLE = {The MILE Lexical Classes: Data Categories for Content Interoperability among Lexicons}, YEAR = {2004}, PAGES = {8}, URL = {https://publications.cnr.it/doc/84573}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation-Workshop: A Registry of Linguistic Data Categories within an Integrated Language Resources Repository Area (INTERA)}, CONFERENCE_PLACE = {Lisbon, Portugal}, CONFERENCE_DATE = {29-5-2004}, } @INPROCEEDINGS{BERTAGNA_2004_INPROCEEDINGS_BLMC_84574, AUTHOR = {Bertagna, F. and Lenci, A. and Monachini, M. and Calzolari, N.}, TITLE = {Content Interoperability of Lexical Resources: Open Issues and MILE Perspectives}, YEAR = {2004}, ABSTRACT = {The paper tackles the issue of content interoperability among lexical resources, by presenting an experiment of mapping differently conceived lexicons, FrameNet and NOMLEX, onto MILE (Multilingual ISLE Lexical Entry), a meta-entry for the encoding of multilingual lexical information, acting as a general schema of shared and common lexical objects. The aim is to (i) raise problems and (ii) test the expressive potentialities of MILE as a standard environment for Computational Lexicons.}, PAGES = {131-134}, URL = {https://publications.cnr.it/doc/84574}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation, held in Memory of Antonio Zampolli. Lisbon, Portugal, Proceedings, Volume I, Paris, The European Language Resources Association (ELRA)}, CONFERENCE_PLACE = {Lisbon, Portugal}, CONFERENCE_DATE = {26-27-28 May 2004}, } @INPROCEEDINGS{CALZOLARI_2004_INPROCEEDINGS_CCGMBFLMMP_84592, AUTHOR = {Calzolari, N. and Choukri, K. and Gavrilidou, M. and Maegaard, B. and Baroni, P. and Fersøe, H. and Lenci, A. and Mapelli, V. and Monachini, M. 
and Piperidis, S.}, TITLE = {ENABLER Thematic Network of National Projects: Technical, Strategic and Political Issues of LRs}, YEAR = {2004}, ABSTRACT = {In this paper we present general strategies concerning Language Resources (LRs) - Written, Spoken and, recently, Multimodal - as developed within the ENABLER Thematic Network. LRs are a central component of the so-called "linguistic infrastructure" (the other key element being Evaluation), necessary for the development of any Human Language Technology (HLT) application. They play a critical role, as horizontal technology, in different emerging areas of FP6, and have been recognized as a priority within a number of national projects around Europe and world-wide. The availability of LRs is also a "sensitive" issue, touching directly the sphere of linguistic and cultural identity, but also with economical, societal and political implications. This is going to be even more true in the new Europe with 25 languages on a par.}, KEYWORDS = {Language Resources, Strategic and Political Issues, Written and Spoken, Linguistic Infrastructure, Supranational Coordination}, PAGES = {937-940}, URL = {http://www.lrec-conf.org/proceedings/lrec2004/}, VOLUME = {III}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004-Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbon}, CONFERENCE_DATE = {24-30/05/2004}, BOOKTITLE = {Proceedings of the Fourth International Conference on Language Resources and Evaluation}, EDITOR = {Lino, M. T. and Xavier, M. F. and Ferreira, F. and Costa, R. and Silva, R.}, } @INPROCEEDINGS{FERSE_2004_INPROCEEDINGS_FM_84607, AUTHOR = {Fersøe, H. 
and Monachini, M.}, TITLE = {ELRA Validation Methodology and Standard Promotion for Linguistic Resources}, YEAR = {2004}, ABSTRACT = {This paper describes the results of work made for ELRA during 2003-2004. It describes the methodology for validation of written language resources (WLRs), specifically lexica, which has been developed for ELRA and tested on a few resources in the ELRA catalogue. It discusses the importance of key issues in lexicon creation and validation such as the adoption of standards for the coding of linguistic content and the importance of documentation. It reports on the experience gained from applying the methodology to lexical resources in the ELRA catalogue arguing that the checks must be reasonable, informative, on a suitable level of detail, and generic. It proposes a set of basic elements to be included in future discussions on establishing standards for lexicon resources. In conclusion it sketches the work to be undertaken in 2004 to promote validation and the adoption of standards.}, PAGES = {941-944}, URL = {https://publications.cnr.it/doc/84607}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {26-27-28/05/2004}, } @INPROCEEDINGS{MONACHINI_2004_INPROCEEDINGS_MCMRU_84612, AUTHOR = {Monachini, M. and Calzolari, F. and Mammini, M. and Rossi, S. and Ulivieri, M.}, TITLE = {Unifying Lexicons in view of a Phonological and Morphological Lexical DB}, YEAR = {2004}, ABSTRACT = {The present work falls in the line of activities promoted by the European Language Resource Association (ELRA) Production Committee (PCom) and raises issues in methods, procedures and tools for the reusability, creation, and management of Language Resources. A two-fold purpose lies behind this experiment. 
The first aim is to investigate the feasibility, define methods and procedures for combining two Italian lexical resources that have incompatible formats and complementary information into a Unified Lexicon (UL). The adopted strategy and the procedures appointed are described together with the driving criterion of the merging task, where a balance between human and computational efforts is pursued. The coverage of the UL has been maximized, by making use of simple and fast matching procedures. The second aim is to exploit this newly obtained resource for implementing the phonological and morphological layers of the CLIPS lexical database. Implementing these new layers and linking them with the already existing syntactic and semantic layers is not a trivial task. The constraints imposed by the model, the impact at the architectural level and the solution adopted in order to make the whole database 'speak' efficiently are presented. Advantages vs. disadvantages are discussed.}, PAGES = {1107-1110}, URL = {https://publications.cnr.it/doc/84612}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {26-27-28 May 2004}, } @TECHREPORT{GAVRILIDOU_2004_TECHREPORT_GGDLMSPRS_157392, AUTHOR = {Gavrilidou, M. and Giouli, V. and Desipri, E. and Labropoulou, P. and Monachini, M. and Soria, C. and Picchi, E. and Ruffolo, P. and Sassolini, E.}, TITLE = {Report on the multilingual resources production}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/157392}, } @TECHREPORT{GAVRILIDOU_2004_TECHREPORT_GGDMS_157393, AUTHOR = {Gavrilidou, M. and Giouli, V. and Desipri, E. and Monachini, M. and Soria, C.}, TITLE = {Report on the model of LRs production. INTERA}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/157393}, } @ARTICLE{MARINELLI_2003_ARTICLE_MBBGMOPRCZ_64468, AUTHOR = {Marinelli, R. and Biagini, L. and Bindi, R. and Goggi, S. and Monachini, M. 
and Orsolini, P. and Picchi, E. and Rossi, S. and Calzolari, N. and Zampolli, A.}, TITLE = {The Italian PAROLE corpus: an overview}, YEAR = {2003}, PAGES = {401-421}, URL = {https://publications.cnr.it/doc/64468}, VOLUME = {16-17}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{MONACHINI_2003_ARTICLE_MC_64489, AUTHOR = {Monachini, M. and Calzolari, N.}, TITLE = {Methods for standardization: the case of morphosyntax within the EAGLES project}, YEAR = {2003}, PAGES = {423-460}, URL = {https://publications.cnr.it/doc/64489}, VOLUME = {16-17}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{RUIMY_2003_ARTICLE_RMGCDUR_64485, AUTHOR = {Ruimy, N. and Monachini, M. and Gola, E. and Calzolari, N. and Del Fiorentino, M. C. and Ulivieri, M. and Rossi, S.}, TITLE = {A computational semantic lexicon of Italian: SIMPLE}, YEAR = {2003}, ABSTRACT = {Abstract - This paper describes the Italian Semantic Computational Lexicon elaborated in the framework of the European LE-SIMPLE Project. SIMPLE was aimed at adding a layer of semantic information to a subset of PAROLE lexica. The SIMPLE framework is based principally on the Generative Lexicon theory which allows to express the multidimensionality of meaning by means of ‘qualia structure’. Word senses are described according to their position within the SIMPLE ontology, which is based on the principle of orthogonal inheritance and consists of semantic types for characterizing simple nouns, event and property denoting lexical units. The encoding process is guided by templates, that are schematic structures containing clusters of structured information specific to each semantic type. 
Besides a high degree of granularity of meaning representation, the SIMPLE lexicon presents innovative aspects such as link between the syntactic and semantic levels of information, description of predicative representation and enforcement of selectional restrictions/preferences on arguments.}, PAGES = {821-864}, URL = {https://publications.cnr.it/doc/64485}, VOLUME = {18-19}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, }

% ---------------------------------------------------------------------------
% Conference papers, 2003.
% BOOKTITLE is a required field for this entry type; it mirrors
% CONFERENCE_NAME because the CONFERENCE_* fields are non-standard and are
% ignored by standard bibliography styles.
% NOTE(review): confirm the exact proceedings titles against the publications.
% ---------------------------------------------------------------------------

@INPROCEEDINGS{CALZOLARI_2003_INPROCEEDINGS_CBLM_84556,
  AUTHOR           = {Calzolari, N. and Bertagna, F. and Lenci, A. and Monachini, M.},
  TITLE            = {New Perspectives for Lexical Resources in the {Semantic Web} Scenario},
  BOOKTITLE        = {GL 2003-Second International Workshop on Generative Approaches to the Lexicon},
  EDITOR           = {Bouillon, P. and Kanzaki, K.},
  YEAR             = {2003},
  PAGES            = {10--19},
  URL              = {https://publications.cnr.it/doc/84556},
  CONFERENCE_NAME  = {GL 2003-Second International Workshop on Generative Approaches to the Lexicon},
  CONFERENCE_PLACE = {Genève, Switzerland},
  CONFERENCE_DATE  = {15-17 May 2003},
}

@INPROCEEDINGS{RUIMY_2003_INPROCEEDINGS_RMC_84563,
  AUTHOR           = {Ruimy, N. and Monachini, M. and Calzolari, N.},
  TITLE            = {Un lexique électronique multi-niveaux de l'italien},
  BOOKTITLE        = {CIL XVII International Congress of Linguists},
  YEAR             = {2003},
  ABSTRACT         = {CLIPS est la plus vaste ressource lexicale électronique de l'italien. Elle comprend 55.000 mots codés à 4 niveaux de description linguistique. La représentation lexicale est basée sur des standards internationaux: CLIPS utilise en effet le même modèle, le même langage de représentation et la même méthodologie que 11 autres lexiques développés au cours des projets européens PAROLE et SIMPLE. Les informations fournies, particulièrement utiles pour des applications de TLH, sont très structurées, granulaires et innovatrices, avec entre autres au niveau sémantique la Extended Qualia Structure, basée sur la théorie du Lexique Génératif, et la représentation prédicative. La description d'une unité lexicale est un continuum à travers les différents niveaux d'information. Les propriétés phonologiques, morphologiques et syntaxiques d'un lemme, ainsi que son/ses schéma(s) d'arguments sont décrits. Au niveau sémantique, chaque lexème/sens est associé à un vaste ensemble structuré d'informations, parmi lesquelles son type ontologique et l'expression - au moyen des relations qualia - des différentes facettes de sa sémantique. La représentation prédicative décrit, quant à elle, le scénario sémantique (dans lequel le mot s'insère) et ses participants auxquels sont attribués rôle thématique et contraintes sémantiques. La relation des niveaux syntaxique et sémantique est assurée par des liens permettant de projeter les structures argumentales sur leur(s) réalisation(s) syntaxique(s). Une telle richesse d'information, et en particulier celle fournie par la Extended Qualia, permet notamment 1) de constituer des réseaux sémantiques, en formulant une requête sur l'ensemble des relations qualia contenant un mot-clé ; 2) d'extraire des noyaux de vocabulaire de domaines spécifiques, en alternant requêtes sur qualia et sens ; 3) d'acquérir des collocations lexicales, en exploitant les liens syntagmatiques évènements/entités exprimés par les qualia ; 4) de désambiguïser la contribution sémantique du modificateur dans certains groupes nominaux complexes, en analysant la structure qualia de la tête.},
  PAGES            = {1--10},
  URL              = {https://publications.cnr.it/doc/84563},
  ISBN             = {80-86732-21-5},
  CONFERENCE_NAME  = {CIL XVII International Congress of Linguists},
  CONFERENCE_PLACE = {Prague},
  CONFERENCE_DATE  = {24-29 July 2003},
}

% ---------------------------------------------------------------------------
% Technical reports, 2003.
% TODO: INSTITUTION is a required field for this entry type but is not
% recorded in the source data for any of the reports below; fill it in once
% the issuing body of each report has been verified.
% ---------------------------------------------------------------------------

% NOTE(review): the next two records (157347 and 317185) share authors and
% year and have near-identical titles; they look like duplicate catalogue
% records of the same deliverable. Both keys are kept so that existing
% citations keep resolving.

@TECHREPORT{BARONI_2003_TECHREPORT_BCFLM_157347,
  AUTHOR = {Baroni, P. and Calzolari, N. and Fiorentini, G. and Lenci, A. and Monachini, M.},
  TITLE  = {Resources Landscape Map (1st release)},
  YEAR   = {2003},
  URL    = {https://publications.cnr.it/doc/157347},
}

@TECHREPORT{BARONI_2003_TECHREPORT_BCFLM_317185,
  AUTHOR   = {Baroni, P. and Calzolari, N. and Fiorentini, G. and Lenci, A. and Monachini, M.},
  TITLE    = {Resources Landscape (First Release)},
  YEAR     = {2003},
  ABSTRACT = {ELSNET-4 Deliverable D6.2},
  KEYWORDS = {Language Resources, Landscapes},
  URL      = {https://publications.cnr.it/doc/317185},
}

@TECHREPORT{BERTAGNA_2003_TECHREPORT_BCLM_157314,
  AUTHOR = {Bertagna, F. and Calzolari, N. and Lenci, A. and Monachini, M.},
  TITLE  = {Report on the Feasibility and the Organisational Requirements for the Construction of Multilingual {LRs}},
  YEAR   = {2003},
  URL    = {https://publications.cnr.it/doc/157314},
}

@TECHREPORT{CALZOLARI_2003_TECHREPORT_CBLM_157316,
  AUTHOR = {Calzolari, N. and Bertagna, F. and Lenci, A. and Monachini, M.},
  TITLE  = {Standards and Best Practice for Multilingual Computational Lexicons-{MILE} (the Multilingual {ISLE} Lexical Entry)},
  YEAR   = {2003},
  URL    = {https://publications.cnr.it/doc/157316},
}

% NOTE(review): the catalogue title read "MILE Users? Evaluation and Feedback";
% the question mark is presumably a lost apostrophe and has been restored as
% such. Confirm against the report itself.
@TECHREPORT{CALZOLARI_2003_TECHREPORT_CBLM_157337,
  AUTHOR = {Calzolari, N. and Bertagna, F. and Lenci, A. and Monachini, M.},
  TITLE  = {{MILE} Users' Evaluation and Feedback},
  YEAR   = {2003},
  URL    = {https://publications.cnr.it/doc/157337},
}

@TECHREPORT{GAVRILIDOU_2003_TECHREPORT_GDLCMS_157321,
  AUTHOR = {Gavrilidou, M. and Desipri, E. and Labropoulo, P. and Calzolari, N. and Monachini, M. and Soria, C.},
  TITLE  = {Technical Specifications for the Selection and Encoding of Multilingual Resources},
  YEAR   = {2003},
  URL    = {https://publications.cnr.it/doc/157321},
}

@TECHREPORT{LENCI_2003_TECHREPORT_LCM_157345,
  AUTHOR = {Lenci, A. and Calzolari, N. and Monachini, M.},
  TITLE  = {Report on {LR} Related Activities to Be Promoted},
  YEAR   = {2003},
  URL    = {https://publications.cnr.it/doc/157345},
}

@TECHREPORT{MONACHINI_2003_TECHREPORT_MBCL_157322,
  AUTHOR = {Monachini, M. and Bertagna, F. and Calzolari, N. and Lenci, A.},
  TITLE  = {Improving Harmonisation between Resources: Divergence/Convergence between Specifications and de-facto Standards},
  YEAR   = {2003},
  URL    = {https://publications.cnr.it/doc/157322},
}

@TECHREPORT{MONACHINI_2003_TECHREPORT_MBCUN_157323,
  AUTHOR = {Monachini, M. and Bertagna, F. and Calzolari, N. and Underwood, N. and Navarretta, C.},
  TITLE  = {Towards a Standard for the Creation of Lexica},
  YEAR   = {2003},
  URL    = {https://publications.cnr.it/doc/157323},
}

@TECHREPORT{MONACHINI_2003_TECHREPORT_MS_157324,
  AUTHOR = {Monachini, M. and Soria, C.},
  TITLE  = {Testing Scenario and Quality Assessment Strategy},
  YEAR   = {2003},
  URL    = {https://publications.cnr.it/doc/157324},
}

@TECHREPORT{RUIMY_2003_TECHREPORT_RMC_157325,
  AUTHOR = {Ruimy, N. and Monachini, M. and Calzolari, N.},
  TITLE  = {Progetto {CLIPS}: Specifiche Linguistiche e Manuale di Codifica, Livello sintattico},
  YEAR   = {2003},
  URL    = {https://publications.cnr.it/doc/157325},
}

@TECHREPORT{RUIMY_2003_TECHREPORT_RMC_157326,
  AUTHOR = {Ruimy, N. and Monachini, M. and Calzolari, N.},
  TITLE  = {Progetto {CLIPS}: Specifiche Linguistiche e Manuale di Codifica, Livello semantico},
  YEAR   = {2003},
  URL    = {https://publications.cnr.it/doc/157326},
}

% ---------------------------------------------------------------------------
% Book chapters, 2001.
% ---------------------------------------------------------------------------

@INCOLLECTION{RUIMY_2001_INCOLLECTION_RGM_136426,
  AUTHOR    = {Ruimy, N. and Gola, E. and Monachini, M.},
  TITLE     = {Lexicography Informs Lexical Semantics: the {SIMPLE} Experience},
  BOOKTITLE = {The Language of Word Meaning},
  EDITOR    = {Bouillon, P. and Busa, F. and Bogouraev, B.},
  YEAR      = {2001},
  ABSTRACT  = {Gli autori presentano un approccio innovativo alla costruzione di un lessico semantico che coniuga teoria linguistica e pratica lessicografica, dimostrando che la visione della lessicografia quale disciplina ortogonale alla linguistica teorica è ingannevole. L’articolo valuta l’adeguatezza del Lessico Generativo, una teoria innovativa nel settore della semantica lessicale utilizzata per lo sviluppo dei lessici SIMPLE (coordinato da Pisa). Tali risorse computazionali su vasta scala, costruite sotto l’egida della CE per 12 lingue europee, condividono modello teorico, formato di rappresentazione, un medesimo nucleo di entrate lessicali e sono divenute di fatto uno standard. Il potenziale dei ruoli qualia del Lessico Generativo viene esaminato attraverso la rappresentazione dei nomi astratti che, per loro intrinseca complessità, costituiscono un significativo banco di prova per ogni teoria semantica. I qualia forniscono le dimensioni semantiche lungo cui strutturare un’ontologia dei nomi astratti e costruire parallelamente, per la loro descrizione, una ‘libreria’ di ‘templates’, ovvero nuclei strutturati di informazioni associate ad ogni tipo semantico. La metodologia ‘template-driven’ conferisce rilevanza e valore competitivo ai lessici così sviluppati, in quanto costituisce non solo un originale strumento di codifica ma anche l’implementazione della teoria ed assicura, inoltre, coerenza sia all’interno di un lessico che tra lessici di lingue diverse.},
  KEYWORDS  = {semantica lessicale, ontologia, templates, lessico elettronico, lessico generativo},
  PAGES     = {350--362},
  URL       = {https://publications.cnr.it/doc/136426},
  PUBLISHER = {Cambridge University Press (Cambridge, GBR)},
  ISBN      = {0521780489},
}