@INCOLLECTION{MALLIA_2024_INCOLLECTION_MBQ_485252, AUTHOR = {Mallia, M. and Bandini, M. and Quochi, V.}, TITLE = {An interface for linking ancient languages}, YEAR = {2024}, ABSTRACT = {The paper focuses on the linking potentials offered by the EpiLexO web-based front-end for the creation and editing of an ecosystem of digital resources for ancient languages, developed in the context of a project on the languages of fragmentary attestation of ancient Italy. The focus is particularly on mechanisms introduced for linking lexical information to other information bits either internally or externally, e.g. for creating attestations by linking lexical forms to their variants in relevant inscriptions, as well as for linking lexical data to external independent LOD datasets available on a remote endpoint. Finally, in the conclusions, we briefly introduce some future planned or desired enhancements as well as the final platform component, a parallel interface that constitutes the fruition application, which will be open to anyone on the web and will allow for browsing, searching, cross-querying and visualising the created set of interlinked resources.}, KEYWORDS = {eLexicography, Ancient languages, Linguistic Linked Open Data, Digital historical linguistics}, PAGES = {1--12}, URL = {https://publications.cnr.it/doc/485252}, PUBLISHER = {CEUR-WS.org (Aachen, DEU)}, BOOKTITLE = {Proceedings of the CLaDA-BG 2023 Conference: Language Technologies and Digital Humanities: Resources and Applications (LTaDH-RA), Sofia, Bulgaria, 10-12 May 2023}, EDITOR = {Simov, K. and Osenova, P.}, } @MISC{BANDINI_2024_MISC_BQ_491319, AUTHOR = {Bandini, M. 
and Quochi, V.}, TITLE = {EpiLexO-User Manual}, YEAR = {2024}, ABSTRACT = {This document contains a user manual for EpiLexO, a specialized web platform designed for the creation and editing of lexical resources, associated evidence, references, and relevant bibliography of fragmentary languages of ancient Italy such as Oscan, Faliscan, and Venetic in the context of the project: Languages and Cultures of Ancient Italy. Historical Linguistics and Digital Models (PRIN 2017XJLE8J). The platform, a single-page web application, includes several sections, each of which provides functionality for generating or editing lexical resources and establishing connections and links between their elements and different sets of internal and external resources. This User Manual aims to help users understand the interface by illustrating its functions with step-by-step instructions, examples, and troubleshooting guidance. Its target audience includes historical linguists, digital humanists and epigraphists whose research is based on linguistics and philology.}, KEYWORDS = {Digital Epigraphy, Restsprachen, Lexicon Linking, Ancient Languages, eLexicography, User Manual, Interface}, PAGES = {1--77}, URL = {https://doi.org/10.5281/zenodo.10475219}, } @ARTICLE{MURANO_2023_ARTICLE_MQDRZ_485254, AUTHOR = {Murano, F. and Quochi, V. and Del Grosso, A. M. and Rigobianco, L. and Zinzi, M.}, TITLE = {Describing Inscriptions of Ancient Italy. The ItAnt Project and Its Information Encoding Process}, YEAR = {2023}, ABSTRACT = {This paper discusses the challenges addressed in the digital scholarly encoding of the fragmentary texts of the languages of Ancient Italy according to the TEI/EpiDoc Guidelines in XML format. This contribution describes the solutions and customisations that have been adopted for dealing with the peculiarities of our epigraphical documentation and with the formalisation of epigraphical information deemed interesting for data retrieval in a historical linguistic perspective. 
The making of a digital corpus consisting of new critical editions of selected inscriptions is a work carried out in the context of the project "Languages and Cultures of Ancient Italy. Historical Linguistics and Digital Models", which aims to investigate the languages of Ancient Italy by combining the traditional methods, proper to historical linguistics, with methods and technologies proper to the digital humanities and computational lexicography. More specifically, the purpose of the project is to create a collection of interrelated digital language resources which comprise: 1) the digital corpus of texts editions; 2) a computational lexicon compliant with the Web Semantic requirements; 3) a relevant bibliographic reference dataset encoded according to the FRBRoo/LRMoo specifications. Additionally, selected textual data and scientific interpretations will be encoded by using CIDOC CRM and its extensions, namely CRMtex and CRMinf. The present contribution tackles one of the main aspects of the project, and proposes significant innovations in the encoding of critical editions for epigraphic texts of fragmentary languages, which will hopefully foster future interoperability and integration with other external datasets, a paramount concern of the project.}, KEYWORDS = {text encoding, ancient languages, digital epigraphy, TEI/EpiDoc}, PAGES = {15}, URL = {https://dl.acm.org/doi/pdf/10.1145/3606703}, VOLUME = {16}, DOI = {10.1145/3606703}, PUBLISHER = {Association for Computing Machinery (New York, NY, Stati Uniti d'America)}, ISSN = {1556-4711}, JOURNAL = {Journal on computing and cultural heritage (Online)}, } @INCOLLECTION{DELFANTE_2022_INCOLLECTION_DFMQ_469112, AUTHOR = {Del Fante, D. and Frontini, F. and Monachini, M. and Quochi, V.}, TITLE = {Italian Language Resources. 
From CLARIN-IT to the VLO and Back: Sketching a Methodology for Monitoring LRs Visibility}, YEAR = {2022}, ABSTRACT = {This paper sketches a user-oriented, qualitative methodology for both (i) monitoring the existence and availability of language resources relevant for a given CLARIN national community and language and (ii) assessing the offering potential of CLARIN, in terms of Language Resources provided to national consortia. From the user perspective, the methodology has been applied to investigate the visibility of language resources available for Italian within the CLARIN central services, in particular the Virtual Language Observatory. As a proof-of-concept, the methodology has been tested on the resources available through the CLARIN-IT data centres, but, ideally, it could be applied by any national data centre aiming to assess the existence of LRs in CLARIN for any given languages and check their accessibility for the interested users. It is thus argued that such an assessment might be a useful instrument in the hands of national coordinators and centre managers for (i) bringing to the fore both strengths and critical issues about their data providing community and (ii) for planning targeted actions to improve and increase both visibility and accessibility of their LRs.}, KEYWORDS = {Virtual Language Observatory, CLARIN-IT, CLARIN-ERIC, Qualitative Assessment Methodology, User Involvement}, PAGES = {10--22}, URL = {https://ecp.ep.liu.se/index.php/clarin/article/view/413/371}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, BOOKTITLE = {Selected Papers from the CLARIN Annual Conference 2021}, EDITOR = {Monachini, Monica and Eskevich, Maria}, } @INPROCEEDINGS{AGNOLONI_2022_INPROCEEDINGS_ABFMMQRV_472294, AUTHOR = {Agnoloni, T. and Bartolini, R. and Frontini, F. and Montemagni, S. and Marchetti, C. and Quochi, V. and Ruisi, M. 
and Venturi, G.}, TITLE = {Making Italian Parliamentary Records Machine-Actionable: the Construction of the ParlaMint-IT corpus}, YEAR = {2022}, ABSTRACT = {This paper describes the process of acquisition, cleaning, interpretation, coding and linguistic annotation of a collection of parliamentary debates from the Senate of the Italian Republic covering the COVID-19 pandemic emergency period and a former period for reference and comparison according to the CLARIN ParlaMint prescriptions. The corpus contains 1199 sessions and 79,373 speeches for a total of about 31 million words, and was encoded according to the ParlaCLARIN TEI XML format. It includes extensive metadata about the speakers, sessions, political parties and parliamentary groups. As required by the ParlaMint initiative, the corpus was also linguistically annotated for sentences, tokens, POS tags, lemmas and dependency syntax according to the universal dependencies guidelines. Named entity annotation and classification is also included. All linguistic annotation was performed automatically using state-of-the-art NLP technology with no manual revision. The Italian dataset is freely available as part of the larger ParlaMint 2.1 corpus deposited and archived in CLARIN repository together with all other national corpora. It is also available for direct analysis and inspection via various CLARIN services and has already been used both for research and educational purposes.}, KEYWORDS = {parliamentary debates, CLARIN ParlaMint, corpus creation, corpus annotation}, PAGES = {117--124}, URL = {https://aclanthology.org/2022.parlaclarin-1.17/}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, CONFERENCE_NAME = {Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference}, CONFERENCE_PLACE = {Marseille, France}, CONFERENCE_DATE = {20/06/2022}, } @INPROCEEDINGS{DELFANTE_2022_INPROCEEDINGS_DFMQ_468964, AUTHOR = {Del Fante, D. and Frontini, F. and Monachini, M. 
and Quochi, V.}, TITLE = {CLARIN-IT: An Overview on the Italian Clarin Consortium After Six Years of Activity}, YEAR = {2022}, ABSTRACT = {This paper offers an overview of the Italian CLARIN consortium after six years since its establishment. The members, the centres and the repositories and the most important collections are described. Lastly, in order to showcase the visibility and the accessibility of Language Resources provided by CLARIN-IT from a user-perspective, we show how Italian resources are findable within CLARIN ERI}, KEYWORDS = {Language Resources, Data Repositories and Archives, Research Infrastructures, CLARIN}, PAGES = {8}, URL = {http://ceur-ws.org/Vol-3160/short21.pdf}, PUBLISHER = {CEUR-WS.org (Aachen, DEU)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Italian Research Conference on Digital Libraries}, CONFERENCE_PLACE = {Università degli Studi di Padova}, CONFERENCE_DATE = {24/02/2022}, BOOKTITLE = {Proceedings of the 18th Italian Research Conference on Digital Libraries}, EDITOR = {Di Nunzio, G. M. and Portelli, B. and Redavid, D. and Silvello, G.}, } @INPROCEEDINGS{QUOCHI_2022_INPROCEEDINGS_QBKMMPRTZ_472419, AUTHOR = {Quochi, V. and Bellandi, A. and Khan, F. and Mallia, M. and Murano, F. and Piccini, S. and Rigobianco, L. and Tommasi, A. and Zavattari, C.}, TITLE = {From Inscriptions to Lexica and Back: A Platform for Editing and Linking the Languages of Ancient Italy}, YEAR = {2022}, ABSTRACT = {Available language technology is hardly applicable to scarcely attested ancient languages, yet their digital semantic representation, though challenging, is an asset for the purpose of sharing and preserving existing cultural knowledge. In the context of a project on the languages and cultures of ancient Italy, we took up this challenge. 
This paper thus describes the development of a user friendly web platform, EpiLexO, for the creation and editing of an integrated system of language resources for ancient fragmentary languages centered on the lexicon, in compliance with current digital humanities and Linked Open Data principles. EpiLexO allows for the editing of lexica with all relevant cross-references: for their linking to their testimonies, as well as to bibliographic information and other (external) resources and common vocabularies. The focus of the current implementation is on the languages of ancient Italy, in particular Oscan, Faliscan, Celtic and Venetic; however, the technological solutions are designed to be general enough to be potentially applicable to different contexts and scenarios.}, KEYWORDS = {Digital Epigraphy, Restsprachen, Lexicon Editing and Linking, tools for DH}, PAGES = {59--67}, URL = {https://aclanthology.org/2022.lt4hala-1.0/}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, ISBN = {979-10-95546-78-8}, CONFERENCE_NAME = {Second Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA 2022)}, CONFERENCE_PLACE = {Marseille, France}, CONFERENCE_DATE = {25/06/2022}, BOOKTITLE = {Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA 2022)}, EDITOR = {Sprugnoli, R. and Passarotti, M.}, } @INPROCEEDINGS{QUOCHI_2022_INPROCEEDINGS_QBMTZ_472422, AUTHOR = {Quochi, V. and Bellandi, A. and Mallia, M. and Tommasi, A. and Zavattari, C.}, TITLE = {Supporting Ancient Historical Linguistics and Cultural Studies with EpiLexO}, YEAR = {2022}, ABSTRACT = {This contribution presents a system of independent software components meant to support the creation of ecosystems of interrelated language data (i.e. 
lexica linked to textual testimonies, concepts, metadata, bibliographic references, and other external lexical resources) according to the current state-of-the-art representational models for the semantic web. The system is implemented as a set of autonomous servers exposing Restful APIs that in principle can serve different frontend applications and use cases. In this work they serve the EpiLexO GUI application designed and geared to support scholars of ancient languages of fragmentary attestation in their studies. The development of both the back-ends and the front-end is still work in progress, but a first version is ready for use.}, KEYWORDS = {tools for DH, ancient languages, restsprachen, lexicon editor, corpus management, lexicon-text linking}, PAGES = {39--43}, URL = {https://office.clarin.eu/v/CE-2022-2118-CLARIN2022_ConferenceProceedings.pdf}, ISSN = {2773-2177}, CONFERENCE_NAME = {CLARIN Annual Conference 2022}, CONFERENCE_PLACE = {Prague, Czechia}, CONFERENCE_DATE = {10-12/10/2022}, BOOKTITLE = {CLARIN Annual Conference Proceedings 2022}, EDITOR = {Erjavec, T. and Eskevich, M.}, } @INPROCEEDINGS{MARINETTI_2022_INPROCEEDINGS_MMQBBDPRSZMM_479194, AUTHOR = {Marinetti, A. and Murano, F. and Quochi, V. and Ballerini, M. and Boschetti, F. and Del Grosso, A. M. and Piccini, S. and Rigobianco, L. and Solinas, P. and Zinzi, M. and Mallia, M. and Middei, E.}, TITLE = {Challenges in Encoding Fragmentary Attested Languages}, YEAR = {2022}, ABSTRACT = {The ItAnt project investigates the languages of ancient Italy, whose only attestation consists in epigraphic evidence, focusing on Venetic, Oscan, Faliscan and Celtic languages. 
For this purpose, the project combines the traditional method proper to historical linguistics with the setting up of digital technologies, developing computational tools specifically designed to create a digital set of interrelated resources.}, KEYWORDS = {digital epigraphy, eLexicography, Linguistic Linked Open Data, Text Encoding}, URL = {https://ciegl2022.sciencesconf.org/resource/page/id/30}, CONFERENCE_NAME = {XVI Congresso Internazionale di Epigrafia greca e latina}, CONFERENCE_PLACE = {Bordeaux}, CONFERENCE_DATE = {29 agosto-settembre 2022}, } @TECHREPORT{MARTELLI_2022_TECHREPORT_MMCNVUFQKKLDTTCSKIDGM_472421, AUTHOR = {Martelli, F. and Maru, M. and Campagnano, C. and Navigli, R. and Velardi, P. and Ureña Ruiz, R. and Frontini, F. and Quochi, V. and Kallas, J. and Koppel, K. and Langemets, M. and De Does, J. and Tempelaars, R. and Tiberius, C. and Costa, R. and Salgado, A. and Krek, S. and Čibej, J. and Dobrovoljc, K. and Gantar, P. and Munda, T.}, TITLE = {D3.8 Lexical-semantic analytics for NLP}, YEAR = {2022}, ABSTRACT = {The present document illustrates the work carried out in task 3.3 (work package 3) focused on lexical-semantic analytics for Natural Language Processing (NLP). This task aims at computing analytics for lexical-semantic information such as words, senses and domains in the available resources, investigating their role in NLP applications. Specifically, this task concentrates on three research directions, namely i) sense clustering, in which grouping senses based on their semantic similarity improves the performance of NLP tasks such as Word Sense Disambiguation (WSD), ii) domain labeling of text, in which the lexicographic resources made available by the ELEXIS project for research purposes allow better performances to be achieved, and finally iii) analysing the diachronic distribution of senses, for which a software package is made available. 
In this deliverable, we illustrate the research activities aimed at achieving the aforementioned goals and put forward suggestions for future works. Importantly, we stress the crucial role played by high-quality lexical-semantic resources when investigating such linguistic aspects and their impact on NLP applications. To this end, as an additional contribution, we address the paucity of manually annotated data in the lexical-semantic research field and introduce the ELEXIS parallel sense-annotated dataset, a novel entirely manually annotated dataset available in 10 European languages and featuring 5 annotation layers.}, KEYWORDS = {research infrastructures, lexicography, lexical resources, word-sense disambiguation, WSD, sense-annotated language data, multilinguality}, PAGES = {67}, URL = {https://elex.is/wp-content/uploads/ELEXIS_D3_8_Lexical-Semantic_Analytics_for_NLP_final_report.pdf}, } @TECHREPORT{TASOVAC_2022_TECHREPORT_TTBBBCUFHHJKKKKMMMMMQRRSSVWWZ_463877, AUTHOR = {Tasovac, T. and Tiberius, C. and Bamberg, C. and Bellandi, A. and Burch, T. and Costa, R. and Ďurčo, M. and Frontini, F. and Hennemann, J. and Heylen, K. and Jakubíček, M. and Khan, F. and Klee, A. and Kosem, I. and Kovář, V. and Matuška, O. and McCrae, J. and Monachini, M. and Mörth, K. and Munda, T. and Quochi, V. and Repar, A. and Roche, C. and Salgado, A. and Sievers, H. and Váradi, T. and Weyand, S. and Woldrich, A. and Zhanial, S.}, TITLE = {D5.3 Overview of Online Tutorials and Instruction Manuals}, YEAR = {2022}, ABSTRACT = {The ELEXIS Curriculum is an integrated set of training materials which contextualizes ELEXIS tools and services inside a broader, systematic pedagogic narrative. 
This means that the goal of the ELEXIS Curriculum is not simply to inform users about the functionalities of particular tools and services developed within the project, but to show how such tools and services are a) embedded in both lexicographic theory and practice; and b) representative of and contributing to the development of digital skills among lexicographers. The scope and rationale of the curriculum are described in more detail in the Deliverable D5.2 Guidelines for Producing ELEXIS Tutorials and Instruction Manuals. The goal of this deliverable, as stated in the project DOW, is to provide "a clear, structured overview of tutorials and instruction manuals developed within the project."}, KEYWORDS = {ELEXIS, lexicography, training materials}, PAGES = {31}, URL = {https://elex.is/wp-content/uploads/ELEXIS_D5_3_Overview-of-Online-Tutorials-and-Instruction-Manuals.pdf}, } @MISC{FRONTINI_2022_MISC_FBQMMZUW_463506, AUTHOR = {Frontini, F. and Bellandi, A. and Quochi, V. and Monachini, M. and Mörth, K. and Zhanial, S. and Ďurčo, M. and Woldrich, A.}, TITLE = {CLARIN Tools and Resources for Lexicographic Work}, YEAR = {2022}, ABSTRACT = {This course introduces lexicographers to the CLARIN Research Infrastructure and highlights language resources and tools useful for lexicographic practices. The course consists of two parts. In Part 1, you will learn about CLARIN, its technical and knowledge infrastructure, and about how to deposit and find lexical resources in CLARIN. In Part 2, you will become acquainted with CLARIN tools that can be used to create lexical resources.}, KEYWORDS = {CLARIN, lexicography}, URL = {https://elexis.humanistika.org/id/UnwYPq70Dewbn7XDEjsMM}, } @MISC{MARTELLI_2022_MISC_MNKKGKNPOLKKDUSLVGLQMFTTCSIM_472295, AUTHOR = {Martelli, F. and Navigli, R. and Krek, S. and Kallas, J. and Gantar, P. and Koeva, S. and Nimb, S. and Pedersen, B. S. and Olsen, S. and Langemets, M. and Koppel, K. and Üksik, T. and Dobrovoljc, K. and Ureña Ruiz, R. 
and Sancho Sánchez, J. and Lipp, V. and Váradi, T. and Győrffy, A. and László, S. and Quochi, V. and Monachini, M. and Frontini, F. and Tiberius, C. and Tempelaars, R. and Costa, R. and Salgado, A. and Čibej, J. and Munda, T.}, TITLE = {Parallel sense-annotated corpus ELEXIS-WSD 1.0}, YEAR = {2022}, ABSTRACT = {ELEXIS-WSD is a parallel sense-annotated corpus in which content words (nouns, adjectives, verbs, and adverbs) have been assigned senses. Version 1.0 contains sentences for 10 languages: Bulgarian, Danish, English, Spanish, Estonian, Hungarian, Italian, Dutch, Portuguese, and Slovene. The corpus was compiled by automatically extracting a set of sentences from WikiMatrix (Schwenk et al., 2019), a large open-access collection of parallel sentences derived from Wikipedia, using an automatic approach based on multilingual sentence embeddings. The sentences were manually validated according to specific formal, lexical and semantic criteria (e.g. by removing incorrect punctuation, morphological errors, notes in square brackets and etymological information typically provided in Wikipedia pages). To obtain a satisfying semantic coverage, we filtered out sentences with less than 5 words and less than 2 polysemous words. Subsequently, in order to obtain datasets in the other nine target languages, for each selected sentence in English, the corresponding WikiMatrix translation into each of the other languages was retrieved. If no translation was available, the English sentence was translated manually. The resulting corpus is comprised of 2,024 sentences for each language.}, KEYWORDS = {Word Sense Disambiguation, corpus parallelo, disambiguazione automatica del senso, annotazione semantica multilingue}, URL = {http://hdl.handle.net/11356/1674}, } @MISC{QUOCHI_2022_MISC_QB_463856, AUTHOR = {Quochi, V. 
and Bellandi, A.}, TITLE = {LexO editor: the basics-video tutorial}, YEAR = {2022}, ABSTRACT = {Video tutorial sull'uso di LexO, un editor di lessici secondo il modello Ontolex-lemon. Il tutorial è parte dell' ELEXIS training programme disponibile sulla piattaforma DARIAH-teach.}, KEYWORDS = {lexicon editor, video tutorial, training material, lexO, online web application}, URL = {https://www.youtube.com/watch?v=9KE0laMaTAs\&list=PLoD829qNERpYKq8JRkY4EIGgZCdi0QHOd}, } @INPROCEEDINGS{MARTELLI_2021_INPROCEEDINGS_MNKTKGKNPOLKKDUSLVGLQMFTCSIM_461705, AUTHOR = {Martelli, F. and Navigli, R. and Krek, S. and Tiberius, C. and Kallas, J. and Gantar, P. and Koeva, S. and Nimb, S. and Pedersen, B. S. and Olsen, S. and Langemets, M. and Koppel, K. and Üksik, T. and Dobrovoljc, K. and Ureña Ruiz, R. and Sancho Sánchez, J. and Lipp, V. and Váradi, T. and Győrffy, A. and László, S. and Quochi, V. and Monachini, M. and Frontini, F. and Tempelaars, R. and Costa, R. and Salgado, A. and Čibej, J. and Munda, T.}, TITLE = {Designing the ELEXIS Parallel Sense-Annotated Dataset in 10 European Languages}, YEAR = {2021}, ABSTRACT = {Over the course of the last few years, lexicography has witnessed the burgeoning of increasingly reliable automatic approaches supporting the creation of lexicographic resources such as dictionaries, lexical knowledge bases and annotated datasets. In fact, recent achievements in the field of Natural Language Processing and particularly in Word Sense Disambiguation have widely demonstrated their effectiveness not only for the creation of lexicographic resources, but also for enabling a deeper analysis of lexical-semantic data both within and across languages. Nevertheless, we argue that the potential derived from the connections between the two fields is far from exhausted. In this work, we address a serious limitation affecting both lexicography and Word Sense Disambiguation, i.e. 
the lack of high-quality sense-annotated data and describe our efforts aimed at constructing a novel entirely manually annotated parallel dataset in 10 European languages. For the purposes of the present paper, we concentrate on the annotation of morpho-syntactic features. Finally, unlike many of the currently available sense-annotated datasets, we will annotate semantically by using senses derived from high-quality lexicographic repositories.}, KEYWORDS = {Digital lexicography, Natural Language Processing, Computational Linguistics, Corpus Linguistics, Word Sense Disambiguation}, PAGES = {377--396}, URL = {https://static-curis.ku.dk/portal/files/279888836/eLex_2021_22_pp377_395.pdf}, CONFERENCE_NAME = {eLex 2021}, CONFERENCE_DATE = {05/07/2021-07/07/2021}, BOOKTITLE = {Proceedings of the eLex 2021 conference}, } @INPROCEEDINGS{MARINETTI_2021_INPROCEEDINGS_MMQBBDPRS_461529, AUTHOR = {Marinetti, A. and Murano, F. and Quochi, V. and Ballerini, M. and Boschetti, F. and Del Grosso, A. M. and Piccini, S. and Rigobianco, L. and Solinas, P.}, TITLE = {Languages and Cultures of Ancient Italy. Historical Linguistics and Digital Models}, YEAR = {2021}, ABSTRACT = {The abstract accompanies a poster presenting an overview of the project "Languages and cultures of Ancient Italy", which had just started. The project brings together competences from Historical Linguistics, Computational Lexicography and Digital Humanities. The main objective of the project is to investigate the cultures of ancient Italy on the basis of their linguistic documentation (7th - 1st c. B.C.) 
by means of digital tools specifically tailored for their peculiarities.}, KEYWORDS = {digital epigraphy, computational lexicons, text-lexicon linking, restsprachen, digital models, digital humanities}, PAGES = {528--532}, URL = {https://aiucd2021.labcd.unipi.it/en/book-of-abstracts-conference/}, CONFERENCE_NAME = {10th National Conference of Associazione per l'Informatica Umanistica e la Cultura Digitale}, CONFERENCE_PLACE = {Pisa (Virtuale)}, CONFERENCE_DATE = {19-22 gennaio 2021}, } @MISC{ERJAVEC_2021_MISC_EOOLSGRPKBSVDDJHNCDVMLCAFMQVRMBSRDUPBKMDLR_463861, AUTHOR = {Erjavec, T. and Ogrodniczuk, M. and Osenova, P. and Ljubešić, N. and Simov, K. and Grigorova, V. and Rudolf, M. and Pančur, A. and Kopp, M. and Barkarson, S. and Steingrímsson, S. and Van Der Pol, H. and Depoorter, G. and De Does, J. and Jongejan, B. and Haltrup Hansen, D. and Navarretta, C. and Calzada Pérez, M. and De Macedo, L. D. and Van Heusden, R. and Marx, M. and Çöltekin, Ç. and Coole, M. and Agnoloni, T. and Frontini, F. and Montemagni, S. and Quochi, V. and Venturi, G. and Ruisi, M. and Marchetti, C. and Battistoni, R. and Sebők, M. and Ring, O. and Darģis, R. and Utka, A. and Petkevičius, M. and Briedienė, M. and Krilavičius, T. and Morkevičius, V. and Diwersy, S. and Luxardo, G. and Rayson, P.}, TITLE = {Linguistically annotated multilingual comparable corpora of parliamentary debates ParlaMint.ana 2.1}, YEAR = {2021}, ABSTRACT = {ParlaMint 2.1 is a multilingual set of 17 comparable corpora containing parliamentary debates mostly starting in 2015 and extending to mid-2020, with each corpus being about 20 million words in size. The sessions in the corpora are marked as belonging to the COVID-19 period (from November 1st 2019), or being "reference" (before that date). 
The corpora have extensive metadata, including aspects of the parliament; the speakers (name, gender, MP status, party affiliation, party coalition/opposition); are structured into time-stamped terms, sessions and meetings; with speeches being marked by the speaker and their role (e.g. chair, regular speaker). The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. Note that some corpora have further information, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The corpora are encoded according to the Parla-CLARIN TEI recommendation (https://clarin-eric.github.io/parla-clarin/), but have been validated against the compatible, but much stricter ParlaMint schemas. This entry contains the linguistically marked-up version of the corpus, while the text version is available at http://hdl.handle.net/11356/1432. The ParlaMint.ana linguistic annotation includes tokenization, sentence segmentation, lemmatisation, Universal Dependencies part-of-speech, morphological features, and syntactic dependencies, and the 4-class CoNLL-2003 named entities. Some corpora also have further linguistic annotations, such as PoS tagging or named entities according to language-specific schemes, with their corpus TEI headers giving further details on the annotation vocabularies and tools.}, KEYWORDS = {dibattiti parlamentari, covid-19, ParlaCLARIN, parlamenti, discorso politico, CLARIN, linguistic annotation, pos-tagging, ner, linguistic dependency annotation, UD}, URL = {http://hdl.handle.net/11356/1431}, } @MISC{ERJAVEC_2021_MISC_EOOLSGRPKBSVDDJHNCDVMLCAFMQVRMBSRDUPBKMDLR_463865, AUTHOR = {Erjavec, T. and Ogrodniczuk, M. and Osenova, P. and Ljubešić, N. and Simov, K. and Grigorova, V. and Rudolf, M. and Pančur, A. and Kopp, M. and Barkarson, S. and Steingrímsson, S. and Van Der Pol, H. and Depoorter, G. and De Does, J. and Jongejan, B. and Haltrup Hansen, D. 
and Navarretta, C. and Calzada Pérez, M. and De Macedo, L. D. and Van Heusden, R. and Marx, M. and Çöltekin, Ç. and Coole, M. and Agnoloni, T. and Frontini, F. and Montemagni, S. and Quochi, V. and Venturi, G. and Ruisi, M. and Marchetti, C. and Battistoni, R. and Sebők, M. and Ring, O. and Darģis, R. and Utka, A. and Petkevičius, M. and Briedienė, M. and Krilavičius, T. and Morkevičius, V. and Diwersy, S. and Luxardo, G. and Rayson, P.}, TITLE = {Multilingual comparable corpora of parliamentary debates ParlaMint 2.1}, YEAR = {2021}, ABSTRACT = {ParlaMint 2.1 is a multilingual set of 17 comparable corpora containing parliamentary debates mostly starting in 2015 and extending to mid-2020, with each corpus being about 20 million words in size. The sessions in the corpora are marked as belonging to the COVID-19 period (after November 1st 2019), or being "reference" (before that date). The corpora have extensive metadata, including aspects of the parliament; the speakers (name, gender, MP status, party affiliation, party coalition/opposition); are structured into time-stamped terms, sessions and meetings; with speeches being marked by the speaker and their role (e.g. chair, regular speaker). The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. Note that some corpora have further information, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The corpora are encoded according to the Parla-CLARIN TEI recommendation (https://clarin-eric.github.io/parla-clarin/), but have been validated against the compatible, but much stricter ParlaMint schemas. This entry contains the ParlaMint TEI-encoded corpora with the derived plain text version of the corpus along with TSV metadata on the speeches. Also included is the 2.0 release of the data and scripts available at the GitHub repository of the ParlaMint project. 
Note that there also exists the linguistically marked-up version of the corpus, which is available at http://hdl.handle.net/11356/1431.}, KEYWORDS = {dibattiti parlamentari, covid-19, discorso politico, CLARIN, parlamenti, ParlaCLARIN}, URL = {http://hdl.handle.net/11356/1432}, } @TECHREPORT{BARTOLINI_2020_TECHREPORT_BQMA_453502, AUTHOR = {Bartolini, R. and Quochi, V. and Monachini, M. and Affé, F.}, TITLE = {Relazione di fine progetto "PIM-Piattaforma Integrata Monitoraggio"}, YEAR = {2020}, ABSTRACT = {Il documento presenta l'attività svolta dal CNR-ILC nel ruolo di subcontraente di COMDATA per la realizzazione di moduli di trattamento automatico del linguaggio e la consulenza per l'integrazione di metodi di clustering automatico di documenti nella Digital Library del progetto PIM.}, KEYWORDS = {accesso intelligente al testo, digital library, natural language processing}, PAGES = {156}, URL = {https://publications.cnr.it/doc/453502}, } @INPROCEEDINGS{NICOLAS_2018_INPROCEEDINGS_NKMDCAEBQS_387361, AUTHOR = {Nicolas, L. and König, A. and Monachini, M. and Del Gratta, R. and Calamai, S. and Abel, A. and Enea, A. and Biliotti, F. and Quochi, V. and Stella, F. V.}, TITLE = {CLARIN-IT: State of Affairs, Challenges and Opportunities}, YEAR = {2018}, ABSTRACT = {This paper gives an overview on the Italian national CLARIN consortium as it currently stands two years after its creation at the end of 2015. It thus discusses the current state of affairs of the consortium on several aspects, especially with regards to members. It also discusses the events and initiatives that have been undertaken, as well as the ones that are planned in the close future. 
It finally outlines the conclusions of a user survey performed to understand the expectations of a targeted user population and provides indications regarding the next steps planned.},
  KEYWORDS = {CLARIN-IT Consortium Pisa Bolzano Siena},
  PAGES = {1--14},
  URL = {http://www.ep.liu.se/ecp/contents.asp?issue=147},
  VOLUME = {147},
  ISBN = {978-91-7685-273-6},
  CONFERENCE_NAME = {CLARIN Annual Conference 2017},
  CONFERENCE_PLACE = {Budapest, Hungary},
  CONFERENCE_DATE = {18-20 September, 2017},
  BOOKTITLE = {Selected papers from the CLARIN Annual Conference 2017, Budapest, 18-20 September 2017},
}

@INPROCEEDINGS{SORIA_2018_INPROCEEDINGS_SQR_387362,
  AUTHOR = {Soria, C. and Quochi, V. and Russo, I.},
  TITLE = {The DLDP Survey on Digital Use and Usability of EU Regional and Minority Languages},
  YEAR = {2018},
  ABSTRACT = {This paper reports about the design, the results and the key findings of a survey launched by the Digital Language Diversity Project about the digital use and usability of regional and minority languages. The aim of the survey - the first of this kind - was to investigate the real needs and expectations of European minority language speakers regarding digital opportunities.
The focus on four languages (Basque, Breton, Karelian and Sardinian) at different stages of digital development offers a starting point to develop strategies for assessing digital vitality of these languages and overcoming specific difficulties.},
  KEYWORDS = {minority languages, digital survival, electronic communication},
  PAGES = {4155--4160},
  URL = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/684.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {979-10-95546-00-9},
  CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  CONFERENCE_PLACE = {Miyazaki, Japan},
  CONFERENCE_DATE = {7-12/05/2018},
  BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.},
}

@TECHREPORT{BARONI_2018_TECHREPORT_BQRSCGHKSS_483257,
  AUTHOR = {Baroni, P. and Quochi, V. and Russo, I. and Soria, C. and Ceberio, B. K. and Gurrutxaga, H. A. and Hicks, D. and Kruse, E. and Salonen, T. and Sarhimaa, A.},
  TITLE = {Kit per la sopravvivenza digitale della lingua sarda-Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua sarda},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua sarda (versione italiana)},
  KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Sardinian},
  PAGES = {12},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Sardinian_IT.pdf},
}

@TECHREPORT{CEBERIO_2018_TECHREPORT_CGBHKQRSSS_443050,
  AUTHOR = {Ceberio, B. K. and Gurrutxaga, H. A. and Baroni, P. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.
and Soria, C.},
  TITLE = {Euskarak Mundu Digitalean Bizirauteko Kita-DLDPren gomendioak, euskararen bizitasun digitala hobetu dadin},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale della lingua basca (versione basca)},
  KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Basque},
  PAGES = {27},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Basque_EU.pdf},
}

@TECHREPORT{CEBERIO_2018_TECHREPORT_CGBHKQRSSS_443051,
  AUTHOR = {Ceberio, B. K. and Gurrutxaga, H. A. and Baroni, P. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {Kit de Supervivencia Lingüística Digital del Euskera-Recomendaciones del DLDP para mejorar la Vitalidad Digital del euskera},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del basco (versione spagnola)},
  KEYWORDS = {digital diversity, digital vitality, recommendations, Basque, digital survival},
  PAGES = {28},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Basque_ES.pdf},
}

@TECHREPORT{CEBERIO_2018_TECHREPORT_CGBHKQRSSS_443020,
  AUTHOR = {Ceberio, B. K. and Gurrutxaga, H. A. and Baroni, P. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {The DLDP Digital Language Survival Kit},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale delle lingue (versione inglese integrale)},
  KEYWORDS = {sopravvivenza digitale, lingue minoritarie, less-resourced languages},
  PAGES = {38},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit.pdf},
}

@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_443047,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T.
and Sarhimaa, A. and Soria, C.},
  TITLE = {The DLDP Roadmap},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione inglese integrale)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {19},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Roadmap.pdf},
}

@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483247,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {The DLDP Roadmap-Policy Recommendations \& Timeline},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione inglese sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_EN.pdf},
}

@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483251,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {DLDP etenemissuunnitelma-Toimenpidesuunnitelmat ja aikajana},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione finlandese sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_FI.pdf},
}

@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483254,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.
and Soria, C.},
  TITLE = {La DLDP Hoja de Ruta-Políticas recomendadas \& Cronograma},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione spagnola sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_ES.pdf},
}

@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483255,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {Diversità Linguistica Digitale: la Roadmap-Raccomandazioni strategiche \& Sequenza},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione italiana sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_IT.pdf},
}

@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483256,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {DLDP Bide Orria-Gomendatutako politikak \& Kronograma},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione basca sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_EU.pdf},
}

@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483262,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.
and Soria, C.},
  TITLE = {Die DLDP Roadmap-Strategieempfehlungen \& Zeitplan},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione tedesca sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_DE.pdf},
}

@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483263,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {La Roadmap DLDP-Recommandations de politique et calendrier},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione francese sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_FR.pdf},
}

@TECHREPORT{HICKS_2018_TECHREPORT_HSBCGKQRSS_443354,
  AUTHOR = {Hicks, D. and Soria, C. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.},
  TITLE = {Pak treuzveviñ ar Brezhoneg niverel-Erbedoù an DLDP evit gwellaat buhezegezh niverel ar brezhoneg},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del bretone (versione bretone)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Breton_BR.pdf},
}

@TECHREPORT{HICKS_2018_TECHREPORT_HSBCGKQRSS_443359,
  AUTHOR = {Hicks, D. and Soria, C. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T.
and Sarhimaa, A.},
  TITLE = {Kit de survie numérique pour la langue bretonne-Les recommandations du DLDP pour améliorer la vitalité numérique du Breton},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del bretone (versione francese)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Breton_FR.pdf},
}

@TECHREPORT{SALONEN_2018_TECHREPORT_SBCGHKQRSS_443365,
  AUTHOR = {Salonen, T. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Sarhimaa, A. and Soria, C.},
  TITLE = {Karjalan digitaalinen kielenselviytymispakkaus-DLDP-suositukset karjalan kielen digitaalisen elinvoimaisuuden parantamiseksi},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale della lingua careliana (versione finlandese)},
  KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Karelian},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Karelian_FI.pdf},
}

@TECHREPORT{SALONEN_2018_TECHREPORT_SBCGHKQRSS_483261,
  AUTHOR = {Salonen, T. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Sarhimaa, A. and Soria, C.},
  TITLE = {Karjalan digitualine hengihjiämispakkavus-DLDP-rekomendatsiet karjalan kielen digitualizen elinvoimazuon kohendamizeh},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua careliana (versione careliana)},
  KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Karelian},
  PAGES = {12},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Karelian_KRL.pdf},
}

@MISC{CEBERIO_2018_MISC_CGSRQ_440548,
  AUTHOR = {Ceberio, K. and Gurrutxaga, A.
and Soria, C. and Russo, I. and Quochi, V.},
  TITLE = {How to Use the Digital Language Vitality Scale},
  YEAR = {2018},
  ABSTRACT = {The Digital Language Vitality Scale is an instrument developed within the framework of the Digital Language Diversity Project (www.dldp.eu) for estimating the degree of digital vitality of any given language. It aims to be an instrument for self-assessment of the digital vitality of any language, although it is aimed in particular at identifying current gaps, needs and requirements regarding the extent to which a language community is active/vital on digital media and devices so that adequate digital language planning can be done. This document instructs prospective adopters on how to best use it.},
  KEYWORDS = {Diversità Linguistica, BLARK, Sopravvivenza linguistica digitale},
  PAGES = {18},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Vitality-Scale.pdf},
}

@EDITORIAL{SORIA_2017_EDITORIAL_SRQ_382301,
  AUTHOR = {Soria, C. and Russo, I. and Quochi, V.},
  TITLE = {Reports on Digital Language Diversity in Europe},
  YEAR = {2017},
  ABSTRACT = {In these reports we present the results of the first survey about the actual needs of European minority languages speakers in terms of digital opportunities},
  KEYWORDS = {regional languages, minority languages, digital vitality, digital use},
  URL = {http://www.dldp.eu/content/reports-digital-language-diversity-europe},
}

@INPROCEEDINGS{NICOLAS_2017_INPROCEEDINGS_NKMDCAEBQ_375984,
  AUTHOR = {Nicolas, L. and König, A. and Monachini, M. and Del Gratta, R. and Calamai, S. and Abel, A. and Enea, A. and Biliotti, F. and Quochi, V.},
  TITLE = {CLARIN-IT: State of Affairs, Challenges and Opportunities},
  YEAR = {2017},
  ABSTRACT = {This paper provides an overview on the Italian national CLARIN consortium and the status of CLARIN-IT in general.
It thus discusses the current state of affairs of the consortium and provides information on the members, especially with regards to what they offer to CLARIN in terms of resources, services and expertise, and what CLARIN offers them to further their own research.},
  KEYWORDS = {Italian CLARIN consortium, CLARIN-IT},
  PAGES = {4},
  URL = {https://www.clarin.eu/event/2017/clarin-annual-conference-2017-budapest-hungary},
  CONFERENCE_NAME = {CLARIN Annual Conference 2017},
  CONFERENCE_PLACE = {Budapest, Hungary},
  CONFERENCE_DATE = {18-20 September, 2017},
}

@INCOLLECTION{QUOCHI_2016_INCOLLECTION_Q_358123,
  AUTHOR = {Quochi, V.},
  TITLE = {Development and representation of Italian light-fare constructions},
  YEAR = {2016},
  ABSTRACT = {The essay describes the study of the development and use of light fare 'do' constructions in Child-directed Speech and in Child Language with the twofold goal of showing that a Construction Grammar approach is viable, and of providing support to usage-based, functional predictions on language acquisition. The analysis of naturalistic data derived from the CHILDES database lead to two main findings: first, a representation of fare Light Verb Constructions as a family of constructions organized like a radial category is not only possible but more explicative, second, there exists a 'fare' pivot schema that children generalize at an early stage because it serves the purpose of naming new events, activities or situations.},
  KEYWORDS = {Corpus linguistics Language Acquisition Construction Grammar, phraseology},
  PAGES = {39--64},
  URL = {https://benjamins.com/#catalog/books/cal.19.03quo/details},
  VOLUME = {19},
  DOI = {10.1075/cal.19.03quo},
  PUBLISHER = {John Benjamins Publishing Company (Amsterdam/Philadelphia, USA)},
  ISBN = {9789027204417},
  BOOKTITLE = {Corpus-based Approaches to Construction Grammar},
  EDITOR = {Yoon, J. and Gries, S. Th.},
}

@INPROCEEDINGS{SORIA_2016_INPROCEEDINGS_SRQHGST_355526,
  AUTHOR = {Soria, C. and Russo, I. and Quochi, V.
and Hicks, D. and Gurrutxaga, A. and Sarhimaa, A. and Tuomisto, M.},
  TITLE = {Fostering digital representation of EU regional and minority languages: the Digital Language Diversity Project},
  YEAR = {2016},
  ABSTRACT = {Poor digital representation of minority languages further prevents their usability on digital media and devices. The Digital Language Diversity Project, a three-year project funded under the Erasmus+ programme, aims at addressing the problem of low digital representation of EU regional and minority languages by giving their speakers the intellectual and practical skills to create, share, and reuse online digital content. Availability of digital content and technical support to use it are essential prerequisites for the development of language-based digital applications, which in turn can boost digital usage of these languages. In this paper we introduce the project, its aims, objectives and current activities for sustaining digital usability of minority languages through adult education.},
  KEYWORDS = {Less-resourced languages, Language Technology, digital language vitality, digital language diversity},
  PAGES = {3256--3260},
  URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-9-1},
  CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)},
  CONFERENCE_PLACE = {Portoroz, Slovenia},
  CONFERENCE_DATE = {23-28 may},
}

@INPROCEEDINGS{DELGRATTA_2015_INPROCEEDINGS_DFMPRBGKQSC_342213,
  AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Goggi, S. and Khan, F. and Quochi, V. and Soria, C.
and Calzolari, N.},
  TITLE = {Visualising Italian Language Resources: a Snapshot},
  YEAR = {2015},
  ABSTRACT = {This paper aims to provide a first snapshot of Italian Language Resources (LRs) and their uses by the community, as documented by the papers presented at two different conferences, LREC2014 and CLiC-it 2014. The data of the former were drawn from the LOD version of the LRE Map, while those of the latter come from manually analyzing the proceedings. The results are presented in the form of visual graphs and confirm the initial hypothesis that Italian LRs require concrete actions to enhance their visibility.},
  KEYWORDS = {Italian Language Resources},
  PAGES = {100--104},
  URL = {https://books.openedition.org/aaccademia/1277?lang=it},
  ISBN = {978-88-99200-62-6},
  CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics CLiC-it 2015},
  CONFERENCE_PLACE = {Trento},
  CONFERENCE_DATE = {3-4 December 2015},
  BOOKTITLE = {Proceedings of the Second Italian Conference on Computational Linguistics CLiC-it 2015},
  EDITOR = {Bosco, C. and Tonelli, S. and Zanzotto, F. M.},
}

@INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FQM_304304,
  AUTHOR = {Frontini, F. and Quochi, V. and Monachini, M.},
  TITLE = {Generative Lexicon and polysemy: inducing logical alternations},
  YEAR = {2015},
  ABSTRACT = {The current paper brings together the results of a series of experiments for inducing regular sense alternations, or regular/ logical polysemy, from a computational lexicon based on the Generative Lexicon theory.
The results are discussed in light of the potential benefits and uses of the amended algorithm.},
  KEYWORDS = {Polysemy, Generative Lexicon, Logical Alternations},
  PAGES = {7},
  URL = {https://publications.cnr.it/doc/304304},
  PUBLISHER = {MAPLEX2015 Multiple Approaches to Lexicon Conference (Yamagata, JPN)},
  CONFERENCE_NAME = {MAPLEX2015 Multiple Approaches to Lexicon Conference},
  CONFERENCE_PLACE = {Yamagata, Japan},
  CONFERENCE_DATE = {February 9-10, 2015},
  EDITOR = {Hsieh, S. and Kanzaki, K.},
}

@ARTICLE{SORIA_2014_ARTICLE_SCMQBCMOP_285553,
  AUTHOR = {Soria, C. and Calzolari, N. and Monachini, M. and Quochi, V. and Bel, N. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.},
  TITLE = {The language resource Strategic Agenda: the FLaReNet synthesis of community recommendations},
  YEAR = {2014},
  ABSTRACT = {The main purpose of this paper is to serve as a landmark for future research and in particular for future strategic, infrastructural and coordination initiatives. It presents a preliminary plan for actions and infrastructures that could become the basis for future initiatives in the sector of Language Resources and Technologies (LRTs). The FLaReNet Language Resource Strategic Agenda presents a set of recommendations for the development and progress of LRT in Europe, as issued from a three-year consultation of the FLaReNet European project. Recommendations cover a broad range of topics and activities, spanning over production and use of language resources, licensing, maintenance and preservation issues, infrastructures for language resources, resource identification and sharing, evaluation and validation, interoperability and policy issues.
The intended recipients belong to a large set of players and stakeholders in LRT, ranging from individuals to research and education institutions, to policy-makers, funding agencies, SMEs and large companies, service and media providers},
  KEYWORDS = {Strategic agenda, Language resources planning, Recommended priority actions},
  PAGES = {753--775},
  URL = {https://publications.cnr.it/doc/285553},
  VOLUME = {48},
  DOI = {10.1007/s10579-014-9279-y},
  PUBLISHER = {Springer (Dordrecht, Paesi Bassi)},
  ISSN = {1574-020X},
  JOURNAL = {Language resources and evaluation (Print)},
}

@INCOLLECTION{CALZOLARI_2014_INCOLLECTION_CNMQST_286868,
  AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.},
  TITLE = {Lexicons, Terminologies, Ontologies: Reflections from Experiences in Resource Construction},
  YEAR = {2014},
  ABSTRACT = {This contribution aims at highlighting the strong interconnection between lexicons, terminologies and ontologies and especially the fundamental role that ontologies and lexica mutually play. Our view is that lexical resources are evolving in nature, from ontologically based lexicons we are going towards lexically based ontologies. We explore different instantiations of the current trend of using formal ontologies as a core module of computational lexicons, presenting the advantages especially in multilingual and terminological contexts. We present work showing that the lexical knowledge already present in non formal computational lexicons can be exploited to derive or enrich a formal ontology without much manual effort. In the terminology domain, we describe the construction of a resource for biology, directly linked to a parallel domain-ontology, that combines characteristics of both lexicons and terminologies, so that it can allow for intelligent access to content.
Finally, we describe our experience in two projects in which formal ontologies play a central role in the context of multilingual computational lexicons, where the ontology is what acts as the glue among the different monolingual lexicons and what provides cross-lingual reasoning capabilities.},
  KEYWORDS = {Computational Lexicons, Ontology, Terminology, Interoperability, Standards},
  PAGES = {103--121},
  URL = {http://www.springer.com/computer/ai/book/978-3-642-45326-7},
  VOLUME = {8003},
  DOI = {10.1007/978-3-642-45327-4_7},
  PUBLISHER = {Springer (Berlin Heidelberg, DEU)},
  ISBN = {978-3-642-45326-7},
  BOOKTITLE = {Language, Culture, Computation. Computational Linguistics and Linguistics. Essays Dedicated to Yaacov Choueka on the Occasion of His 75th Birthday, Part III},
  EDITOR = {Dershowitz, N. and Nissan, E.},
}

@INPROCEEDINGS{ANTICO_2014_INPROCEEDINGS_AQMM_286882,
  AUTHOR = {Antico, G. and Quochi, V. and Monachini, M. and Martinelli, M.},
  TITLE = {Marrying Technical Writing with LRT},
  YEAR = {2014},
  ABSTRACT = {In the last years the Technical Writer operational scenarios and the workflow sensibly changed; specifically, "free style" writing - or manual writing - has become outdated and technical writing is now much more concerned with structured management of content than in the past. Technical writing has become more demanding due to a number of factors among which the rise and spread of mobile devices usage.
This paper discusses the new needs of technical writing and content management business and how LRT can help it improve quality and productivity.},
  KEYWORDS = {controlled language, technical writing, content management systems},
  PAGES = {19--25},
  URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-8-4},
  CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation (LREC)},
  CONFERENCE_PLACE = {Reykjavik, Iceland},
  CONFERENCE_DATE = {26-31 may 2014},
  EDITOR = {Isahara, H. and Lee, K. C. S. and Nam, S.},
}

@INPROCEEDINGS{BARTOLINI_2014_INPROCEEDINGS_BQDRM_286944,
  AUTHOR = {Bartolini, R. and Quochi, V. and De Felice, I. and Russo, I. and Monachini, M.},
  TITLE = {From Synsets to Videos: Enriching ItalWordNet Multimodally},
  YEAR = {2014},
  ABSTRACT = {The paper describes the multimodal enrichment of ItalWordNet action verbs' entries by means of an automatic mapping with a conceptual ontology of action types instantiated by video scenes (ImagAct). The two resources present significative differences as well as interesting complementary features, such that a mapping of these two resources can lead to an enrichment of IWN, through the connection between synsets and videos apt to illustrate the meaning described by glosses. Here, we describe an approach inspired by ontology matching methods for the automatic mapping of ImagAct video scenes onto ItalWordNet. The experiments described in the paper are conducted on Italian, but the same methodology can be extended to other languages for which WordNets have been created, since ImagAct is available also for English, Chinese and Spanish.
This source of multimodal information can be exploited to design second language learning tools, as well as for language grounding in action recognition in video sources and potentially for robotics.},
  KEYWORDS = {Action ontology, Multimodality, WordNet},
  PAGES = {3110--3117},
  URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-8-4},
  CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014},
  CONFERENCE_PLACE = {Reykjavik, Iceland},
  CONFERENCE_DATE = {26-31 may},
  BOOKTITLE = {LREC 2014 Ninth International Conference on Language Resources and Evaluation Proceedings},
  EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.},
}

@INPROCEEDINGS{DEFELICE_2014_INPROCEEDINGS_DBRQM_291282,
  AUTHOR = {De Felice, I. and Bartolini, R. and Russo, I. and Quochi, V. and Monachini, M.},
  TITLE = {Evaluating ImagAct-WordNet mapping for English and Italian through videos},
  YEAR = {2014},
  ABSTRACT = {In this paper we present the results of the evaluation of an automatic mapping between two lexical resources, WordNet/ItalWordNet and ImagAct, a conceptual ontology of action types instantiated by video scenes. Results are compared with those obtained from a previous experiment performed only on Italian data. Differences between the two evaluation strategies, as well as between the quality of the mappings for the two languages considered in this paper, are discussed.},
  KEYWORDS = {Language Resources (LRs)},
  PAGES = {128--131},
  URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-CLICit-2014.pdf},
  DOI = {10.12871/CLICIT2014126},
  PUBLISHER = {Pisa University Press srl (Pisa, ITA)},
  ISBN = {978-88-67-41472-7},
  CONFERENCE_NAME = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 \& the Fourth International Workshop EVALITA 2014.
Pisa University Press srl: Pisa (Italia)},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {9-11 December 2014, Pisa},
  BOOKTITLE = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 \& the Fourth International Workshop EVALITA 2014},
  EDITOR = {Basili, R. and Lenci, A. and Magnini, B.},
}

@INPROCEEDINGS{FRONTINI_2014_INPROCEEDINGS_FQM_291452,
  AUTHOR = {Frontini, F. and Quochi, V. and Monachini, M.},
  TITLE = {Polysemy alternations extraction using the PAROLE SIMPLE CLIPS Italian lexicon},
  YEAR = {2014},
  ABSTRACT = {This paper presents the results of an experiment of polysemy alternations induction from a lexicon (Utt and Padó, 2011; Frontini et al., 2014), discussing the results and proposing an amendment in the original algorithm.},
  KEYWORDS = {Language Resources and Technologies},
  PAGES = {175--179},
  URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-CLICit-2014.pdf},
  DOI = {10.12871/CLICIT2014134},
  PUBLISHER = {Pisa University Press srl (Pisa, ITA)},
  ISBN = {978-88-67-41472-7},
  CONFERENCE_NAME = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 \& the Fourth International Workshop EVALITA 2014},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {9-11 December 2014, Pisa},
  BOOKTITLE = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 \& the Fourth International Workshop EVALITA 2014},
  EDITOR = {Basili, R. and Lenci, A. and Magnini, B.},
}

@INPROCEEDINGS{FRONTINI_2014_INPROCEEDINGS_FQPUM_286984,
  AUTHOR = {Frontini, F. and Quochi, V. and Padó, S. and Utt, J. and Monachini, M.},
  TITLE = {Polysemy Index for Nouns: an Experiment on Italian using the PAROLE SIMPLE CLIPS Lexical Database},
  YEAR = {2014},
  ABSTRACT = {An experiment is presented to induce a set of polysemous basic type alternations (such as ANIMAL-FOOD, or BUILDING-INSTITUTION) by deriving them from the sense alternations found in an existing lexical resource. The paper builds on previous work and applies those results to the Italian lexicon PAROLE SIMPLE CLIPS. The new results show how the set of frequent type alternations that can be induced from the lexicon is partly different from the set of polysemy relations selected and explicitly applied by lexicographers when building it.
The analysis of mismatches shows that frequent type alternations do not always correspond to prototypical polysemy relations, nevertheless the proposed methodology represents a useful tool offered to lexicographers to systematically check for possible gaps in their resource.},
  KEYWORDS = {Polysemy, lexical resources, semantics},
  PAGES = {2955--2963},
  URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-8-4},
  CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014},
  CONFERENCE_PLACE = {Reykjavik, Iceland},
  CONFERENCE_DATE = {26-31 may},
  BOOKTITLE = {LREC 2014 Ninth International Conference on Language Resources and Evaluation Proceedings},
  EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.},
}

@INPROCEEDINGS{PANUNZI_2014_INPROCEEDINGS_PDGJMMQR_285381,
  AUTHOR = {Panunzi, A. and De Felice, I. and Gregori, L. and Jacoviello, S. and Monachini, M. and Moneglia, M. and Quochi, V. and Russo, I.},
  TITLE = {Translating action verbs using a dictionary of images: the IMAGACT ontology},
  YEAR = {2014},
  ABSTRACT = {Action verbs have many meanings, covering actions in different ontological types. Moreover, each language categorizes action in its own way. One verb can refer to many different actions and one action can be identified by more than one verb. The range of variations within and across languages is largely unknown, causing trouble in all translation tasks. IMAGACT is a corpus-based ontology of action concepts, derived from English and Italian spontaneous speech corpora, which makes use of the universal language of images to identify the different action types extended by verbs referring to action in English, Italian, Chinese and Spanish.
This paper presents the IMAGACT search interface and the various kinds of linguistic information the user can derive from it. IMAGACT makes explicit the variation of meaning of action verbs within one language and allows comparisons of verb variations within and across languages. Because the action concepts are represented with videos, extension into new languages beyond those presently implemented in IMAGACT is done using competence-based judgments by mother-tongue informants, without intense lexicographic work involving underdetermined semantic descriptions.},
  KEYWORDS = {Action verbs, Image ontology, Multilingual dictionary, Computer-aided translation},
  PAGES = {1163--1170},
  URL = {http://euralex2014.eurac.edu/en/callforpapers/Documents/EURALEX%202014_gesamt.pdf},
  DOI = {10.13140/2.1.3719.2320},
  PUBLISHER = {EURAC (Bolzano, ITA)},
  ISBN = {978-88-88906-97-3},
  CONFERENCE_NAME = {XVI EURALEX International Congress: The User in Focus},
  CONFERENCE_PLACE = {Bolzano},
  CONFERENCE_DATE = {15-19/07/2014},
  BOOKTITLE = {Proceedings of the XVI EURALEX International Congress: The User in Focus},
  EDITOR = {Abel, A. and Vettori, C. and Ralli, N.},
}
@INPROCEEDINGS{CASELLI_2012_INPROCEEDINGS_CFQRR_287038,
  AUTHOR = {Caselli, T. and Frontini, F. and Quochi, V. and Rubino, F. and Russo, I.},
  TITLE = {Flexible Acquisition of Subcategorization Frames in Italian},
  YEAR = {2012},
  ABSTRACT = {Lexica of predicate-argument structures constitute a useful tool for several tasks in NLP. This paper describes a web-service system for automatic acquisition of verb subcategorization frames (SCFs) from parsed data in Italian. The system acquires SCFs in an unsupervised manner. We created two gold standards for the evaluation of the system, the first by mixing together information from two lexica (one manually created and the second automatically acquired) and manual exploration of corpus data and the other annotating data extracted from a specialized corpus (environmental domain).
Data filtering is accomplished by means of the maximum likelihood estimate (MLE). The evaluation phase has allowed us to identify the best empirical MLE threshold for the creation of a lexicon (P=0.653, R=0.557, F1=0.601). In addition to this, we assigned to the extracted entries of the lexicon a confidence score based on the relative frequency and evaluated the extractor on domain specific data. The confidence score will allow the final user to easily select the entries of the lexicon in terms of their reliability: one of the most interesting feature of this work is the possibility the final users have to customize the results of the SCF extractor, obtaining different SCF lexica in terms of size and accuracy.},
  KEYWORDS = {lexicon, automatic acquisition, subcategorisation frames},
  PAGES = {2842--2848},
  URL = {http://www.lrec-conf.org/proceedings/lrec2012/summaries/390.html},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {9782951740877},
  CONFERENCE_NAME = {Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  CONFERENCE_PLACE = {Istanbul, Turkey},
  CONFERENCE_DATE = {23-25 Maggio 2012},
  BOOKTITLE = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.},
}
@INPROCEEDINGS{DELGRATTA_2012_INPROCEEDINGS_DFMQRAL_223098,
  AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Quochi, V. and Rubino, F. and Abrate, M. and Lo Duca, A.},
  TITLE = {L-LEME: an Automatic Lexical Merger based on the LMF Standard},
  YEAR = {2012},
  ABSTRACT = {The present paper describes LMF LExical MErger (L-LEME), an architecture to combine two lexicons in order to obtain new resource(s). L-LEME relies on standards, thus exploiting the benefits of the ISO Lexical Markup Framework (LMF) to ensure interoperability.
L-LEME is meant to be dynamic and heavily adaptable: it allows the users to configure it to meet their specific needs. The L-LEME architecture is composed of two main modules: the Mapper, which takes in input two lexicons A and B and a set of user-defined rules and instructions to guide the mapping process (Directives D) and gives in output all matching entries. The algorithm also calculates a cosine similarity score. The Builder takes in input the previous results, a set of Directives D1 and produces a new LMF lexicon C. The Directives allow the user to define its own building rules and different merging scenarios. L-LEME is applied to a specific concrete task within the PANACEA project, namely the merging of two Italian SubCategorization Frame (SCF) lexicons. The experiment is interesting in that A and B have different philosophies behind, being A built by human introspection and B automatically extracted. Ultimately, L-LEME has interesting repercussions in many language technology applications},
  KEYWORDS = {LMF, Lexicon mapping, similarity score},
  PAGES = {31--40},
  URL = {https://publications.cnr.it/doc/223098},
  ISBN = {978-2-9517408-7-7},
  CONFERENCE_NAME = {The Eighth International Conference on Language Resources and Evaluation (LREC) 2012},
  CONFERENCE_PLACE = {Istanbul, Turkey},
  CONFERENCE_DATE = {2012},
  BOOKTITLE = {Proceedings of the LREC 2012 Workshop on Language Resource Merging},
  EDITOR = {Bel, N. and Gavrilidou, M. and Monachini, M. and Quochi, V. and Rimell, L.},
}
@INPROCEEDINGS{FRONTINI_2012_INPROCEEDINGS_FQR_220785,
  AUTHOR = {Frontini, F. and Quochi, V. and Rubino, F.},
  TITLE = {Automatic Creation of Quality Multi-Word Lexica from Noisy Text Data},
  YEAR = {2012},
  ABSTRACT = {This paper describes the design of a tool for the automatic creation of multi-word lexica that is deployed as a web service and runs on automatically web-crawled data within the framework of the PANACEA platform.
The main purpose of our task is to provide a (computationally "light") tool that creates a full high quality lexical resource of multi-word items. Within the platform, this tool is typically inserted in a work flow whose first step is automatic web-crawling. Therefore, the input data of our lexical extractor is intrinsically noisy. The paper evaluates the capacity of the tool to deal with noisy data, and in particular with texts containing a significant amount of duplicated paragraphs. The accuracy of the extraction of multi-word expressions from the original crawled corpus is compared to the accuracy of the extraction from a later "de-duplicated" version of the corpus. The paper shows how our method can extract with sufficiently good precision also from the original, noisy crawled data. The output of our tool is a multi-word lexicon formatted and encoded in XML according to the Lexical Mark-up Framework.},
  KEYWORDS = {Lexical induction, multi-word extraction, web-based distributed platform, noisy data},
  URL = {http://www.kde.cs.tut.ac.jp/~aono/pdf/COLING2012/AND/pdf/AND04.pdf},
  PUBLISHER = {ACM, Association for computing machinery (New York, USA)},
  ISBN = {978-1-4503-1919-5},
  CONFERENCE_NAME = {AND 2012},
  CONFERENCE_PLACE = {Mumbai, India},
  CONFERENCE_DATE = {December 9, 2012},
  BOOKTITLE = {Proceedings of the Sixth Workshop on Analytics for Noisy Unstructured Text Data},
}
@INPROCEEDINGS{POCH_2012_INPROCEEDINGS_PTHQB_286877,
  AUTHOR = {Poch, M. and Toral, A. and Hamon, O. and Quochi, V. and Bel, N.},
  TITLE = {Towards a User-Friendly Platform for Building Language Resources based on Web Services},
  YEAR = {2012},
  ABSTRACT = {This paper presents the platform developed in the PANACEA project, a distributed factory that automates the stages involved in the acquisition, production, updating and maintenance of Language Resources required by Machine Translation and other Language Technologies.
We adopt a set of tools that have been successfully used in the Bioinformatics field, they are adapted to the needs of our field and used to deploy web services, which can be combined to build more complex processing chains (workflows). This paper describes the platform and its different components (web services, registry, workflows, social network and interoperability). We demonstrate the scalability of the platform by carrying out a set of massive data experiments. Finally, a validation of the platform across a set of required criteria proves its usability for different types of users (non-technical users and providers).},
  KEYWORDS = {service platform, workflow, interoperability},
  PAGES = {1156--1163},
  URL = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/543_Paper.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-7-7},
  CONFERENCE_NAME = {Eighth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Istanbul, Turchia},
  CONFERENCE_DATE = {23-25/05/2012},
  BOOKTITLE = {Proceedings of the Eighth International Conference on Language Resources and Evaluation, LREC 2012},
  EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.},
}
@INPROCEEDINGS{QUOCHI_2012_INPROCEEDINGS_QFR_220778,
  AUTHOR = {Quochi, V. and Frontini, F. and Rubino, F.},
  TITLE = {A MWE Acquisition and Lexicon Builder Web Service},
  YEAR = {2012},
  ABSTRACT = {This paper describes the development of a web-service tool for the automatic extraction of Multi-word expressions lexicons, which has been integrated in a distributed platform for the automatic creation of linguistic resources.
The main purpose of the work described is thus to provide a (computationally "light") tool that produces a full lexical resource: multi-word terms/items with relevant and useful attached information that can be used for more complex processing tasks and applications (e.g. parsing, MT, IE, query expansion, etc.). The output of our tool is a MW lexicon formatted and encoded in XML according to the Lexical Mark-up Framework. The tool is already functional and available as a service. Evaluation experiments show that the tool precision is of about 80%.},
  KEYWORDS = {Multiword extraction, lexical resources, LMF, web services},
  PAGES = {2291--2306},
  URL = {http://aclweb.org/anthology/C/C12/C12-1140.pdf},
  PUBLISHER = {Curran Associates (Red Hook, NY 12571, USA)},
  ISBN = {9781627483896},
  CONFERENCE_NAME = {International Conference on Computational Linguistics (COLING)},
  CONFERENCE_PLACE = {Mumbai, India},
  CONFERENCE_DATE = {December 2012},
  BOOKTITLE = {Proceedings of COLING 2012: Technical Papers},
  EDITOR = {Kay, M. and Boitet, C.},
}
@INPROCEEDINGS{RUBINO_2012_INPROCEEDINGS_RFQ_220773,
  AUTHOR = {Rubino, F. and Frontini, F. and Quochi, V.},
  TITLE = {Integrating NLP Tools in a Distributed Environment: A Case Study Chaining a Tagger with a Dependency Parser},
  YEAR = {2012},
  ABSTRACT = {The present paper tackles the issue of PoS tag conversion within the framework of a distributed web service platform for the automatic creation of language resources. PoS tagging is now considered a "solved problem"; yet, because of the differences in the tagsets, interchange of the various PoS taggers available is still hampered. In this paper we describe the implementation of a PoS-tagged-corpus converter, which is needed for chaining together in a workflow the FreeLing PoS tagger for Italian and the DESR dependency parser, given that these two tools have been developed independently.
The conversion problems experienced during the implementation, related to the properties of the different tagsets and of tagset conversion in general, are discussed together with the solutions adopted. Finally, the converter is evaluated by assessing the impact of conversion on the performance of the dependency parser by comparing with the outcome of the native pipeline. From this we learn that in most cases parsing errors are due to actual tagging errors, and not to conversion itself. Besides, information on accuracy loss is an important feature in a distributed environment of (NLP) services, where users need to decide which services best suit their needs},
  KEYWORDS = {PoS tag conversion, interoperability, NLP pipelines},
  PAGES = {2125--2131},
  URL = {http://www.lrec-conf.org/proceedings/lrec2012/summaries/726.html},
  PUBLISHER = {European language resources association (ELRA) (Paris, FRA)},
  ISBN = {9782951740877},
  CONFERENCE_NAME = {Language Resources and Evaluation Conference 2012},
  CONFERENCE_PLACE = {Istanbul, Turchia},
  CONFERENCE_DATE = {23-25 Maggio 2012},
  BOOKTITLE = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.},
}
@INPROCEEDINGS{SORIA_2012_INPROCEEDINGS_SBCMMOPQC_219679,
  AUTHOR = {Soria, C. and Bel, N. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Calzolari, N.},
  TITLE = {The FLaReNet Strategic Language Resource Agenda},
  YEAR = {2012},
  ABSTRACT = {The FLaReNet Strategic Agenda highlights the most pressing needs for the sector of Language Resources and Technologies and presents a set of recommendations for its development and progress in Europe, as issued from a three-year consultation of the FLaReNet European project.
The FLaReNet recommendations are organised around nine dimensions: a) documentation b) interoperability c) availability, sharing and distribution d) coverage, quality and adequacy e) sustainability f) recognition g) development h) infrastructure and i) international cooperation. As such, they cover a broad range of topics and activities, spanning over production and use of language resources, licensing, maintenance and preservation issues, infrastructures for language resources, resource identification and sharing, evaluation and validation, interoperability and policy issues. The intended recipients belong to a large set of players and stakeholders in Language Resources and Technology, ranging from individuals to research and education institutions, to policy-makers, funding agencies, SMEs and large companies, service and media providers. The main goal of these recommendations is to serve as an instrument to support stakeholders in planning for and addressing the urgencies of the Language Resources and Technologies of the future.},
  KEYWORDS = {strategic agenda, language resources planning, recommended priority actions},
  PAGES = {1379--1386},
  URL = {http://www.lrec-conf.org/proceedings/lrec2012/index.html},
  ISBN = {978-2-9517408-7-7},
  CONFERENCE_NAME = {The Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  CONFERENCE_PLACE = {Istanbul, Turkey},
  CONFERENCE_DATE = {23-25 may 2012},
  BOOKTITLE = {Proceedings of the 8th international conference on Language Resources and Evaluation (LREC2012)},
  EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.},
}
@INPROCEEDINGS{QUOCHI_2012_INPROCEEDINGS_Q_220828,
  AUTHOR = {Quochi, V.},
  TITLE = {How predictive are grammatical constructions in Italian? The case of the caused-motion construction},
  YEAR = {2012},
  ABSTRACT = {Differently from English, Italian has a rich morphological system and a relative free word-order.
For these reasons, the suitability of a "full-scope" constructional approach to Italian is not given. Although Goldberg's (1995, 2006) version of Construction grammar language is constructions all the way down (or up), one could still argue that in Italian, i.e. a language rich in morphology, abstract, grammatical constructions do not play a role, thus weakening the constructionist view. One of the strong points in favour of Goldberg's approach is that argument structure constructions in English have been found to be highly predictive of sentence meaning (Goldberg et al. 2005), which provides a motivation for their early acquisition by children. Many of such studies and evidences are still missing for Italian. This contribution will therefore attempt to start filling this gap by testing the predictive power of the Italian Caused Motion Construction. Data is taken from the CHILDES database (MacWhinney 2000) and annotated according to constructional properties and verb meaning. The annotation is then used to calculate the Cue and Category Validity (Murphy 2002) of both the Construction and the main verbs, which measures their predictive power (i.e respectively their reliability and availability) in relation to the overall sentence meaning. Results show that the Italian Caused Motion Construction is not only more reliable than verbs as a predictor of overall sentence meaning, but it is also more available.},
  KEYWORDS = {Construction Grammar, Psicolinguistica, Linguistica del corpus},
  PAGES = {265},
  URL = {http://www.sle2012.eu/downloads/Book_abstracts_SLE2012_23aug_final.pdf},
  CONFERENCE_NAME = {45th Annual Meeting of the Societas Linguistica Europaea (SLE2012)},
  CONFERENCE_PLACE = {Stoccolma, Svezia},
  CONFERENCE_DATE = {29/8-1/9 2012},
}
@TECHREPORT{POCH_2012_TECHREPORT_PHQDTTPB_221573,
  AUTHOR = {Poch, M. and Hamon, O. and Quochi, V. and Del Gratta, R. and Toral, A. and Thurmair, G. and Prokopidis, P. and Bel, N.},
  TITLE = {D3.4 Third version (v4) of the integrated platform and documentation},
  YEAR = {2012},
  ABSTRACT = {The deliverable describes the third and final version of the PANACEA platform.},
  KEYWORDS = {infrastrutture, Trattamento del linguaggio naturale},
  URL = {https://publications.cnr.it/doc/221573},
}
@TECHREPORT{QUOCHI_2012_TECHREPORT_QFBHPPBTTK_221616,
  AUTHOR = {Quochi, V. and Frontini, F. and Bartolini, R. and Hamon, O. and Poch Riera, M. and Padró, M. and Bel, N. and Thurmair, G. and Toral, A. and Kamran, A.},
  TITLE = {D7.4 Third evaluation report. Evaluation of PANACEA v3 and produced resources},
  YEAR = {2012},
  ABSTRACT = {D7.4 reports on the evaluation of the different components integrated in the PANACEA third cycle of development as well as the final validation of the platform itself. All validation and evaluation experiments follow the evaluation criteria already described in D7.1. The main goal of WP7 tasks was to test the (technical) functionalities and capabilities of the middleware that allows the integration of the various resource-creation components into an interoperable distributed environment (WP3) and to evaluate the quality of the components developed in WP5 and WP6. The content of this deliverable is thus complementary to D8.2 and D8.3 that tackle advantages and usability in industrial scenarios. It has to be noted that the PANACEA third cycle of development addressed many components that are still under research. The main goal for this evaluation cycle thus is to assess the methods experimented with and their potentials for becoming actual production tools to be exploited outside research labs. For most of the technologies, an attempt was made to re-interpret standard evaluation measures, usually in terms of accuracy, precision and recall, as measures related to a reduction of costs (time and human resources) in the current practices based on the manual production of resources.
In order to do so, the different tools had to be tuned and adapted to maximize precision and for some tools the possibility to offer confidence measures that could allow a separation of the resources that still needed manual revision has been attempted. Furthermore, the extension to other languages in addition to English, also a PANACEA objective, has been evaluated. The main facts about the evaluation results are now summarized.},
  KEYWORDS = {PANACEA, evaluation, machine translation},
  URL = {http://hdl.handle.net/10230/22533},
}
@TECHREPORT{RIMELL_2012_TECHREPORT_RBPFMQ_221631,
  AUTHOR = {Rimell, L. and Bel, N. and Padró, M. and Frontini, F. and Monachini, M. and Quochi, V.},
  TITLE = {D6.2 Integrated Final Version of the Components for Lexical Acquisition},
  YEAR = {2012},
  ABSTRACT = {The PANACEA project has addressed one of the most critical bottlenecks that threaten the development of technologies to support multilingualism in Europe, and to process the huge quantity of multilingual data produced annually. Any attempt at automated language processing, particularly Machine Translation (MT), depends on the availability of language-specific resources. Such Language Resources (LR) contain information about the language's lexicon, i.e. the words of the language and the characteristics of their use. In Natural Language Processing (NLP), LRs contribute information about the syntactic and semantic behaviour of words - i.e. their grammar and their meaning - which inform downstream applications such as MT. To date, many LRs have been generated by hand, requiring significant manual labour from linguistic experts. However, proceeding manually, it is impossible to supply LRs for every possible pair of European languages, textual domain, and genre, which are needed by MT developers.
Moreover, an LR for a given language can never be considered complete nor final because of the characteristics of natural language, which continually undergoes changes, especially spurred on by the emergence of new knowledge domains and new technologies. PANACEA has addressed this challenge by building a factory of LRs that progressively automates the stages involved in the acquisition, production, updating and maintenance of LRs required by MT systems. The existence of such a factory will significantly cut down the cost, time and human effort required to build LRs. WP6 has addressed the lexical acquisition component of the LR factory, that is, the techniques for automated extraction of key lexical information from texts, and the automatic collation of lexical information into LRs in a standardized format. The goal of WP6 has been to take existing techniques capable of acquiring syntactic and semantic information from corpus data, improving upon them, adapting and applying them to multiple languages, and turning them into powerful and flexible techniques capable of supporting massive applications. One focus for improving the scalability and portability of lexical acquisition techniques has been to extend existing techniques with more powerful, less "supervised" methods. In NLP, the amount of supervision refers to the amount of manual annotation which must be applied to a text corpus before machine learning or other techniques are applied to the data to compile a lexicon. More manual annotation means more accurate training data, and thus a more accurate LR. However, given that it is impractical from a cost and time perspective to manually annotate the vast amounts of data required for multilingual MT across domains, it is important to develop techniques which can learn from corpora with less supervision. Less supervised methods are capable of supporting both large-scale acquisition and efficient domain adaptation, even in the domains where data is scarce.
Another focus of lexical acquisition in PANACEA has been the need of LR users to tune the accuracy level of LRs. Some applications may require increased precision, or accuracy, where the application requires a high degree of confidence in the lexical information used. At other times a greater level of coverage may be required, with information about more words at the expense of some degree of accuracy. Lexical acquisition in PANACEA has investigated confidence thresholds for lexical acquisition to ensure that the ultimate users of LRs can generate lexical data from the PANACEA factory at the desired level of accuracy.},
  KEYWORDS = {Lexical Acquisition},
  URL = {http://www.panacea-lr.eu/system/deliverables/PANACEA_D6.2.pdf},
}
@TECHREPORT{RIMELL_2012_TECHREPORT_RBPFMQD_221650,
  AUTHOR = {Rimell, L. and Bel, N. and Padró, M. and Frontini, F. and Monachini, M. and Quochi, V. and Del Gratta, R.},
  TITLE = {D6.5 Merged dictionaries},
  YEAR = {2012},
  ABSTRACT = {This document presents the merged dictionaries delivered in PANACEA. Those dictionaries result from merging already existing lexica, generally for general domain, with domain specific lexica acquired using PANACEA platform. The domain specific lexica are presented and delivered in D6.3 and the merging repository that allowed the multilevel merging in D6.4.},
  KEYWORDS = {merged dictionaries, computational lexicon},
  URL = {http://www.panacea-lr.eu//en/deliverables/list},
}
@TECHREPORT{RIMELL_2012_TECHREPORT_RBPFMQD_221755,
  AUTHOR = {Rimell, L. and Bel, N. and Padró, M. and Frontini, F. and Monachini, M. and Quochi, V. and Del Gratta, R.},
  TITLE = {D6.3 Monolingual lexica for English, Spanish and Italian tuned for a particular domain (LAB and ENV)},
  YEAR = {2012},
  ABSTRACT = {This document presents the lexica acquired using PANACEA platform for Labour and Environment domains. The languages of the lexica are English, Spanish and Italian.
The lexical information acquired depends on the language, according to the available tools in the platform.},
  KEYWORDS = {Lexicon Acquisition},
  URL = {http://www.panacea-lr.eu/system/deliverables/PANACEA_D6.3.pdf},
}
@ARTICLE{THOMPSON_2011_ARTICLE_TMMCDLMMPQRSVRA_205232,
  AUTHOR = {Thompson, P. and McNaught, J. and Montemagni, S. and Calzolari, N. and Del Gratta, R. and Lee, V. and Marchi, S. and Monachini, M. and Pezik, P. and Quochi, V. and Rupp, C. and Sasaki, Y. and Venturi, G. and Rebholz Schuhmann, D. and Ananiadou, S.},
  TITLE = {The BioLexicon: a large-scale terminological resource for biomedical text mining},
  YEAR = {2011},
  ABSTRACT = {Background Due to the rapidly expanding body of biomedical literature, biologists require increasingly sophisticated and efficient systems to help them to search for relevant information. Such systems should account for the multiple written variants used to represent biomedical concepts, and allow the user to search for specific pieces of knowledge (or events) involving these concepts, e.g., protein-protein interactions. Such functionality requires access to detailed information about words used in the biomedical literature. Existing databases and ontologies often have a specific focus and are oriented towards human use. Consequently, biological knowledge is dispersed amongst many resources, which often do not attempt to account for the large and frequently changing set of variants that appear in the literature. Additionally, such resources typically do not provide information about how terms relate to each other in texts to describe events. Results This article provides an overview of the design, construction and evaluation of a large-scale lexical and conceptual resource for the biomedical domain, the BioLexicon. The resource can be exploited by text mining tools at several levels, e.g., part-of-speech tagging, recognition of biomedical entities, and the extraction of events in which they are involved.
As such, the BioLexicon must account for real usage of words in biomedical texts. In particular, the BioLexicon gathers together different types of terms from several existing data resources into a single, unified repository, and augments them with new term variants automatically extracted from biomedical literature. Extraction of events is facilitated through the inclusion of biologically pertinent verbs (around which events are typically organized) together with information about typical patterns of grammatical and semantic behaviour, which are acquired from domain-specific texts. In order to foster interoperability, the BioLexicon is modelled using the Lexical Markup Framework, an ISO standard. Conclusions The BioLexicon contains over 2.2 M lexical entries and over 1.8 M terminological variants, as well as over 3.3 M semantic relations, including over 2 M synonymy relations. Its exploitation can benefit both application developers and users. We demonstrate some such benefits by describing integration of the resource into a number of different tools, and evaluating improvements in performance that this can bring.},
  KEYWORDS = {Text Mining, Information Extraction, Computational Lexicon},
  PAGES = {1--29},
  URL = {http://www.biomedcentral.com/1471-2105/12/397},
  VOLUME = {12},
  DOI = {10.1186/1471-2105-12-397},
  PUBLISHER = {BioMed Central ([London], Regno Unito)},
  ISSN = {1471-2105},
  JOURNAL = {BMC bioinformatics},
}
@EDITORIAL{CALZOLARI_2011_EDITORIAL_CBSGMQ_206410,
  AUTHOR = {Calzolari, N. and Baroni, P. and Soria, C. and Goggi, S. and Monachini, M.
and Quochi, V.},
  TITLE = {Proceedings of the 3rd European Language Resources and Technologies Forum: Language Resources in the Sharing Age-the Strategic Agenda},
  YEAR = {2011},
  ABSTRACT = {Proceedings of the third FLaReNet forum on the European Language Resources and Technologies, held in Venezia, at the Auditorium Santa Margherita of the Università Ca' Foscari, on 26-27 May 2011.},
  KEYWORDS = {Language Resources, Language Technologies},
  PAGES = {86},
  URL = {http://www.flarenet.eu/sites/default/files/FLaReNet_Forum_2011_Proceedings.pdf},
}
@INPROCEEDINGS{CALZOLARI_2011_INPROCEEDINGS_CMQ_205719,
  AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V.},
  TITLE = {Interoperability Framework: The FLaReNet action plan proposal},
  YEAR = {2011},
  ABSTRACT = {Standards are fundamental to exchange, preserve, maintain and integrate data and language resources, and as an essential basis of any language resource infrastructure. This paper promotes an Interoperability Framework as a dynamic environment of standards and guidelines, also intended to support the provision of language-(web)service interoperability. In the past two decades, the need to define common practices and formats for linguistic resources has been increasingly recognized and sought. Today open, collaborative, shared data is at the core of a sound language strategy, and standardisation is actively on the move.
This paper first describes the current landscape of standards, and presents the major barriers to their adoption; then, it describes those scenarios that critically involve the use of standards and provide a strong motivation for their adoption; lastly, a series of actions and steps needed to operationalise standards and achieve a full interoperability for Language Resources and Technologies are proposed.},
  KEYWORDS = {Language Resources, standards},
  PAGES = {41--49},
  URL = {https://publications.cnr.it/doc/205719},
  ISBN = {978-974-466-564-5},
  CONFERENCE_NAME = {Workshop on Language Resources, Technology and Services in the Sharing Paradigm},
  CONFERENCE_PLACE = {Chiang Mai},
  CONFERENCE_DATE = {12 Novembre 2011},
}
@INPROCEEDINGS{QUOCHI_2011_INPROCEEDINGS_Q_287125,
  AUTHOR = {Quochi, V.},
  TITLE = {The development of Light-'do' Verb Constructions in Italian},
  YEAR = {2011},
  ABSTRACT = {This contribution presents the results of a study of the development of Light 'do' Verb Constructions in Italian based on naturalistic data. The claim is that there exists a Light Verb pivot schema that accounts for new productive formations and that this pattern is learnt by young children because it constitutes a labeling technique for naming new events, activities and situations. The findings of this research support two hypotheses of language acquisition. The results are based on analysis of longitudinal transcriptions of adult children interactions contained in the CHILDES databank (MacWhinney 2000).},
  KEYWORDS = {child language, construction grammar, light verb constructions},
  PAGES = {256--257},
  URL = {http://sle2011.cilap.es/downloads/book_abstracts.pdf},
  CONFERENCE_NAME = {SLE 2011-44 TH ANNUAL MEETING},
  CONFERENCE_PLACE = {Logroño, Spain},
  CONFERENCE_DATE = {8-11 Settembre 2011},
  BOOKTITLE = {SLE 2011-44TH ANNUAL MEETING BOOK OF ABSTRACT},
  EDITOR = {Arista, J. M.},
}
@TECHREPORT{ARRANZ_2011_TECHREPORT_ABBCCDFGMQRR_290606,
  AUTHOR = {Arranz, V. and Bel, N. and Budin, G.
and Caselli, T. and Choukri, K. and Del Gratta, R. and Frontini, F. and Goggi, S. and Monachini, M. and Quochi, V. and Rubino, F. and Russo, I.},
  TITLE = {The FLaReNet Databook},
  YEAR = {2011},
  ABSTRACT = {The FLaReNet Databook is not only the collection of all the factual material collected during the activities of the project, but also a set of innovative initiatives and instruments that will remain in place for the continuous collection of such "facts". The purpose of the Databook is in fact, on one side, to consolidate the analyses carried out in the project and, at the same time, to set up the proper mechanisms that will enable the provision of a continuous stream of relevant factual material, also after the end of the project.},
  KEYWORDS = {Language Resources (LRs)},
  PAGES = {1--8},
  URL = {http://www.flarenet.eu/?q=FLaReNet_Databook},
}
@TECHREPORT{CALZOLARI_2011_TECHREPORT_CBCMMOPQS_206397,
  AUTHOR = {Calzolari, N. and Bel, N. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C.},
  TITLE = {Final FLaReNet deliverable: Language Resources for the Future-The Future of Language Resources},
  YEAR = {2011},
  ABSTRACT = {Language Technologies (LT), together with their backbone, Language Resources (LR), provide an essential support to the challenge of Multilingualism and ICT of the future. The main task of language technologies is to bridge language barriers and to help creating a new environment where information flows smoothly across frontiers and languages, no matter the country, and the language, of origin. To achieve this goal, all players involved need to act as a community able to join forces on a set of shared priorities. However, until now the field of Language Resources and Technology has long suffered from an excess of individuality and fragmentation, with a lack of coherence concerning the priorities for the field, the direction to move, not to mention a common timeframe.
The context encountered by the FLaReNet project was thus represented by an active field needing a coherence that can only be given by sharing common priorities and endeavours. FLaReNet has contributed to the creation of this coherence by gathering a wide community of experts and making them participate in the definition of an exhaustive set of recommendations.}, KEYWORDS = {language resources and technologies, infrastructures}, PAGES = {97}, URL = {https://publications.cnr.it/doc/206397}, } @TECHREPORT{CALZOLARI_2011_TECHREPORT_CQS_206420, AUTHOR = {Calzolari, N. and Quochi, V. and Soria, C.}, TITLE = {FLaReNet Strategic Language Resource Agenda}, YEAR = {2011}, ABSTRACT = {Despite the complexity of handling its languages, the European Union has established that cultural and language differences are a unique asset to be preserved. Europe needs to find means - such as technological ones - to overcome the language barriers to support citizens and industry in a multilingual globalised world. The large majority of industrial technological applications that handle natural language, i.e. Machine Translation, Crosslingual Information Retrieval, Multilingual Information Extraction, Automatic Document Indexing, Question Answering, Natural Language Interfaces, etc., include Language Resources as critical components. Although Language Technologies may consist of language independent engines, they depend on the availability of language-dependent knowledge under the form of Language Resources for their real-life implementation. At the same time, it is proved that a critical mass of Language Resources can make advancement in research and technology development possible and quicker, making Europe the leader of the market related to multilingualism. 
Companies such as Google or Microsoft play a dominant role in this framework, as they have access to a huge amount of data in many different languages, devote considerable resources to Language Technologies, have massive computing power and a direct research-to-application pipeline using a new business model based on so-called "free" services. The fact that a US company like Google is delivering some of the most comprehensive Language Technology solutions to support multilingualism should raise concern among EU officials.}, KEYWORDS = {Language resources, infrastructures}, PAGES = {23}, URL = {https://publications.cnr.it/doc/206420}, } @TECHREPORT{MONACHINI_2011_TECHREPORT_MQCBBCCFHKLMOPPRSUW_206507, AUTHOR = {Monachini, M. and Quochi, V. and Calzolari, N. and Bel, N. and Budin, G. and Caselli, T. and Choukri, K. and Francopoulo, G. and Hinrichs, E. and Krauwer, S. and Lemnitzer, L. and Mariani, J. and Odijk, J. and Piperidis, S. and Przepiorkowski, A. and Romary, L. and Schmidt, H. and Uszkoreit, H. and Wittenburg, P.}, TITLE = {The Standards' Landscape Towards an Interoperability Framework}, YEAR = {2011}, ABSTRACT = {This document proposes an overview of the current scene towards an Interoperability Framework and acts as a reference point for the current standards that the community fosters and encourages to adopt/improve. This initiative is in close synchronization with other relevant initiatives such as CLARIN, ELRA, ISO and TEI and META-Share. The document builds on the CLARIN Standardisation Action Plan and adapts and extends it to the needs of the broader LT Community, beyond the SSH research areas including the industry. The main goal of this document is to give a practical orientation for various LT players, both commercial and academic; the main message being that a harmonized domain of language resources and technology can be achieved stepwise, but that an effort to adopt standards is necessary to overcome fragmentation. 
NB: This is to be intended by no means as a static, closed document, rather a dynamic one which needs to be constantly/periodically revised and updated by the community itself.}, KEYWORDS = {Standards, interoperability}, PAGES = {23}, URL = {https://publications.cnr.it/doc/206507}, } @INPROCEEDINGS{CALZOLARI_2010_INPROCEEDINGS_CSDGQRCMP_84809, AUTHOR = {Calzolari, N. and Soria, C. and Del Gratta, R. and Goggi, S. and Quochi, V. and Russo, I. and Choukri, K. and Mariani, J. and Piperidis, S.}, TITLE = {The LREC Map of Language Resources and Technologies}, YEAR = {2010}, ABSTRACT = {In this paper we present the LREC Map of Language Resources (data and tools), an innovative feature introduced in conjunction with the LREC 2010 Conference. The purpose of the Map is to shed light on the vast amount of resources that represent the background of the research presented at LREC, in the attempt to fill in a gap in the community knowledge about the resources that are used or created worldwide. It also aims at a change of culture in the field, actively engaging each researcher in the documentation task about resources. The Map has been developed on the basis of the information provided by LREC authors during the submission of papers to the LREC 2010 conference and the LREC workshops, and contains information about almost 2000 resources. 
The paper illustrates the motivation behind this initiative, its main characteristics, its relevance and future impact in the field, the metadata used to describe the resources, and finally presents some of the most relevant findings.}, KEYWORDS = {LR national/international projects, organizational/policy issues}, PAGES = {949-956}, URL = {http://www.lrec-conf.org/proceedings/lrec2010/index.html}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {LREC 2010 Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {17-23 May 2010}, BOOKTITLE = {LREC'10-Seventh International Conference on Language Resources and Evaluation. Proceedings}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{DELGRATTA_2010_INPROCEEDINGS_DDBCEMQSTC_84782, AUTHOR = {Del Gratta, R. and D'Onofrio, L. and Bartolini, R. and Caselli, T. and Enea, A. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A. and Calzolari, N.}, TITLE = {A Web-based Architecture for Interoperability of Lexical Resources}, YEAR = {2010}, ABSTRACT = {In this paper we present a Web Service Architecture for managing high level interoperability of Language Resources (LRs) by means of a Service Oriented Architecture (SOA) and the use of ISO standards, such as ISO LMF. We propose a layered architecture which separates the management of legacy resources (data collection) from data aggregation (workflow) and data access (user requests). 
We provide a case study to demonstrate how the proposed architecture is capable of managing data exchange among different lexical services in a coherent way and show how the use of a lexical standard becomes of primary importance when a protocol of interoperability is defined.}, KEYWORDS = {Interoperability, Web services, Lexical resources}, PAGES = {53-62}, URL = {http://weblab.iit.cnr.it/kyoto/www2.let.vu.nl/twiki/pub/Kyoto/Publications/icgl2010_DOnofrioetal.pdf}, PUBLISHER = {City university of Hong Kong press (Hong Kong, CHN)}, ISBN = {978-962-442-323-5}, CONFERENCE_NAME = {2nd International Conference on Global Interoperability for Language Resources}, CONFERENCE_PLACE = {Hong Kong}, CONFERENCE_DATE = {18-20 January 2010}, BOOKTITLE = {2nd International Conference on Global Interoperability for Language Resources, ICGL 2010}, EDITOR = {Fang, A. C. and Ide, N. and Webster, J.}, } @INPROCEEDINGS{JEZEK_2010_INPROCEEDINGS_JQ_84783, AUTHOR = {Jezek, E. and Quochi, V.}, TITLE = {Capturing Coercions in Texts: a First Annotation Exercise}, YEAR = {2010}, ABSTRACT = {In this paper we report the first results of an annotation exercise of argument coercion phenomena performed on Italian texts. Our corpus consists of ca 4000 sentences from the PAROLE sottoinsieme corpus (Bindi et al. 2000) annotated with Selection and Coercion relations among verb-noun pairs formatted in XML according to the Generative Lexicon Mark-up Language (GLML) format (Pustejovsky et al., 2008). For the purposes of coercion annotation, we selected 26 Italian verbs that impose semantic typing on their arguments in either Subject, Direct Object or Complement position. Every sentence of the corpus is annotated with the source type for the noun arguments by two annotators plus a judge. An overall agreement of 0.87 kappa indicates that the annotation methodology is reliable. 
A qualitative analysis of the results allows us to outline some suggestions for improvement of the task: 1) a different account of complex types for nouns has to be devised and 2) a more comprehensive account of coercion mechanisms requires annotation of the deeper meaning dimensions that are targeted in coercion operations, such as those captured by Qualia relations.}, KEYWORDS = {Corpus (creation, annotation, etc.), Knowledge Discovery/Representation, Semantics}, PAGES = {1464-1471}, URL = {http://www.lrec-conf.org/proceedings/lrec2010/summaries/713.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {17-23 Maggio 2010}, BOOKTITLE = {Proceedings of the Seventh International Conference on Language Resources and Evaluation-LREC'10}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{PUSTEJOVSKY_2010_INPROCEEDINGS_PRPJBQ_84771, AUTHOR = {Pustejovsky, J. and Rumshisky, A. and Plotnick, A. and Jezek, E. and Batiukova, O. and Quochi, V.}, TITLE = {SemEval-2010 Task 7: Argument Selection and Coercion}, YEAR = {2010}, ABSTRACT = {The paper describes the Argument Selection and Coercion task for the SemEval-2010 evaluation exercise, which involves characterizing the type of compositional operation that exists between a predicate and the arguments it selects. 
Specifically, the goal is to identify whether the type that a verb selects is satisfied directly by the argument, or whether the argument must change type to satisfy the verb typing.}, KEYWORDS = {semantic annotation, verb coercion}, URL = {http://www.aclweb.org/anthology/S10-1005}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-932432-70-1}, CONFERENCE_NAME = {Fifth International Workshop on Semantic Evaluation (SemEval 2010)}, CONFERENCE_PLACE = {Uppsala}, CONFERENCE_DATE = {15-16 Luglio 2010}, BOOKTITLE = {Proceedings of the 5th International Workshop on Semantic Evaluation}, EDITOR = {Erk, K. and Strapparava, C.}, } @TECHREPORT{CALZOLARI_2010_TECHREPORT_CSBQBBCMOP_157488, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Quochi, V. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 4}, YEAR = {2010}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157488}, } @ARTICLE{QUOCHI_2009_ARTICLE_Q_288752, AUTHOR = {Quochi, V.}, TITLE = {Usage scenarios and basic workflows}, YEAR = {2009}, PAGES = {5-5}, URL = {http://www.clarin.eu/sites/default/files/CLARIN_Newsletter_no_6.pdf}, VOLUME = {6}, JOURNAL = {CLARIN Newsletter}, } @ARTICLE{QUOCHI_2009_ARTICLE_QDSBMC_30876, AUTHOR = {Quochi, V. and Del Gratta, R. and Sassolini, E. and Bartolini, R. and Monachini, M. and Calzolari, N.}, TITLE = {A Standard Lexical-Terminological Resource for the Bio Domain}, YEAR = {2009}, ABSTRACT = {The present paper describes a large-scale lexical resource for the biology domain designed both for human and for machine use. This lexicon aims at semantic interoperability and extendability, through the adoption of ISO-LMF standard for lexical representation and through a granular and distributed encoding of relevant information. 
The first part of this contribution focuses on three aspects of the model that are of particular interest to the biology community: the treatment of term variants, the representation of bio events and the alignment with a domain ontology. The second part of the paper describes the physical implementation of the model: a relational database equipped with a set of automatic uploading procedures. Peculiarity of the BioLexicon is that it combines features of both terminologies and lexicons. A set of verbs relevant for the domain is also represented with full details on their syntactic and semantic argument structure.}, KEYWORDS = {Lexical representation model, Lexical Database, Computational Lexicography, Special Domains, Standards}, PAGES = {325-335}, URL = {https://publications.cnr.it/doc/30876}, VOLUME = {5603}, DOI = {10.1007/978-3-642-04235-5_28}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, JOURNAL = {Lecture notes in computer science}, } @EDITORIAL{CALZOLARI_2009_EDITORIAL_CBBBCGMMOPQST_183877, AUTHOR = {Calzolari, N. and Baroni, P. and Bel, N. and Budin, G. and Choukri, K. and Goggi, S. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {Proceedings of the 1st European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, ABSTRACT = {Proceedings of the first FLaReNet Forum on the European Language Resources and Technologies, held in Vienna, at the Austrian Academy of Science, on 12-13 February 2009.}, KEYWORDS = {Language Resources, Language Technologies, Multilingual, Digital}, PAGES = {105}, URL = {http://www.flarenet.eu/sites/default/files/Vienna09_Proceedings.pdf}, } @INPROCEEDINGS{JEZEK_2009_INPROCEEDINGS_JQC_84752, AUTHOR = {Jezek, E. and Quochi, V. 
and Calzolari, N.}, TITLE = {Relevance of Qualia Relations in Coercive Contexts}, YEAR = {2009}, KEYWORDS = {annotation, annotation scheme, semantics, type shift}, URL = {https://publications.cnr.it/doc/84752}, CONFERENCE_NAME = {5th International Conference on Generative Approaches to the Lexicon}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {2009}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CBGMQST_157465, AUTHOR = {Calzolari, N. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Dissemination Plan}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157465}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CBGMQST_157468, AUTHOR = {Calzolari, N. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 1}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157468}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CMSBGQT_157467, AUTHOR = {Calzolari, N. and Monachini, M. and Soria, C. and Baroni, P. and Goggi, S. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 2}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157467}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBCGMQTBBCMOP_157462, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Caselli, T. and Goggi, S. and Monachini, M. and Quochi, V. and Toral, A. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Action Plan}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157462}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBGMQT_157466, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. 
and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Evaluation Plan for the functioning of the Network}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157466}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBMQ_157463, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Monachini, M. and Quochi, V.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Annual Report No. 1}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157463}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBMQT_157469, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Monachini, M. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Project Presentation}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157469}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBBCCMMOPQT_157464, AUTHOR = {Calzolari, N. and Soria, C. and Bel, N. and Budin, G. and Caselli, T. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Blueprint of actions and infrastructures No. 1}, YEAR = {2009}, KEYWORDS = {Language Resources, Infrastructures, Recommendations}, URL = {https://publications.cnr.it/doc/157464}, } @MISC{CALZOLARI_2009_MISC_CBBBCGMMOPQST_157471, AUTHOR = {Calzolari, N. and Baroni, P. and Bel, N. and Budin, G. and Choukri, K. and Goggi, S. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, KEYWORDS = {Language Resources, Language Technologies}, URL = {https://publications.cnr.it/doc/157471}, } @MISC{CALZOLARI_2009_MISC_CBBCMOPBGMQST_157457, AUTHOR = {Calzolari, N. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. and Baroni, P. and Goggi, S. 
and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {Extended Report of: The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, KEYWORDS = {Language Resources, Language Technologies}, URL = {https://publications.cnr.it/doc/157457}, } @MISC{CALZOLARI_2009_MISC_CBBCMOPBGMQST_157460, AUTHOR = {Calzolari, N. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {Short Report of The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, KEYWORDS = {Language Resources, Language Technologies}, URL = {https://publications.cnr.it/doc/157460}, } @INPROCEEDINGS{MONACHINI_2008_INPROCEEDINGS_MQDC_84731, AUTHOR = {Monachini, M. and Quochi, V. and Del Gratta, R. and Calzolari, N.}, TITLE = {Using LMF to Shape a Lexicon for the Biomedical Domain}, YEAR = {2008}, ABSTRACT = {This paper describes the design, implementation and population of the BioLexicon in the framework of BootStrep, an FP6 project. The BioLexicon (BL) is a lexical resource designed for text mining in the bio-domain. It has been conceived to meet both domain requirements and upcoming ISO standards for lexical representation. The data model and data categories are compliant to the ISO Lexical Markup Framework and the Data Category Registry. The BioLexicon integrates features of lexicons and terminologies: term entries (and variants) derived from existing resources are enriched with linguistic features, including sub-categorization and predicate-argument information, extracted from texts. Thus, it is an extendable resource. Furthermore, the lexical entries will be aligned to concepts in the BioOntology, the ontological resource of the project. 
The BL implementation is an extensible relational database with automatic population procedures. Population relies on a dedicated input data structure allowing to upload terms and their linguistic properties and "pull-and-push" them in the database. The BioLexicon teaches that the state-of-the-art is mature enough to aim at setting up a standard in this domain. Being conformant to lexical standards, the BioLexicon is interoperable and portable to other areas.}, KEYWORDS = {Domain terminologies, Computational lexicons, Lexical standards, Lexical architectures}, PAGES = {153-157}, URL = {https://publications.cnr.it/doc/84731}, CONFERENCE_NAME = {LangTech 2008-Tecnologia applicata alla linguistica}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {28-29 February 2008}, EDITOR = {Delogu, C. and Falcone, M.}, } @INPROCEEDINGS{QUOCHI_2008_INPROCEEDINGS_QC_288714, AUTHOR = {Quochi, V. and Calderone, B.}, TITLE = {Learning properties of Noun Phrases: from data to functions}, YEAR = {2008}, ABSTRACT = {The paper presents two experiments of unsupervised classification of Italian noun phrases. The goal of the experiments is to identify the most prominent contextual properties that allow for a functional classification of noun phrases. For this purpose, we used a Self Organizing Map trained with syntactically-annotated contexts containing noun phrases. The contexts are defined by means of a set of features representing morpho-syntactic properties of both nouns and their wider contexts. Two types of experiments have been run: one based on noun types and the other based on noun tokens. The results of the type simulation show that when frequency is the most prominent classification factor, the network isolates idiomatic or fixed phrases. The results of the token simulation experiment, instead, show that, of the 36 attributes represented in the original input matrix, only a few of them are prominent in the re-organization of the map. 
In particular, key features in the emergent macro-classification are the type of determiner and the grammatical number of the noun. An additional but not less interesting result is an organization into semantic/pragmatic micro-classes. In conclusion, our results confirm the relative prominence of determiner type and grammatical number in the task of noun (phrase) categorization.}, KEYWORDS = {cognitive linguistics, noun phrase}, PAGES = {2596-2602}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/summaries/644.html}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {Sixth International Conference on Language Resources and Evaluation (LREC'08)}, CONFERENCE_PLACE = {Marrakech, Morocco}, CONFERENCE_DATE = {28-30 Maggio}, } @INPROCEEDINGS{QUOCHI_2008_INPROCEEDINGS_QMDC_84700, AUTHOR = {Quochi, V. and Monachini, M. and Del Gratta, R. and Calzolari, N.}, TITLE = {A lexicon for biology and bioinformatics: the BOOTStrep experience}, YEAR = {2008}, KEYWORDS = {Lexicon, Ontologies, Lexical database}, PAGES = {2285-2292}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/576_paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Morocco}, CONFERENCE_DATE = {26-05/1-06-2008}, } @INPROCEEDINGS{TORAL_2008_INPROCEEDINGS_TQDMSC_84714, AUTHOR = {Toral, R. A. and Quochi, V. and Del Gratta, R. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {Lexically-based Ontologies and Ontologically Based Lexicons}, YEAR = {2008}, ABSTRACT = {This paper deals with the relations between ontologies and lexicons. We study the role of these two components and their evolution during the last years in the field of Computational Linguistics. Subsequently, we survey the current lines of research at ILC-CNR which tackle this topic. 
They involve (I) the reuse of already existing Lexical Resources to derive formal ontologies, (II) the conversion and combination of terminologies into rich and formal Lexical Resources and (III) the use of formal ontologies as the backbone of multilingual Lexical Resources.}, KEYWORDS = {Resource Infrastructure, UIMA, Clarin}, PAGES = {49-59}, URL = {https://publications.cnr.it/doc/84714}, CONFERENCE_NAME = {AI*IA 2008-10th Congress of Italian Association for Artificial Intelligence}, CONFERENCE_PLACE = {Cagliari}, CONFERENCE_DATE = {11-13 Settembre 2008}, } @INPROCEEDINGS{CASELLI_2007_INPROCEEDINGS_CQ_84670, AUTHOR = {Caselli, T. and Quochi, V.}, TITLE = {Inferring the semantics of temporal prepositions in Italian}, YEAR = {2007}, KEYWORDS = {italian, prepositions, computational linguistics}, PAGES = {38-44}, URL = {http://www.aclweb.org/anthology/W07-1606}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, CONFERENCE_NAME = {Fourth ACL-SIGSEM Workshop on Prepositions}, CONFERENCE_PLACE = {Prague, Czech Republic}, CONFERENCE_DATE = {28/07/2007}, BOOKTITLE = {Proceedings of the Fourth ACL-SIGSEM Workshop on Prepositions}, EDITOR = {Costello, F. and Kelleher, J. and Volk, M.}, } @INPROCEEDINGS{MONACHINI_2007_INPROCEEDINGS_MQRC_84676, AUTHOR = {Monachini, M. and Quochi, V. and Ruimy, N. and Calzolari, N.}, TITLE = {Lexical Relations and Domain Knowledge: The BioLexicon Meets the Qualia Structure}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84676}, CONFERENCE_NAME = {GL2007: Fourth International Conference on Generative Approaches to the Lexicon}, CONFERENCE_PLACE = {Parigi}, CONFERENCE_DATE = {10-11 Maggio 2007}, EDITOR = {Bouillon, P. and Danlos, L. and Kanzaki, K.}, } @INPROCEEDINGS{QUOCHI_2007_INPROCEEDINGS_QDSMC_84735, AUTHOR = {Quochi, V. and Del Gratta, R. and Sassolini, E. and Monachini, M. 
and Calzolari, N.}, TITLE = {Toward a Standard Lexical Resource in the Bio Domain}, YEAR = {2007}, ABSTRACT = {The present paper describes a large-scale lexical resource for the biology domain designed both for human and for machine use. This lexicon aims at semantic interoperability and extendability, through the adoption of ISO-LMF standard for lexical representation and through a granular and distributed encoding of relevant information. The first part of this contribution focuses on three aspects of the model that are of particular interest to the biology community: the treatment of term variants, the representation of bio events and the alignment with a domain ontology. The second part of the paper describes the physical implementation of the model: a relational database equipped with a set of automatic uploading procedures. Peculiarity of the BioLexicon is that it combines features of both terminologies and lexicons. A set of verbs relevant for the domain is also represented with full details on their syntactic and semantic argument structure.}, KEYWORDS = {Lexical representation model, Lexical Database, Computational Lexicography, Special Domains, Standards}, PAGES = {295-299}, URL = {https://publications.cnr.it/doc/84735}, PUBLISHER = {Fundacja Uniwersytetu im A. Mickiewicza (Poznan, POL)}, ISBN = {978-83-7177-413-3}, CONFERENCE_NAME = {LTC07-3rd Language and Technology Conference: Human Language Technology. Challenges of the Information Society}, CONFERENCE_PLACE = {Poznan, Poland}, CONFERENCE_DATE = {5-7 Ottobre 2007}, } @INPROCEEDINGS{CALDERONE_2007_INPROCEEDINGS_CQ_287126, AUTHOR = {Calderone, B. 
and Quochi, V.}, TITLE = {Emergent Cognitive Functions of the Noun Phrase}, YEAR = {2007}, KEYWORDS = {noun phrase, emergence of language}, URL = {https://publications.cnr.it/doc/287126}, CONFERENCE_NAME = {SLE 2007 Annual Meeting}, CONFERENCE_PLACE = {Joensuu, Finlandia}, CONFERENCE_DATE = {28/08/2007-01/09/2007}, BOOKTITLE = {SLE 2007 Annual Meeting Book of Abstracts}, } @TECHREPORT{CALZOLARI_2007_TECHREPORT_CMQSGB_157444, AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V. and Soria, C. and Goggi, S. and Baroni, P.}, TITLE = {FLaReNet: Fostering Language Resources Network. Grant Agreement n° 617001, eContentPlus}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157444}, } @TECHREPORT{DELGRATTA_2007_TECHREPORT_DBCEMQS_157442, AUTHOR = {Del Gratta, R. and Bartolini, R. and Caselli, T. and Enea, A. and Monachini, M. and Quochi, V. and Sassolini, V.}, TITLE = {TimeML: An Ontological Mapping onto the UIMA Type Systems}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157442}, } @TECHREPORT{DELGRATTA_2007_TECHREPORT_DMQSC_157425, AUTHOR = {Del Gratta, R. and Monachini, M. and Quochi, V. and Sassolini, E. and Calzolari, N.}, TITLE = {Bio-Lexicon DataBase: Architecture, Concepts and Loading Software}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157425}, } @TECHREPORT{DELGRATTA_2007_TECHREPORT_DTQM_157441, AUTHOR = {Del Gratta, R. and Toral, A. and Quochi, V. and Monachini, M.}, TITLE = {LocalBioLex: A database framework for biolinguistic research on integrated databases}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157441}, } @TECHREPORT{QUOCHI_2006_TECHREPORT_QMCDS_157403, AUTHOR = {Quochi, V. and Monachini, M. and Calzolari, N. and Del Gratta, R. 
and Sassolini, E.}, TITLE = {Bio-Lexicon Model and Preliminary ISO Conformant Data Categories}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157403}, } @INCOLLECTION{QUOCHI_2005_INCOLLECTION_Q_136443, AUTHOR = {Quochi, V.}, TITLE = {Issues on the acquisition of Italian complex nominals from text corpora: a computational approach combining syntactic and semantic information}, YEAR = {2005}, ABSTRACT = {The paper addressed the issue of Italian Complex Nominals from an (automatic) acquisition and representational perspective. Just like English noun compounds, ICNs blur the distinction between the syntactic and the lexical component because they are (at least) partially non-transparent but, nevertheless, show regularities both at the syntactic and at the semantic level. This contribution reports on an experiment conducted to identify the highest possible number of productive syntactic-semantic patterns of ICN formation, and to make explicit the particular semantic relation that exists between the head of the phrase and its modifier(s). I rely on a non-traditional generative theory of the lexicon, namely the Generative Lexicon, as a model for the representation/ interpretation of ICNs which provides us with a structured representation of the internal semantics of lexical items. 
The experiment explored the representational power of the qualia structure with respect to ICNs.}, KEYWORDS = {complex nominals, multiword expressions, lexicon, lexical representation, generative lexicon}, PAGES = {153-174}, URL = {https://publications.cnr.it/doc/136443}, PUBLISHER = {Edizioni Plus srl (Pisa, ITA)}, ISBN = {9788884922366}, BOOKTITLE = {Studies in the Semantics of Lexical Combinatory Patterns}, EDITOR = {Bertuccelli, M.}, } @INPROCEEDINGS{QUOCHI_2004_INPROCEEDINGS_Q_84616, AUTHOR = {Quochi, V.}, TITLE = {Representing Italian Complex Nominals: A Pilot Study}, YEAR = {2004}, ABSTRACT = {A corpus-based investigation of Italian Complex Nominals (CNs), of the form N+PP, which aims at clarifying their syntactic and semantic constitution, is presented. The main goal is to find out useful parameters for their representation in a computational lexicon. As a reference model we have taken an implementation of Pustejovsky's Generative Lexicon Theory (1995), the SIMPLE Italian Lexicon, and in particular the Extended Qualia Structure. Italian CN formation mainly exploits post-modification; of particular interest here are CNs of the kind N+PP since this syntactic pattern is highly productive in Italian and such CNs very often translate compound nouns of other languages. One of the major problems posed by CNs for interpretation is the retrieval or identification of the semantic relation linking their components, which is (at least partially) implicit on the surface. Studying a small sample, we observed some interesting facts that could be useful when setting up a larger experiment to identify semantic relations and/or automatically learn the syntactic peculiarities of given semantic paradigms. 
Finally, a set of representational features exploiting the results from our corpus is proposed.}, KEYWORDS = {Multiword expressions, Complex Nominals, Italian language}, PAGES = {1863-1866}, URL = {https://publications.cnr.it/doc/84616}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona, Portogallo}, CONFERENCE_DATE = {26-28/05/2004}, BOOKTITLE = {Proceedings of the Fourth International Conference on Language Resources and Evaluation, LREC'04}, } @TECHREPORT{BARONI_2004_TECHREPORT_BCLQU_157368, AUTHOR = {Baroni, P. and Calzolari, N. and Lenci, A. and Quochi, V. and Ulivieri, M.}, TITLE = {Final Resources Landscape}, YEAR = {2004}, ABSTRACT = {ELSNET-4 Deliverable D6.4}, KEYWORDS = {Language Resources, Landscapes}, PAGES = {11}, URL = {https://publications.cnr.it/doc/157368}, } @INPROCEEDINGS{CALZOLARI_2003_INPROCEEDINGS_CLQ_84547, AUTHOR = {Calzolari, N. and Lenci, A. and Quochi, V.}, TITLE = {Towards Multiword and Multilingual Lexicons: Between Theory and Practice}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84547}, CONFERENCE_NAME = {Linguistics and Phonetics 2002 Conference}, CONFERENCE_PLACE = {Urayasu (Giappone)}, CONFERENCE_DATE = {2003}, } @TECHREPORT{QUOCHI_2003_TECHREPORT_QO_157349, AUTHOR = {Quochi, V. and Odijk, J.}, TITLE = {"Appendix F: Representing noun compounds and support verbs in MILE (PISA \& XMELLT)"}, YEAR = {2003}, URL = {http://www.ilc.cnr.it/EAGLES96/isle/clwg_doc/ISLE_D2.2-D3.2.zip}, }