% Journal article. The export packed "2025 (20)" into VOLUME; it is read here as year (issue),
% so the issue goes in NUMBER and the year stays in YEAR only. NOTE(review): confirm against the journal site.
@ARTICLE{BANDINI_2025_ARTICLE_BQ_557403,
  AUTHOR = {Bandini, M. and Quochi, V.},
  TITLE = {A Systematic Literature Review on the Representation of Texts as Linguistic Linked Open Data},
  YEAR = {2025},
  ABSTRACT = {Despite the growing interest in publishing linguistic data as Linked Open Data (LOD), the representation of ancient language corpora within the Semantic Web remains challenging. While LOD principles have been successfully applied to linguistic resources such as dictionaries, lexicons, and terminologies, their use for textual corpora — particularly those related to ancient languages — is still limited. Through a systematic literature review, we investigate how textual data has been represented as Linguistic Linked Open Data (LLOD), evaluating the potential and limitations of existing approaches and methodologies for enhancing data integration and interoperability in the Digital Humanities. This systematic literature review follows a rigorous methodology encompassing literature identification, screening for inclusion, and quality assessment. By classifying and analysing relevant studies, we provide a comprehensive overview of current practices and offer insights into their benefits and limitations},
  KEYWORDS = {Ancient languages, Ancient texts, DigitAnt, Linguistic Linked Open Data, Semantic Web, Systematic literature review},
  PAGES = {289--315},
  URL = {https://doi.org/10.6092/issn.2532-8816/21195},
  NUMBER = {20},
  DOI = {10.6092/issn.2532-8816/21195},
  ISSN = {2532-8816},
  JOURNAL = {Umanistica Digitale},
}

% Journal article in the same issue; VOLUME "2025 (20)" is handled the same way as in the entry above.
@ARTICLE{LUZIETTI_2025_ARTICLE_LSGMCDCMQMD_550741,
  AUTHOR = {Luzietti, R. B. and Spadi, A. and Giampietro, N. and Mancuso, G. and Caravale, A. and D'Eredità, A. and Caradonna, M. and Moscati, P. and Quochi, V. and Monachini, M. and Degl'Innocenti, E.},
  TITLE = {Digital Humanities and Heritage Science: moving from landscaping to a dynamic research observatory in an Open Science Cloud},
  YEAR = {2025},
  ABSTRACT = {This contribution presents the work conducted within the second work package of the Humanities and Heritage Italian Science Cloud (H2IOSC) infrastructural project, dedicated to "Landscaping and Building Communities", with the aim of defining a methodology for mapping the current status and availability of resources and technologies and enhancing their utilization in the Humanities and Cultural Heritage sectors. The mapping activity involved a comprehensive investigation encompassing language technologies, digital humanities, and heritage science disciplines in Italy. The aim of the landscaping activity is to collect information on the latest and most prevalent resources, tools, communities, best practices, standards, and projects developed within the Heritage, Social Sciences, and Digital Humanities communities. In this project, the four partnering infrastructures—CLARIN, DARIAH, E-RIHS, and OPERAS—collaborate closely to develop the best strategies for engaging and meeting the needs of their target research communities as well as to identify the set of priority items (resources, tools, and services) to FAIRify and onboard them into the national MarketPlace},
  KEYWORDS = {digital humanities, heritage science, infrastructural survey, infrastrutture di ricerca},
  PAGES = {419--439},
  URL = {https://iris.cnr.it/handle/20.500.14243/550741},
  NUMBER = {20},
  DOI = {10.6092/issn.2532-8816/21226},
  ISSN = {2532-8816},
  JOURNAL = {Umanistica Digitale},
}

@INPROCEEDINGS{KHAN_2025_INPROCEEDINGS_KMQPFS_570784,
  AUTHOR = {Khan, A. F. and Mallia, M. and Quochi, V. and Pedonese, G. and Frontini, F.
and Squadrito, E.},
  TITLE = {A Pilot Project for Promoting Linguistic Linked Open Data},
  YEAR = {2025},
  ABSTRACT = {This paper presents a pilot initiative, part of the H2IOSC infrastructure, that strives to support and promote the creation, publication, and sharing of Linguistic Linked Open Data (LLOD) in Italy and beyond. We describe the different parts of the pilot project: those related to vocabulary hosting, RDF data publication, training development, and use case promotion. Key contributions include the publication and hosting of the REALITER series of lexicons, the PLLOD triple store platform, and LLOD-focused training initiatives. We also describe a series of use-cases taking place within the pilot},
  KEYWORDS = {Training, Linguistic Linked Open data, H2IOSC, CLARIN},
  PAGES = {1--6},
  INTERNAL-NOTE = {FIXME: the exported DOI value 10.1109/ieee is only a truncated prefix and does not resolve, so it is kept here instead of in DOI; restore the full DOI from the publisher record},
  CONFERENCE_NAME = {2025 IEEE International Conference on Cyber Humanities (IEEE-CH)},
  BOOKTITLE = {2025 IEEE International Conference on Cyber Humanities (IEEE-CH)},
}

% Conference paper. The export had the volume editors inside PUBLISHER as
% "Cristina Grisot, Thalassia Kontino (eds.) (Vienna, AUT)"; they are now in EDITOR,
% and the place is already carried by CONFERENCE_PLACE.
@INPROCEEDINGS{SICHERA_2025_INPROCEEDINGS_SMQGFOL_562001,
  AUTHOR = {Sichera, P. and Monachini, M. and Quochi, V. and Giampietro, N. and Fabiani, V. and Ottaviani, R. and Luzietti, R. B.},
  TITLE = {Synergies between {CLARIN-IT} and {OPERAS-IT} within {H2IOSC}: monitoring communities and orchestrating digital services},
  YEAR = {2025},
  ABSTRACT = {This paper introduces the Humanities and Cultural Heritage Open Science Cloud (H2IOSC) Marketplace, a digital environment designed for the discovery, access, and integration of diverse digital resources, tools, and services in the fields of Language, Digital Humanities, and Cultural Heritage. The Marketplace is closely integrated with the H2IOSC Observatory—an analytical framework for monitoring the evolving landscape of digital resources—which provides insights into research infrastructure usage, allowing to align service offerings with user needs and expectations. Together, the Marketplace and Observatory promote interoperability and enhance collaboration between the CLARIN-IT and OPERAS-IT infrastructures},
  KEYWORDS = {Marketplace, Workflow, Orchestration, Observatory, Osservatorio, Orchestrazione},
  PAGES = {26--30},
  URL = {https://www.clarin.eu/sites/default/files/CLARIN2025_ConferenceProceedings.pdf#page=35},
  DOI = {10.5281/zenodo.17357825},
  EDITOR = {Grisot, Cristina and Kontino, Thalassia},
  CONFERENCE_NAME = {CLARIN Annual Conference 2025},
  CONFERENCE_PLACE = {Vienna},
  BOOKTITLE = {CLARIN Annual Conference Proceedings 2025},
}

% Project deliverable. NOTE(review): INSTITUTION, a required TECHREPORT field, is absent
% from the export — TODO supply it from the deliverable itself.
@TECHREPORT{MONACHINI_2025_TECHREPORT_MQLMCCDSCGM_557398,
  AUTHOR = {Monachini, M. and Quochi, V. and Luzietti, R. B. and Moscati, P. and Caravale, A. and Chirivi, A. and Degl'Innocenti, E. and Spadi, A. and Caradonna, M. and Giampietro, N. and Mancuso, G.},
  TITLE = {{D2.1} Landscaping and building communities},
  YEAR = {2025},
  ABSTRACT = {The Humanities and Heritage Italian Open Science Cloud (H2IOSC) project involves four Italian nodes of research infrastructures (RIs)—CLARIN, DARIAH, E-RIHS, and OPERAS. Within this project, the second work package focuses on establishing, mapping, and monitoring strategies for assessing the national and international operation contexts for the four RIs, the characteristics of their user communities, and the specific data resources, services, and tools that are most needed, used and newly created. The main objective of this work package is to conduct a comprehensive survey of the landscape of language technologies, humanities, and heritage science in Italy. This survey takes into account existing projects, resources, tools, communities, best practices, and standards that need to be integrated into the national Marketplace as well as the repositories of the four RIs. Collaboration within the work package was directed at structuring shared procedures for developing, managing, and adapting the survey's main objectives.
Priority was on identifying and engaging key stakeholders within various communities. The surveying activity was organized around a set of shared activities, in parallel with others conducted by each infrastructure. The initial coordination activity focused on defining dimensions and criteria to guide the survey. It resulted in the creation of a structured questionnaire, which was designed to gather information about user needs, engage with stakeholders, identify communities of reference, evaluate relevant projects, and assess the FAIRness of data. The questionnaire was employed to gather insights on a wide range of topics, including data resources, software, tools, projects, training needs, prior knowledge of RIs, publications, and feedback. Such a questionnaire has been distributed among the relevant disciplinary communities and sub-communities of the four RIs. Respondents included students, researchers, professors, and subject matter experts in social science, digital humanities, and cultural heritage research fields. Preliminary results from the control group gave insights into the challenges and opportunities of the survey. Respondents have shared valuable information about data resources, technologies, software, and their training needs. The results highlight the need for clearer communication, dissemination activities, and better engagement strategies to encourage participation from the wider community. Addressing the lack of},
  KEYWORDS = {Italian research infrastructures, Digital Humanities, Heritage science, Open Science},
  URL = {https://iris.cnr.it/handle/20.500.14243/557398},
  DOI = {10.5281/zenodo.14680021},
}

% Project deliverable; the title's deliverable number had been split as "D2. 2" by the export.
% NOTE(review): INSTITUTION, a required TECHREPORT field, is absent from the export — TODO supply it.
@TECHREPORT{MONACHINI_2025_TECHREPORT_MQLSMDCGCM_572705,
  AUTHOR = {Monachini, M. and Quochi, V. and Luzietti, R. B. and Spadi, A. and Mancuso, G. and D'Eredità, A. and Caravale, A. and Giampietro, N. and Caradonna, M. and Melaccio, D.},
  TITLE = {{D2.2} Updated Report on the {H2IOSC} Landscapes},
  YEAR = {2025},
  ABSTRACT = {This deliverable presents the updated results of the Landscaping and Building Communities activities carried out within WP2 of the H2IOSC project. Through a mixed-methods approach—combining questionnaires, interviews, focus groups, and internal and external scouting—the report provides a systematic overview of the research landscape in the Humanities, Linguistics, and Heritage Science domains in Italy. It maps existing digital resources, tools, services, community practices, FAIRness levels, training needs, and infrastructural gaps across the communities served by the four participating Research Infrastructures (CLARIN, DARIAH, E-RIHS, OPERAS). The findings feed directly into key components of the project, including the DHeLO landscaping platform and the H2IOSC Observatory, supporting resource integration, FAIRification, service development, capacity-building, and pilot innovation activities. Overall, the deliverable offers the strategic evidence base necessary for guiding the technical and community-oriented work of subsequent WPs},
  KEYWORDS = {H2IOSC, Research Infrastructures},
  URL = {https://iris.cnr.it/handle/20.500.14243/572705},
  DOI = {10.5281/zenodo.17737100},
}

@ARTICLE{MALLIA_2024_ARTICLE_MBQ_459320,
  AUTHOR = {Mallia, M. and Bandini, M. and Quochi, V.},
  TITLE = {An interface for linking ancient languages},
  YEAR = {2024},
  ABSTRACT = {The paper focuses on the linking potentials offered by the EpiLexO web-based front-end for the creation and editing of an ecosystem of digital resources for ancient languages, developed in the context of a project on the languages of fragmentary attestation of ancient Italy. The focus is particularly on mechanisms introduced for linking lexical information to other information bits either internally or externally, e.g.
for creating attestations by linking lexical forms to their variants in relevant inscriptions, as well as for linking lexical data to external independent LOD datasets available on a remote endpoint. Finally, in the conclusions, we briefly introduce some future planned or desired enhancements as well as the final platform component, a parallel interface that constitutes the fruition application, which will be open to anyone on the web and will allow for browsing, searching, cross-querying and visualising the created set of interlinked resources},
  KEYWORDS = {eLexicography, Ancient languages, Linguistic Linked Open Data, Digital historical linguistics},
  PAGES = {175--190},
  URL = {https://sciendo.com/it/article/10.2478/cait-2024-0042},
  NUMBER = {4},
  DOI = {10.2478/cait-2024-0042},
  ISSN = {1314-4081},
  JOURNAL = {Cybernetics and Information Technologies},
}

% Conference paper; the export had split the conference name as "Me. Te. Digitali",
% restored here to "Me.Te. Digitali". NOTE(review): confirm the spelling against the proceedings.
@INPROCEEDINGS{BANDINI_2024_INPROCEEDINGS_BQ_526227,
  AUTHOR = {Bandini, M. and Quochi, V.},
  TITLE = {Representing texts as {LOD}: a Systematic Literature Review},
  YEAR = {2024},
  ABSTRACT = {Despite the growing interest in publishing linguistic data as Linked Open Data, the publishing of ancient language corpora for the Semantic Web is still challenging. This contribution describes a systematic literature review on the representation of corpus data as Linguistic Linked Open Data, focusing especially on models and (data) granularity. Our goal is to gain insights into the advantages and disadvantages of the different approaches. Here we present our systematic review methodology and some initial results},
  KEYWORDS = {Linked Open Data, Text corpora, ancient languages, systematic literature review},
  PAGES = {455--461},
  URL = {https://amsacta.unibo.it/id/eprint/7927/},
  ISBN = {978-88-942535-8-0},
  CONFERENCE_NAME = {Me.Te. Digitali. Mediterraneo in rete tra testi e contesti. XIII Convegno Annuale AIUCD2024},
  BOOKTITLE = {Me.Te. Digitali. Mediterraneo in rete tra testi e contesti, Proceedings del XIII Convegno Annuale AIUCD2024},
}

% Conference paper in a numbered series: VOLUME is the series volume.
% NOTE(review): SERIES name inferred from the ecp.ep.liu.se URL and the ecp DOI prefix — confirm.
@INPROCEEDINGS{BOSCHETTI_2024_INPROCEEDINGS_BRQ_507141,
  AUTHOR = {Boschetti, F. and Rigobianco, L. and Quochi, V.},
  TITLE = {Domain-Specific Languages for Epigraphy: The Case of {ItAnt}},
  YEAR = {2024},
  ABSTRACT = {This contribution illustrates how the definition of a Domain-Specific Language can support the activity of epigraphists and historical linguists. It presents and discusses a method and technological solution, based on Domain Specific Languages, for facilitating scholars in digitally representing the available knowledge of archaic languages and cultures, by increasing the human readability of the encoded data without sacrificing the compliance to standard models and formats. Such a work is framed in the context of an Italian National collaborative research project devoted to the study of the languages and cultures of ancient Italy, witnessed by a digital collection of inscriptions. The platform developed within this project offers an interesting use case and motivation for experimenting with DSLs for the creation of the needed digital critical editions. After explaining the DSL grammar definition process, we finally test the applicability of the DSL grammar to 5 example inscriptions in the Faliscan language},
  KEYWORDS = {Domain Specific Languages, Ancient Languages, Digital Scholarly Editions, Digital Humanities, Text Representation, TEI EpiDoc},
  PAGES = {191--202},
  URL = {https://ecp.ep.liu.se/index.php/clarin/article/view/1023},
  SERIES = {Linköping Electronic Conference Proceedings},
  VOLUME = {210},
  DOI = {10.3384/ecp210007},
  ISBN = {978-91-8075-740-9},
  CONFERENCE_NAME = {CLARIN Annual Conference 2023},
  BOOKTITLE = {Selected papers from the CLARIN Annual Conference 2023},
}

@INPROCEEDINGS{LUZIETTI_2024_INPROCEEDINGS_LCCDGMMQSMD_523843,
  AUTHOR = {Luzietti, R. B. and Caradonna, M. and Caravale, A. and D'Eredità, A. and Giampietro, N. and Mancuso, G. and Moscati, P. and Quochi, V. and Spadi, A. and Monachini, M.
and Degl'Innocenti, E.},
  TITLE = {Digital Humanities and Heritage Science: moving from landscaping to a dynamic research observatory in an Open Science Cloud},
  YEAR = {2024},
  ABSTRACT = {The paper describes work carried out in the context of an infrastructural project for the implementation of a comprehensive investigation on language technologies, digital humanities, and heritage science disciplines in Italy. The topic of this landscaping activity consists in an extended survey of all the existing projects, resources, tools, communities, best practices, and standards in use among the SSH communities. Within this project, the second work package focuses on establishing, mapping, and monitoring strategies for assessing the national and international contexts for the four RIs, the characteristics of their user communities, and the specific data resources, services, and tools that are most needed, used and newly created},
  KEYWORDS = {Research infrastructures, Landscaping, Building Communities, Digital Humanities, Cultural Heritage Science},
  PAGES = {5},
  URL = {https://amsacta.unibo.it/id/eprint/7927/},
  DOI = {10.6092/unibo/amsacta/7927},
  ISBN = {978-88-942535-8-0},
  CONFERENCE_NAME = {Me.Te. Digitali. Mediterraneo in rete tra testi e contesti. XIII Convegno Annuale AIUCD2024},
  BOOKTITLE = {Me.Te. Digitali. Mediterraneo in rete tra testi e contesti, Proceedings del XIII Convegno Annuale AIUCD2024},
}

% Conference paper; "aim-img" in the exported abstract is corrected to "aiming".
@INPROCEEDINGS{LUZIETTI_2024_INPROCEEDINGS_LQOCDM_572742,
  AUTHOR = {Luzietti, R. B. and Quochi, V. and Ottaviani, R. and Carpita, D. and Del Gratta, R. and Monachini, M.},
  TITLE = {{CLARIN} in the {Italian Open Science Cloud}: landscaping and community engagement},
  YEAR = {2024},
  ABSTRACT = {This contribution is part of the H2IOSC project, supported by the Italian PNRR European Fund, in which the Italian CLARIN node collaborates with DARIAH, E-RIHS, and OPERAS to build an Italian Open Science Cloud. The paper presents an overview of two key project activities aiming at landscaping the Italian resource panorama and increasing the Italian research community’s involvement. On the one hand, CLARIN-IT has benefited from CLARIN ERIC central services such as Virtual Language Observatory and Resource Families to gather information on the type and status of resources available that might be of interest of the Italian research community. On the other hand, through the H2IOSC activities CLARIN-IT is working to increase and strengthen the influence and use of CLARIN services within the Italian linguistics community},
  KEYWORDS = {Research Infrastructures},
  PAGES = {153--157},
  URL = {https://www.clarin.eu/sites/default/files/CLARIN2024_ConferenceProceedings_final.pdf},
  CONFERENCE_NAME = {CLARIN Annual Conference 2024},
  BOOKTITLE = {CLARIN Annual Conference Proceedings},
}

@INPROCEEDINGS{MALLIA_2024_INPROCEEDINGS_MBBMPRTZZQ_479261,
  AUTHOR = {Mallia, M. and Bandini, M. and Bellandi, A. and Murano, F. and Piccini, S. and Rigobianco, L. and Tommasi, A. and Zavattari, C. and Zinzi, M. and Quochi, V.},
  TITLE = {{DigItAnt}: a platform for creating, linking and exploiting {LOD} lexica with heterogeneous resources},
  YEAR = {2024},
  ABSTRACT = {Over the past few years, the deployment of Linked Open Data (LOD) technologies has witnessed significant advancements across a myriad of sectors, linguistics included. This progression is characterized by an exponential increase in the conversion of resources to adhere to contemporary encoding standards. Such transformations are driven by the objectives outlined in “ecological” methodologies, notably the FAIR data principles, which advocate for the reuse and interoperability of resources. This paper presents the DigItAnt architecture, developed in the context of a national project funded by the Italian Ministry of Research and in the service of a recently started Italian endeavor to realize a federation of infrastructures for the humanities.
It details its services, utilities and data types, and shows how it manages to produce, exploit and interlink LLOD and non-LLOD datasets in ways that are meaningful to its intended target disciplinary context, i.e. historical linguistics over epigraphy data. The paper also introduces how DigItAnt services and functionalities will contribute to the empowerment of the H2IOSC Italian infrastructures cluster project, which is devoted to the construction of a nationwide research infrastructure federation for the humanities, and it will possibly contribute to its pilot project towards an authoritative LLOD platform},
  KEYWORDS = {Linguistic Linked Open Data, Ancient Languages, Digital Historical Linguistics, Language Technology for Digital Humanities},
  PAGES = {55--65},
  URL = {https://aclanthology.org/2024.ldl-1.8/},
  PUBLISHER = {ELRA},
  ADDRESS = {Paris},
  ISBN = {978-2-493814-38-8},
  CONFERENCE_NAME = {9th Workshop on Linked Data in Linguistics},
  CONFERENCE_PLACE = {Paris},
  BOOKTITLE = {Proceedings of the 9th Workshop on Linked Data in Linguistics},
}

% Conference paper with Italian title and abstract; the export had split the conference name
% as "Me. Te. Digitali", restored here to "Me.Te. Digitali". NOTE(review): confirm the spelling.
@INPROCEEDINGS{MALLIA_2024_INPROCEEDINGS_MDQ_526230,
  AUTHOR = {Mallia, M. and Del Gratta, R. and Quochi, V.},
  TITLE = {Funzioni e sostenibilità di una piattaforma digitale per le lingue arcaiche},
  YEAR = {2024},
  ABSTRACT = {Questo contributo, basato sull'esperienza acquisita in un progetto di ricerca triennale dedicato allo sviluppo di tecnologie e risorse digitali nel campo degli studi linguistico-storici su lingue epigrafiche frammentarie, riflette sulla sostenibilità a lungo termine dei risultati digitali ottenuti da piccoli gruppi di ricerca nelle Digital Humanities (DH). In particolare, l'analisi si concentra sulla possibilità di integrare questi risultati nelle infrastrutture di ricerca virtuali, distribuite e federate, come CLARIN(-IT) e la recente iniziativa di federazione delle infrastrutture di ricerca italiane per le Scienze Umane e il Patrimonio Culturale, denominata 'Humanities and Heritage Italian Open Cloud' (H2IOSC)},
  KEYWORDS = {Infrastrutture di ricerca, Sostenibilità tecnica, Epigrafia Digitale, Servizi web},
  PAGES = {566--571},
  URL = {https://amsacta.unibo.it/id/eprint/7927/},
  ISBN = {978-88-942535-8-0},
  CONFERENCE_NAME = {Me.Te. Digitali. Mediterraneo in rete tra testi e contesti. XIII Convegno Annuale AIUCD2024},
  BOOKTITLE = {Me.Te. Digitali. Mediterraneo in rete tra testi e contesti, Proceedings del XIII Convegno Annuale AIUCD2024},
}

% Conference paper; the title's unusual capitals are brace-protected so styles keep them.
@INPROCEEDINGS{TOMMASI_2024_INPROCEEDINGS_TZMQ_507161,
  AUTHOR = {Tommasi, A. and Zavattari, C. and Mallia, M. and Quochi, V.},
  TITLE = {{REST} services for {Corpus} management {Annotation} and {SearcH}},
  YEAR = {2024},
  ABSTRACT = {This paper presents a back-end software that offers a set of micro web services for the general-purpose management and search of text documents and annotations. Initially developed for a digital epigraphy project, the system focuses on integrating texts and lexicons represented in different paradigms.
Nonetheless, the solution is designed to be general and adaptable across various domains},
  KEYWORDS = {Corpus and annotation management, Digital Epigraphy},
  PAGES = {1--5},
  URL = {https://iris.cnr.it/handle/20.500.14243/507161},
  CONFERENCE_NAME = {CLARIN Annual Conference 2024},
  BOOKTITLE = {Proceedings of the CLARIN Annual Conference 2024},
}

% Technical report with Italian title and abstract.
% NOTE(review): INSTITUTION, a required TECHREPORT field, is absent from the export — TODO supply it.
@TECHREPORT{QUOCHI_2024_TECHREPORT_Q_514764,
  AUTHOR = {Quochi, V.},
  TITLE = {{Epilexo Search}: un’interfaccia di fruizione di lessici antichi per il progetto {ItAnt}},
  YEAR = {2024},
  ABSTRACT = {Il medesimo documento descrive lo sviluppo di “EpiLexO Search”, una piattaforma web per la fruizione di dati relativi ai lessici elettronici collegati a materiali testuali e bibliografici, nonché a risorse lessicografiche esterne, creati attraverso la piattaforma di editing dei lessici “EpiLexO Editor”. L’obiettivo del progetto è quello di creare uno strumento che possa permettere agli utenti interessati a visualizzare tutte le informazioni peculiari relative ai dati delle iscrizioni codificate secondo gli standard dell’epigrafia digitale—in questo caso di Epidoc—e dei dati relativi al lessico computazionale e dei Linguistic Linked Open Data, tramite l’utilizzo del modello OntoLex-Lemon. Oltre alla consultazione dei lessici, EpiLexO Search include un particolare sistema per la renderizzazione dei testi epigrafici secondo le convenzioni di Leida con annessi dati LOD e dei sistemi per la reperibilità di dati da più fonti per consentire agli utenti di effettuare ricerche incrociate e avanzate. Questo documento esplora le varie fasi della progettazione della piattaforma, dettagliando le specifiche tecniche di ciascun componente e delineando il modello generale per l'organizzazione e la presentazione dei dati},
  KEYWORDS = {Digital Lexicography, Linguistic Linked Open Data},
  URL = {https://iris.cnr.it/handle/20.500.14243/514764},
}

@MISC{ERJAVEC_2024_MISC_EKOOAAAAAAAABBBBBBBCCLCDDDDDDDFFFGGGGGGGHIJJJKKKLLLMMMMMMMMMNNNNOPPPPPPPPPQRRRRRRSSSSTTTVVVVVVVVWYZF_483001,
  AUTHOR = {Erjavec, T. and Kopp, M. and Ogrodniczuk, M. and Osenova, P. and Agerri, R. and Agirrezabal, M. and Agnoloni, T. and Aires, J. and Albini, M. and Alkorta, J. and Antiba Cartazo, I. and Arrieta, E. and Barcala, M. and Bardanca, D. and Barkarson, S. and Bartolini, R. and Battistoni, R. and Bel, N. and Bonet Ramos, M. D. M. and Calzada Pérez, M. and Cardoso, A. and Çöltekin, Ç. and Coole, M. and Darģis, R. and De Does, J. and De Libano, R. and Depoorter, G. and Depuydt, K. and Diwersy, S. and Dodé, R. and Fernandez, K. and Fernández Rei, E. and Frontini, F. and Garcia, M. and García Díaz, N. and García Louzao, P. and Gavriilidou, M. and Gkoumas, D. and Grigorov, I. and Grigorova, V. and Haltrup Hansen, D. and Iruskieta, M. and Jarlbrink, J. and Jelencsik Mátyus, K. and Jongejan, B. and Kahusk, N. and Kirnbauer, M. and Kryvenko, A. and Ligeti Nagy, N. and Ljubešić, N. and Luxardo, G. and Magariños, C. and Magnusson, M. and Marchetti, C. and Marx, M. and Meden, K. and Mendes, A. and Mochtak, M. and Mölder, M. and Montemagni, S. and Navarretta, C. and Nitoń, B. and Norén, F. M. and Nwadukwe, A. and Ojsteršek, M. and Pančur, A. and Papavassiliou, V. and Pereira, R. and Pérez Lago, M. and Piperidis, S. and Pirker, H. and Pisani, M. and Pol, H. V. D. and Prokopidis, P. and Quochi, V. and Rayson, P. and Regueira, X. L. and Rii, A. and Rudolf, M. and Ruisi, M. and Rupnik, P. and Schopper, D. and Simov, K. and Sinikallio, L. and Skubic, J. and Tamper, M. and Tungland, L. M. and Tuominen, J.
and Van Heusden, R. and Varga, Z. and Vázquez Abuín, M. and Venturi, G. and Vidal Miguéns, A. and Vider, K. and Vivel Couso, A. and Vladu, A. I. and Wissik, T. and Yrjänäinen, V. and Zevallos, R. and Fišer, D.},
  TITLE = {Linguistically annotated multilingual comparable corpora of parliamentary debates {ParlaMint.ana} 4.1},
  YEAR = {2024},
  ABSTRACT = {ParlaMint 4.1 is a set of comparable corpora containing transcriptions of parliamentary debates of 29 European countries and autonomous regions, mostly starting in 2015 and extending to mid-2022. The individual corpora comprise between 9 and 126 million words and the complete set contains over 1.2 billion words. The transcriptions are divided by days with information on the term, session and meeting, and contain speeches marked by the speaker and their role (e.g. chair, regular speaker). The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. The corpora have extensive metadata, most importantly on speakers (name, gender, MP and minister status, party affiliation), on their political parties and parliamentary groups (name, coalition/opposition status, Wikipedia-sourced left-to-right political orientation, and CHES variables, https://www.chesdata.eu/). Note that some corpora have further metadata, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The transcriptions are also marked with the subcorpora they belong to ("reference", until 2020-01-30, "covid", from 2020-01-31, and "war", from 2022-02-24). An overview of the statistics of the corpora is available on GitHub in the folder Build/Metadata, in particular for the release 4.1 at https://github.com/clarin-eric/ParlaMint/tree/v4.1/Build/Metadata. The corpora are encoded according to the ParlaMint encoding guidelines (https://clarin-eric.github.io/ParlaMint/) and schemas (included in the distribution). The ParlaMint.ana linguistic annotation includes tokenization; sentence segmentation; lemmatisation; Universal Dependencies part-of-speech, morphological features, and syntactic dependencies; and the 4-class CoNLL-2003 named entities. Some corpora also have further linguistic annotations, in particular PoS tagging according a language-specific scheme, with their corpus TEI headers giving further details on the annotation vocabularies and tools used. This entry contains the ParlaMint.ana TEI-encoded linguistically annotated corpora; the derived CoNLL-U files along with TSV metadata of the speeches; and the derived vertical files (with their registry file), suitable for use with CQP-based concordancers, such as CWB, noSketch Engine or KonText. Also included is the 4.1 release of the sample data and scripts available at the GitHub repository of the ParlaMint project at https://github.com/clarin-eric/ParlaMint and the log files produced in the process of building the corpora for this release. The log files show e.g. known errors in the corpora, while more information about known problems is available in the open issues at the GitHub repository of the project. This entry contains the linguistically marked-up version of the corpus, while the text version, i.e. without the linguistic annotation is also available at http://hdl.handle.net/11356/1912. Another related resource, namely the ParlaMint corpora machine translated to English ParlaMint-en.ana 4.1 can be found at http://hdl.handle.net/11356/1910. As opposed to the previous version 4.0, this version fixes a number of bugs and restructures the ParlaMint GitHub repository. The DK corpus has been linguistically re-annotated to remove bugs, while its speeches are now also marked with topics. The PT corpus has been extended to 2024-03 and the UA corpus to 2023-11, which also has improved language marking (uk vs. ru) on segments},
  KEYWORDS = {ParlaCLARIN, linguistic annotation, pos-tagging, Named Entity Recognition, linguistic dependency annotation, UD},
  URL = {https://iris.cnr.it/handle/20.500.14243/483001},
}

% Poster record. NOTE(review): URL and DOI carry Zenodo identifiers that differ in the last digit
% (…067 vs …066); both are kept as exported — confirm which one should be cited.
@MISC{MALLIA_2024_MISC_MBBMPRTZZQ_479301,
  AUTHOR = {Mallia, M. and Bandini, M. and Bellandi, A. and Murano, F. and Piccini, S. and Rigobianco, L. and Tommasi, A. and Zavattari, C. and Zinzi, M. and Quochi, V.},
  TITLE = {{DigItAnt}. A platform for creating, linking and exploiting {LOD} lexica with heterogeneous resources},
  YEAR = {2024},
  ABSTRACT = {Poster presented at the LDL 2024 Workshop, Torino, 25/05/2024},
  KEYWORDS = {Linguistic Linked Open Data, Ancient Languages, Digital Historical Linguistics, Language Technology for Digital Humanities},
  URL = {https://doi.org/10.5281/zenodo.11384067},
  DOI = {10.5281/zenodo.11384066},
}

% Software record pointing at a GitHub repository; the project URL inside the abstract had been
% split by the export and is rejoined.
@MISC{MALLIA_2024_MISC_MQ_532940,
  AUTHOR = {Mallia, M. and Quochi, V.},
  TITLE = {{DigItAnt} Search},
  YEAR = {2024},
  ABSTRACT = {DigItAnt-search is the GUI web application of the DigItAnt platform, designed to explore, visualise and navigate the different sources of information created or linked within the national ItAnt project (https://www.prin-italia-antica.unifi.it/). DigItAnt is an innovative platform designed to support historical linguistic and epigraphic studies, and researchers in the creation, management and consultation of digital linguistic resources for the fragmentary ancient languages. DigItAnt-search allows to explore interactively various sources of information in a unified and easily accessible environment},
  KEYWORDS = {Historical linguistics, Digital epigraphy, Linguistic Open Linked Data, Web GUI application, Search interface},
  URL = {https://github.com/DigItAnt/DigItAnt_search},
}

@MISC{PEDONESE_2024_MISC_PKMFQS_561741,
  AUTHOR = {Pedonese, G. and Khan, A. F. and Mallia, M. and Frontini, F. and Quochi, V.
and Squadrito, E.},
  TITLE = {Linguistic Linked Open Data for Humanists},
  YEAR = {2024},
  ABSTRACT = {Having achieved popularity as a way of publishing and accessing data in different fields of the sciences and for sharing large encyclopaedic datasets such as DBpedia (derived from Wikipedia), linked data is becoming more and more popular in different areas of the humanities. In this course we will present a comprehensive introduction to the creation, publication, and use of linked open data for anyone who wants to work with linguistic datasets – such as lexicons and corpora – and especially for those who come from a linguistic or humanist background. We will look at the basics of linked data and the Semantic Web and introduce the various different standards technologies that make up the Semantic Web stack before focusing on the particular case of linked data language resources. During the course we will study the most important tools, vocabularies, and resources available in the Semantic Web and provide hands-on training for the creation and querying of linguistic linked data. We will look at how Semantic Web technologies can contribute to the creation of FAIR language resources as well as how to publish your resource on the linked open data cloud. We will also show how the Semantic Web query language SPARQL can be a powerful tool for data exploration},
  KEYWORDS = {Linked Open Data, Linguistics},
  URL = {https://iris.cnr.it/handle/20.500.14243/561741},
  DOI = {10.5281/zenodo.13897931},
}

% Journal article. NOTE(review): PAGES holds the single value 15, which looks like a page count
% rather than a range — confirm against the publisher record.
@ARTICLE{MURANO_2023_ARTICLE_MQDRZ_459322,
  AUTHOR = {Murano, F. and Quochi, V. and Del Grosso, A. M. and Rigobianco, L. and Zinzi, M.},
  TITLE = {Describing Inscriptions of {Ancient Italy}. The {ItAnt} Project and Its Information Encoding Process},
  YEAR = {2023},
  ABSTRACT = {This paper discusses the challenges addressed in the digital scholarly encoding of the fragmentary texts of the languages of Ancient Italy according to the TEI/EpiDoc Guidelines in XML format. This contribution describes the solutions and customisations that have been adopted for dealing with the peculiarities of our epigraphical documentation and with the formalisation of epigraphical information deemed interesting for data retrieval in a historical linguistic perspective. The making of a digital corpus consisting of new critical editions of selected inscriptions is a work carried out in the context of the project "Languages and Cultures of Ancient Italy. Historical Linguistics and Digital Models", which aims to investigate the languages of Ancient Italy by combining the traditional methods, proper to historical linguistics, with methods and technologies proper to the digital humanities and computational lexicography. More specifically, the purpose of the project is to create a collection of interrelated digital language resources which comprise: 1) the digital corpus of texts editions; 2) a computational lexicon compliant with the Web Semantic requirements; 3) a relevant bibliographic reference dataset encoded according to the FRBRoo/LRMoo specifications. Additionally, selected textual data and scientific interpretations will be encoded by using CIDOC CRM and its extensions, namely CRMtex and CRMinf. The present contribution tackles one of the main aspects of the project, and proposes significant innovations in the encoding of critical editions for epigraphic texts of fragmentary languages, which will hopefully foster future interoperability and integration with other external datasets, a paramount concern of the project},
  KEYWORDS = {text encoding, ancient languages, digital epigraphy, TEI/EpiDoc},
  PAGES = {15},
  URL = {https://dl.acm.org/doi/pdf/10.1145/3606703},
  VOLUME = {16},
  DOI = {10.1145/3606703},
  ISSN = {1556-4711},
  JOURNAL = {Journal on Computing and Cultural Heritage},
}

% Conference paper. NOTE(review): PAGES holds the single value 5, which looks like a page count
% rather than a range — confirm against the proceedings.
@INPROCEEDINGS{BOSCHETTI_2023_INPROCEEDINGS_BRQ_479742,
  AUTHOR = {Boschetti, F. and Rigobianco, L. and Quochi, V.},
  TITLE = {Domain-Specific Languages for Epigraphy: The Case of {ItAnt}},
  YEAR = {2023},
  ABSTRACT = {ItAnt is a research project devoted to the languages and cultures of ancient Italy witnessed by a digital collection of inscriptions. This contribution illustrates how the definition of a Domain-Specific Language can support the activity of the epigraphists involved in the project by increasing the human readability of the encoded data without sacrificing the compliance to standard models and formats. Finally, an example of concrete use of the encoded texts within the CLARIN-IT DigItAnt platform will be briefly described},
  KEYWORDS = {Domain-specific languages, Digital Scholarly Editions, Ancient Languages, Digital Humanities, Text Representation, TEI EpiDoc},
  PAGES = {5},
  URL = {https://office.clarin.eu/v/CE-2023-2328_CLARIN2023_ConferenceProceedings.pdf},
  CONFERENCE_NAME = {CLARIN Annual Conference 2023},
  BOOKTITLE = {CLARIN Annual Conference Proceedings 2023},
}

@INPROCEEDINGS{MARTELLI_2023_INPROCEEDINGS_MBCCCGKKKKLLNOPQSSTUN_479241, AUTHOR = {Martelli, F. and Bejgu, A. S. and Campagnano, C. and Cibej, J. and Costa, R. and Gantar, A. and Kallas, J. and Koeva, S. and Koppel, K. and Krek, S. and Langemets, M. and Lipp, V. and Nimb, S. and Olsen, S. and Pedersen, B. S. and Quochi, V. and Salgado, A. and Simon, L. and Tiberius, C. and Urena Ruiz, R. J. and Navigli, R.}, TITLE = {XL-WA: a Gold Evaluation Benchmark for Word Alignment in 14 Language Pairs}, YEAR = {2023}, ABSTRACT = {Word alignment plays a crucial role in several Natural Language Processing tasks, such as lexicon injection and cross-lingual label projection. The evaluation of word alignment systems relies heavily on manually-curated datasets, which are not always available, especially in mid-and low-resource languages. In order to address this limitation, we propose XL-WA, a novel entirely manually-curated evaluation benchmark for word alignment covering 14 language pairs. 
We illustrate the creation process of our benchmark and compare statistical and neural approaches to word alignment in both language-specific and zero-shot settings, thus investigating the ability of state-of-the-art models to generalize on unseen language pairs. We release our new benchmark at: https://github.com/SapienzaNLP/XL-WA}, KEYWORDS = {Deep Learning, Multilinguality, Natural Language Processing, Word Alignment}, PAGES = {9}, URL = {https://iris.cnr.it/handle/20.500.14243/479241}, VOLUME = {3596}, PUBLISHER = {CEUR-WS}, CONFERENCE_NAME = {9th Italian Conference on Computational Linguistics, CLiC-it 2023}, BOOKTITLE = {Proceedings of the Ninth Italian Conference on Computational Linguistics}, } @MISC{MALLIA_2023_MISC_MQ_545243, AUTHOR = {Mallia, M. and Quochi, V.}, TITLE = {EpiLexO}, YEAR = {2023}, ABSTRACT = {EpiLexO is a user friendly web application for the creation and editing of an integrated system of language resources for ancient fragmentary languages centered on the lexicon, in compliance with current digital humanities and Linked Open Data principles. EpiLexO allows for the editing of lexica with all relevant cross-references: for their linking to their testimonies, as well as to bibliographic information and other (external) resources and common vocabularies. This front-end application rests on a Service-Oriented Architecture with two main back-end components, the LexO-server (\handle) and the CASH-server (1github), which manage lexica and textual documents respectively via Rest-ful APIs web-services, plus additional services for the management of other aspects such as access and authentication, XML rendering, etc. All code is available on
https://github.com/DigItAnt/. The application has been developed in the context of a project on the languages of fragmentary attestation of ancient Italy, but can be applied to other similar contexts}, KEYWORDS = {Historical linguistics, Digital epigraphy, Linguistic Open Linked Data, Web GUI application, Lexicon Editor}, URL = {https://github.com/DigItAnt/LexO-angular-dev}, } @INCOLLECTION{DELFANTE_2022_INCOLLECTION_DFMQ_419162, AUTHOR = {Del Fante, D. and Frontini, F. and Monachini, M. and Quochi, V.}, TITLE = {Italian Language Resources. From CLARIN-IT to the VLO and Back: Sketching a Methodology for Monitoring LRs Visibility}, YEAR = {2022}, ABSTRACT = {This paper sketches a user-oriented, qualitative methodology for both (i) monitoring the existence and availability of language resources relevant for a given CLARIN national community and language and (ii) assessing the offering potential of CLARIN, in terms of Language Resources provided to national consortia. From the user perspective, the methodology has been applied to investigate the visibility of language resources available for Italian within the CLARIN central services, in particular the Virtual Language Observatory. As a proof-of-concept, the methodology has been tested on the resources available through the CLARIN-IT data centres, but, ideally, it could be applied by any national data centre aiming to assess the existence of LRs in CLARIN for any given languages and check their accessibility for the interested users.
It is thus argued that such an assessment might be a useful instrument in the hands of national coordinators and centre managers for (i) bringing to the fore both strengths and critical issues about their data providing community and (ii) for planning targeted actions to improve and increase both visibility and accessibility of their LRs}, KEYWORDS = {Virtual Language Observatory, CLARIN-IT, CLARIN-ERIC, Qualitative Assessment Methodology, User Involvement}, PAGES = {10-22}, URL = {https://ecp.ep.liu.se/index.php/clarin/article/view/413/371}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, BOOKTITLE = {Selected Papers from the CLARIN Annual Conference 2021}, } @INPROCEEDINGS{AGNOLONI_2022_INPROCEEDINGS_ABFMMQRV_446358, AUTHOR = {Agnoloni, T. and Bartolini, R. and Frontini, F. and Montemagni, S. and Marchetti, C. and Quochi, V. and Ruisi, M. and Venturi, G.}, TITLE = {Making Italian Parliamentary Records Machine-Actionable: the Construction of the ParlaMint-IT corpus}, YEAR = {2022}, ABSTRACT = {This paper describes the process of acquisition, cleaning, interpretation, coding and linguistic annotation of a collection of parliamentary debates from the Senate of the Italian Republic covering the COVID-19 pandemic emergency period and a former period for reference and comparison according to the CLARIN ParlaMint prescriptions. The corpus contains 1199 sessions and 79, 373 speeches for a total of about 31 million words, and was encoded according to the ParlaCLARIN TEI XML format. It includes extensive metadata about the speakers, sessions, political parties and parliamentary groups. As required by the ParlaMint initiative, the corpus was also linguistically annotated for sentences, tokens, POS tags, lemmas and dependency syntax according to the universal dependencies guidelines. Named entity annotation and classification is also included. All linguistic annotation was performed automatically using state-of-the-art NLP technology with no manual revision. 
The Italian dataset is freely available as part of the larger ParlaMint 2.1 corpus deposited and archived in CLARIN repository together with all other national corpora. It is also available for direct analysis and inspection via various CLARIN services and has already been used both for research and educational purposes}, KEYWORDS = {parliamentary debates, CLARIN ParlaMint, corpus creation, corpus annotation}, PAGES = {117-124}, URL = {https://aclanthology.org/2022.parlaclarin-1.17/}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-85-6}, CONFERENCE_NAME = {Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {Proceedings of The Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference}, } @INPROCEEDINGS{DELFANTE_2022_INPROCEEDINGS_DFMQ_416549, AUTHOR = {Del Fante, D. and Frontini, F. and Monachini, M. and Quochi, V.}, TITLE = {CLARIN-IT: An Overview on the Italian Clarin Consortium After Six Years of Activity}, YEAR = {2022}, ABSTRACT = {This paper offers an overview of the Italian CLARIN consortium after six years since its establishment. The members, the centres and the repositories and the most important collections are described. Lastly, in order to showcase the visibility and the accessibility of Language Resources provided by CLARIN-IT from a user-perspective, we show how Italian resources are findable within CLARIN ERIC}, KEYWORDS = {Language Resources, Data Repositories and Archives, Research Infrastructures, CLARIN}, PAGES = {8}, URL = {http://ceur-ws.org/Vol-3160/short21.pdf}, PUBLISHER = {CEUR-WS.org (Aachen, DEU)}, CONFERENCE_NAME = {Italian Research Conference on Digital Libraries}, CONFERENCE_PLACE = {Aachen}, BOOKTITLE = {Proceedings of the 18th Italian Research Conference on Digital Libraries}, EDITOR = {Di Nunzio, G. M. and Portelli, B. and Redavid, D.
and Silvello, G.}, } @INPROCEEDINGS{QUOCHI_2022_INPROCEEDINGS_QBKMMPRTZ_412363, AUTHOR = {Quochi, V. and Bellandi, A. and Khan, F. and Mallia, M. and Murano, F. and Piccini, S. and Rigobianco, L. and Tommasi, A. and Zavattari, C.}, TITLE = {From Inscriptions to Lexica and Back: A Platform for Editing and Linking the Languages of Ancient Italy}, YEAR = {2022}, ABSTRACT = {Available language technology is hardly applicable to scarcely attested ancient languages, yet their digital semantic representation, though challenging, is an asset for the purpose of sharing and preserving existing cultural knowledge. In the context of a project on the languages and cultures of ancient Italy, we took up this challenge. This paper thus describes the development of a user friendly web platform, EpiLexO, for the creation and editing of an integrated system of language resources for ancient fragmentary languages centered on the lexicon, in compliance with current digital humanities and Linked Open Data principles. EpiLexo allows for the editing of lexica with all relevant cross-references: for their linking to their testimonies, as well as to bibliographic information and other (external) resources and common vocabularies. 
The focus of the current implementation is on the languages of ancient Italy, in particular Oscan, Faliscan, Celtic and Venetic; however, the technological solutions are designed to be general enough to be potentially applicable to different contexts and scenarios}, KEYWORDS = {Digital Epigraphy, Restsprachen, Lexicon Editing and Linking, tools for DH}, PAGES = {59-67}, URL = {https://aclanthology.org/2022.lt4hala-1.0/}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, ISBN = {979-10-95546-78-8}, CONFERENCE_NAME = {Second Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA 2022)}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA 2022)}, EDITOR = {Sprugnoli, R. and Passarotti, M.}, } @INPROCEEDINGS{QUOCHI_2022_INPROCEEDINGS_QBMTZ_412366, AUTHOR = {Quochi, V. and Bellandi, A. and Mallia, M. and Tommasi, A. and Zavattari, C.}, TITLE = {Supporting Ancient Historical Linguistics and Cultural Studies with EpiLexO}, YEAR = {2022}, ABSTRACT = {This contribution presents a system of independent software components meant to support the creation of ecosystems of interrelated language data (i. e. lexica linked to textual testimonies, concepts, metadata, bibliographic references, and other external lexical resources) according to the current state-of-the-art representational models for the semantic web. The system is implemented as a set of autonomous servers exposing Restful APIs that in principle can serve different frontend applications and use cases. In this work they serve the EpiLexO GUI application designed and geared to support scholars of ancient languages of fragmentary attestation in their studies. 
The development of both the back-ends and the front-end is still work-in progress, but a first version is ready for use}, KEYWORDS = {tools for DH, ancient languages, restsprachen, lexicon editor, corpus management, lexicon-text linking}, PAGES = {39-43}, URL = {https://office.clarin.eu/v/CE-2022-2118-CLARIN2022_ConferenceProceedings.pdf}, ISSN = {2022-2118}, CONFERENCE_NAME = {CLARIN Annual Conference 2022}, BOOKTITLE = {CLARIN Annual Conference Proceedings 2022}, EDITOR = {Erjavec, T. and Eskevich, M.}, } @INPROCEEDINGS{MARINETTI_2022_INPROCEEDINGS_MMQBBDPRSZMMM_436338, AUTHOR = {Marinetti, A. and Murano, F. and Quochi, V. and Ballerini, M. and Boschetti, F. and Del Grosso, A. M. and Piccini, S. and Rigobianco, L. and Solinas, P. and Zinzi, M. and Monachini, M. and Mallia, M. and Middei, E.}, TITLE = {Challenges in Encoding Fragmentary Attested Languages}, YEAR = {2022}, ABSTRACT = {The ItAnt project investigates the languages of ancient Italy, whose only attestation consists in epigraphic evidence, focusing on Venetic, Oscan, Faliscan and Celtic languages. For this purpose, the project combines the traditional method proper to historical linguistics with the setting up of digital technologies, developing computational tools specifically designed to create a digital set of interrelated resources}, KEYWORDS = {digital epigraphy, eLexicography, Linguistic Linked Open Data, Text Encoding}, URL = {https://ciegl2022.sciencesconf.org/resource/page/id/30}, CONFERENCE_NAME = {XVI Congresso Internazionale di Epigrafia greca e latina}, } @TECHREPORT{MARTELLI_2022_TECHREPORT_MMCNVUFQKKLDTTCSKIDGM_412365, AUTHOR = {Martelli, F. and Maru, M. and Campagnano, C. and Navigli, R. and Velardi, P. and Ureña Ruiz, R. J. and Frontini, F. and Quochi, V. and Kallas, J. and Koppel, K. and Langemets, M. and De Does, J. and Tempelaars, R. and Tiberius, C. and Costa, R. and Salgado, A. and Krek, S. and Čibej, J. and Dobrovoljc, K. and Gantar, P. and Munda, T.},
TITLE = {D3.8 Lexical-semantic analytics for NLP}, YEAR = {2022}, ABSTRACT = {The present document illustrates the work carried out in task 3.3 (work package 3) focused on lexical-semantic analytics for Natural Language Processing (NLP). This task aims at computing analytics for lexical-semantic information such as words, senses and domains in the available resources, investigating their role in NLP applications. Specifically, this task concentrates on three research directions, namely i) sense clustering, in which grouping senses based on their semantic similarity improves the performance of NLP tasks such as Word Sense Disambiguation (WSD), ii) domain labeling of text, in which the lexicographic resources made available by the ELEXIS project for research purposes allow better performances to be achieved, and finally iii) analysing the diachronic distribution of senses, for which a software package is made available. In this deliverable, we illustrate the research activities aimed at achieving the aforementioned goals and put forward suggestions for future works. Importantly, we stress the crucial role played by high-quality lexical-semantic resources when investigating such linguistic aspects and their impact on NLP applications. To this end, as an additional contribution, we address the paucity of manually-annotated data in the lexical-semantic research field and introduce the ELEXIS parallel sense-annotated dataset, a novel entirely manually-annotated dataset available in 10 European languages and featuring 5 annotation layers}, KEYWORDS = {research infrastructures, lexicography, lexical resources, word-sense disambiguation, WSD, sense-annotated language data, multilinguality}, PAGES = {67}, URL = {https://elex.is/wp-content/uploads/ELEXIS_D3_8_Lexical-Semantic_Analytics_for_NLP_final_report.pdf}, } @TECHREPORT{TASOVAC_2022_TECHREPORT_TTBBBCUFHHMKKKKMMMMMQARSSVWWZ_446092, AUTHOR = {Tasovac, T. and Tiberius, C. and Bamberg, C. and Bellandi, A. and Burch, T. and Costa, R. and Ďurčo, M.
and Frontini, F. and Hennemann, J. and Heylen, K. and Jakubíček, M. and Khan, F. and Klee, A. and Kosem, I. and Kovář, V. and Matuška, O. and McCrae, J. and Monachini, M. and Mörth, K. and Munda, T. and Quochi, V. and Repar, A. and Roche, C. and Salgado, A. and Sievers, H. and Váradi, T. and Weyand, S. and Woldrich, A. and Zhanial, S.}, TITLE = {D5.3 Overview of Online Tutorials and Instruction Manuals}, YEAR = {2022}, ABSTRACT = {The ELEXIS Curriculum is an integrated set of training materials which contextualizes ELEXIS tools and services inside a broader, systematic pedagogic narrative. This means that the goal of the ELEXIS Curriculum is not simply to inform users about the functionalities of particular tools and services developed within the project, but to show how such tools and services are a) embedded in both lexicographic theory and practice; and b) representative of and contributing to the development of digital skills among lexicographers. The scope and rationale of the curriculum are described in more detail in the Deliverable D5.2 Guidelines for Producing ELEXIS Tutorials and Instruction Manuals. The goal of this deliverable, as stated in the project DOW, is to provide "a clear, structured overview of tutorials and instruction manuals developed within the project."}, KEYWORDS = {ELEXIS, lexicography, training materials}, PAGES = {31}, URL = {https://elex.is/wp-content/uploads/ELEXIS_D5_3_Overview-of-Online-Tutorials-and-Instruction-Manuals.pdf}, } @MISC{FRONTINI_2022_MISC_FBQMMZUW_441101, AUTHOR = {Frontini, F. and Bellandi, A. and Quochi, V. and Monachini, M. and Mörth, K. and Zhanial, S. and Ďurčo, M. and Woldrich, A.}, TITLE = {CLARIN Tools and Resources for Lexicographic Work}, YEAR = {2022}, ABSTRACT = {This course introduces lexicographers to the CLARIN Research Infrastructure and highlights language resources and tools useful for lexicographic practices. The course consists of two parts.
In Part 1, you will learn about CLARIN, its technical and knowledge infrastructure, and about how to deposit and find lexical resources in CLARIN. In Part 2, you will become acquainted with CLARIN tools that can be used to create lexical resources}, KEYWORDS = {CLARIN, lexicography}, URL = {https://elexis.humanistika.org/id/UnwYPq70Dewbn7XDEjsMM}, } @MISC{MARTELLI_2022_MISC_MNKKGKNSOLKKDUSLVGLQMFTTCSIM_446359, AUTHOR = {Martelli, F. and Navigli, R. and Krek, S. and Kallas, J. and Gantar, P. and Koeva, S. and Nimb, S. and Pedersen, B. S. and Olsen, S. and Langemets, M. and Koppel, K. and Üksik, T. and Dobrovoljc, K. and Ureña Ruiz, R. J. and Sancho Sánchez, J. L. and Lipp, V. and Váradi, T. and Győrffy, A. and László, S. and Quochi, V. and Monachini, M. and Frontini, F. and Tiberius, C. and Tempelaars, R. and Costa, R. and Salgado, A. and Čibej, J. and Munda, T.}, TITLE = {Parallel sense-annotated corpus ELEXIS-WSD 1.0}, YEAR = {2022}, ABSTRACT = {ELEXIS-WSD is a parallel sense-annotated corpus in which content words (nouns, adjectives, verbs, and adverbs) have been assigned senses. Version 1.0 contains sentences for 10 languages: Bulgarian, Danish, English, Spanish, Estonian, Hungarian, Italian, Dutch, Portuguese, and Slovene. The corpus was compiled by automatically extracting a set of sentences from WikiMatrix (Schwenk et al., 2019), a large open-access collection of parallel sentences derived from Wikipedia, using an automatic approach based on multilingual sentence embeddings. The sentences were manually validated according to specific formal, lexical and semantic criteria (e.g. by removing incorrect punctuation, morphological errors, notes in square brackets and etymological information typically provided in Wikipedia pages). To obtain a satisfying semantic coverage, we filtered out sentences with less than 5 words and less than 2 polysemous words were filtered out.
Subsequently, in order to obtain datasets in the other nine target languages, for each selected sentence in English, the corresponding WikiMatrix translation into each of the other languages was retrieved. If no translation was available, the English sentence was translated manually. The resulting corpus is comprised of 2,024 sentences for each language}, KEYWORDS = {Word Sense Disambiguation, corpus parallelo, disambiguazione automatica del senso, annotazione semantica multilingue}, URL = {https://iris.cnr.it/handle/20.500.14243/446359}, } @MISC{QUOCHI_2022_MISC_QB_446071, AUTHOR = {Quochi, V. and Bellandi, A.}, TITLE = {LexO editor: the basics-video tutorial}, YEAR = {2022}, ABSTRACT = {Video tutorial sull'uso di LexO, un editor di lessici secondo il modello Ontolex-lemon. Il tutorial è parte dell'ELEXIS training programme disponibile sulla piattaforma DARIAH-teach}, KEYWORDS = {lexicon editor, video tutorial, training material, lexO, online web application}, URL = {https://www.youtube.com/watch?v=9KE0laMaTAs\&list=PLoD829qNERpYKq8JRkY4EIGgZCdi0QHOd}, } @INPROCEEDINGS{DELFANTE_2021_INPROCEEDINGS_DFMQ_447069, AUTHOR = {Del Fante, D. and Frontini, F. and Monachini, M. and Quochi, V.}, TITLE = {CLARIN-IT Resources in CLARIN ERIC-a Bird's-Eye View}, YEAR = {2021}, ABSTRACT = {The paper investigates the visibility of CLARIN-IT language resources within the services of the CLARIN ERIC central infrastructure, notably the Virtual Language Observatory, the Switchboard and the Federated Content Search, from a user perspective in order to identify possible issues.
While the experiment focused on one national consortium, the ultimate goal is to develop an assessment methodology that can be used by any national consortia aiming to review the accessibility of their resources and tools within the CLARIN central services}, KEYWORDS = {FAIR, research infrastructure for SSH, language resources, findability, CLARIN}, PAGES = {129-133}, URL = {https://office.clarin.eu/v/CE-2021-1923-CLARIN2021_ConferenceProceedings.pdf}, ISSN = {2021-1923}, CONFERENCE_NAME = {CLARIN Annual Conference 2021}, } @INPROCEEDINGS{MARTELLI_2021_INPROCEEDINGS_MNKTKGKNPOLKKDUSLVGLQMFTCSIM_443238, AUTHOR = {Martelli, F. and Navigli, R. and Krek, S. and Tiberius, C. and Kallas, J. and Gantar, P. and Koeva, S. and Nimb, S. and Pedersen, B. S. and Olsen, S. and Langemets, M. and Koppel, K. and Üksik, T. and Dobrovoljc, K. and Ureña Ruiz, R. J. and Sancho Sánchez, J. L. and Lipp, V. and Váradi, T. and Győrffy, A. and László, S. and Quochi, V. and Monachini, M. and Frontini, F. and Tempelaars, R. and Costa, R. and Salgado, A. and Čibej, J. and Munda, T.}, TITLE = {Designing the ELEXIS Parallel Sense-Annotated Dataset in 10 European Languages}, YEAR = {2021}, ABSTRACT = {Over the course of the last few years, lexicography has witnessed the burgeoning of increasingly reliable automatic approaches supporting the creation of lexicographic resources such as dictionaries, lexical knowledge bases and annotated datasets. In fact, recent achievements in the field of Natural Language Processing and particularly in Word Sense Disambiguation have widely demonstrated their effectiveness not only for the creation of lexicographic resources, but also for enabling a deeper analysis of lexical-semantic data both within and across languages. Nevertheless, we argue that the potential derived from the connections between the two fields is far from exhausted. In this work, we address a serious limitation affecting both lexicography and Word Sense Disambiguation, i.e.
the lack of high-quality sense-annotated data and describe our efforts aimed at constructing a novel entirely manually annotated parallel dataset in 10 European languages. For the purposes of the present paper, we concentrate on the annotation of morpho-syntactic features. Finally, unlike many of the currently available sense-annotated datasets, we will annotate semantically by using senses derived from high-quality lexicographic repositories}, KEYWORDS = {Digital lexicography, Word Sense Disambiguation, Computational Linguistics, Corpus Linguistics, Natural Language Processing}, PAGES = {377-395}, URL = {https://static-curis.ku.dk/portal/files/279888836/eLex_2021_22_pp377_395.pdf}, VOLUME = {2021}, PUBLISHER = {Lexical Computing (Brno, CZE)}, CONFERENCE_NAME = {eLex 2021}, CONFERENCE_PLACE = {Brno}, BOOKTITLE = {Electronic lexicography in the 21st century (eLex 2021): Post-editing lexicography}, } @INPROCEEDINGS{MARINETTI_2021_INPROCEEDINGS_MMQBBDPRS_449219, AUTHOR = {Marinetti, A. and Murano, F. and Quochi, V. and Ballerini, M. and Boschetti, F. and Del Grosso, A. M. and Piccini, S. and Rigobianco, L. and Solinas, P.}, TITLE = {Languages and Cultures of Ancient Italy. Historical Linguistics and Digital Models}, YEAR = {2021}, ABSTRACT = {The abstract accompanies a poster presenting an overview of the project "Languages and cultures of Ancient Italy", which had just started. The project brings together competences from Historical Linguistics, Computational Lexicography and Digital Humanities. The main objective of the project is to investigate the cultures of ancient Italy on the basis of their linguistic documentation (7th-1st c. B.C.)
by means of digital tools specifically tailored for their peculiarities}, KEYWORDS = {digital epigraphy, computational lexicons, text-lexicon linking, restsprachen, digital models, digital humanities}, PAGES = {528-532}, URL = {https://amsacta.unibo.it/id/eprint/6712/}, ISBN = {9788894253559}, CONFERENCE_NAME = {10th National Conference of Associazione per l'Informatica Umanistica e la Cultura Digitale}, BOOKTITLE = {AIUCD 2021-Book of Extended Abstracts}, } @MISC{ERJAVEC_2021_MISC_EOOLSGRPKBSVDDJHNCDVMLCAFMQVRMBSRDUPBKMBCDLR_446076, AUTHOR = {Erjavec, T. and Ogrodniczuk, M. and Osenova, P. and Ljubešić, N. and Simov, K. and Grigorova, V. and Rudolf, M. and Pančur, A. and Kopp, M. and Barkarson, S. and Steingrímsson, S. and Van Der Pol, H. and Depoorter, G. and De Does, J. and Jongejan, B. and Haltrup Hansen, D. and Navarretta, C. and Calzada Pérez, M. and De Macedo, L. D. and Van Heusden, R. and Marx, M. and Çöltekin, Ç. and Coole, M. and Agnoloni, T. and Frontini, F. and Montemagni, S. and Quochi, V. and Venturi, G. and Ruisi, M. and Marchetti, C. and Battistoni, R. and Sebők, M. and Ring, O. and Darģis, R. and Utka, A. and Petkevičius, M. and Briedienė, M. and Krilavičius, T. and Morkevičius, V. and Bartolini, R. and Cimino, A. and Diwersy, S. and Luxardo, G. and Rayson, P.}, TITLE = {Linguistically annotated multilingual comparable corpora of parliamentary debates ParlaMint.ana 2.1}, YEAR = {2021}, ABSTRACT = {ParlaMint 2.1 is a multilingual set of 17 comparable corpora containing parliamentary debates mostly starting in 2015 and extending to mid-2020, with each corpus being about 20 million words in size. The sessions in the corpora are marked as belonging to the COVID-19 period (from November 1st 2019), or being "reference" (before that date).
The corpora have extensive metadata, including aspects of the parliament; the speakers (name, gender, MP status, party affiliation, party coalition/opposition); are structured into time-stamped terms, sessions and meetings; with speeches being marked by the speaker and their role (e.g. chair, regular speaker). The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. Note that some corpora have further information, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The corpora are encoded according to the Parla-CLARIN TEI recommendation (https://clarin-eric.github.io/parla-clarin/), but have been validated against the compatible, but much stricter ParlaMint schemas. This entry contains the linguistically marked-up version of the corpus, while the text version is available at http://hdl.handle.net/11356/1432. The ParlaMint.ana linguistic annotation includes tokenization, sentence segmentation, lemmatisation, Universal Dependencies part-of-speech, morphological features, and syntactic dependencies, and the 4-class CoNLL-2003 named entities. Some corpora also have further linguistic annotations, such as PoS tagging or named entities according to language-specific schemes, with their corpus TEI headers giving further details on the annotation vocabularies and tools}, KEYWORDS = {covid-19, ParlaCLARIN, CLARIN, linguistic annotation, pos-tagging, Named Entity Recognition, linguistic dependency annotation, UD, dibattiti parlamentari, parlamenti, discorso politico}, URL = {https://iris.cnr.it/handle/20.500.14243/446076}, } @MISC{ERJAVEC_2021_MISC_EOOLSGRPKBSVDDJHNCDVMLCAFMQVRMBSRDUPBKMBCDLR_446080, AUTHOR = {Erjavec, T. and Ogrodniczuk, M. and Osenova, P. and Ljubešić, N. and Simov, K. and Grigorova, V. and Rudolf, M. and Pančur, A. and Kopp, M. and Barkarson, S. and Steingrímsson, S. and Van Der Pol, H. and Depoorter, G.
and De Does, J. and Jongejan, B. and Haltrup Hansen, D. and Navarretta, C. and Calzada Pérez, M. and De Macedo, L. D. and Van Heusden, R. and Marx, M. and Çöltekin, Ç. and Coole, M. and Agnoloni, T. and Frontini, F. and Montemagni, S. and Quochi, V. and Venturi, G. and Ruisi, M. and Marchetti, C. and Battistoni, R. and Sebők, M. and Ring, O. and Darģis, R. and Utka, A. and Petkevičius, M. and Briedienė, M. and Krilavičius, T. and Morkevičius, V. and Bartolini, R. and Cimino, A. and Diwersy, S. and Luxardo, G. and Rayson, P.}, TITLE = {Multilingual comparable corpora of parliamentary debates ParlaMint 2.1}, YEAR = {2021}, ABSTRACT = {ParlaMint 2.1 is a multilingual set of 17 comparable corpora containing parliamentary debates mostly starting in 2015 and extending to mid-2020, with each corpus being about 20 million words in size. The sessions in the corpora are marked as belonging to the COVID-19 period (after November 1st 2019), or being "reference" (before that date). The corpora have extensive metadata, including aspects of the parliament; the speakers (name, gender, MP status, party affiliation, party coalition/opposition); are structured into time-stamped terms, sessions and meetings; with speeches being marked by the speaker and their role (e.g. chair, regular speaker). The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. Note that some corpora have further information, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The corpora are encoded according to the Parla-CLARIN TEI recommendation (https://clarin-eric.github.io/parla-clarin/), but have been validated against the compatible, but much stricter ParlaMint schemas. This entry contains the ParlaMint TEI-encoded corpora with the derived plain text version of the corpus along with TSV metadata on the speeches. Also included is the
2.0 release of the data and scripts available at the GitHub repository of the ParlaMint project. Note that there also exists the linguistically marked-up version of the corpus, which is available at http://hdl.handle.net/11356/1431}, KEYWORDS = {ParlaMint, ParlaCLARIN, dibattiti parlamentari, covid-19, discorso politico, CLARIN}, URL = {https://iris.cnr.it/handle/20.500.14243/446080}, } @TECHREPORT{BARTOLINI_2020_TECHREPORT_BQMA_398626, AUTHOR = {Bartolini, R. and Quochi, V. and Monachini, M. and Affé, F.}, TITLE = {Relazione di fine progetto "PIM-Piattaforma Integrata Monitoraggio"}, YEAR = {2020}, ABSTRACT = {Il documento presenta l'attività svolta dal CNR-ILC nel ruolo di subcontraente di COMDATA per la realizzazione di moduli di trattamento automatico del linguaggio e la consulenza per l'integrazione di metodi di clustering automatico di documenti nella Digital Library del progetto PIM}, KEYWORDS = {accesso intelligente al testo, digital library, natural language processing}, PAGES = {156}, URL = {https://iris.cnr.it/handle/20.500.14243/398626}, } @BOOK{FIER_2019_BOOK_FLABDDDDDEGHKLMMNNNNNNOOQRSSTVVV_407479, AUTHOR = {Fišer, D. and Lenardič, J. and Auziņa, I. and Bernstein Ratner, N. and De Smedt, K. and Dobrovoljc, K. and Dodé, R. and Domeij, R. and Dyvik, H. and Erjavec, T. and Gerassimenko, O. and Hajič, J. and Křen, M. and Ljubešić, N. and MacWhinney, B. and Monachini, M. and Nava, B. and Navarretta, C. and Nedyalkova, A. and Nielsen, K. and Noémi Vadászlaak, M. and Nylund Skog, S. and Offersgaard, L. and Osenova, P. and Quochi, V. and Reinsone, S. and Skadiņa, I. and Simov, K. and Tichý, O. and Vadász, N. and Váradi, T. and Vider, K.}, TITLE = {Tour de CLARIN Two}, YEAR = {2019}, ABSTRACT = {The second volume of Tour de CLARIN is organized into two parts. In Part 1, we present the seven countries which have been featured: Estonia, Latvia, Denmark, Italy, Slovenia, Hungary, and Bulgaria.
In this part, each country is presented with five chapters: an introduction to the consortium, their members and their work; a description of one of their key resources; a presentation of an outstanding tool; an account of a successful event for the researchers and students in their network; and an interview with a renowned researcher from the digital humanities or social sciences who has successfully used the consortium infrastructure in their research}, KEYWORDS = {CLARIN, CLARIN Consortia, CLARIN resources}, PAGES = {1-87}, URL = {https://doi.org/10.5281/zenodo.3754164}, DOI = {10.5281/zenodo.3754164}, PUBLISHER = {CLARIN-Common language resources technology infrastructure (Utrecht, NLD)}, ISBN = {9789082990911}, CONFERENCE_PLACE = {Utrecht}, EDITOR = {Fišer, D. and Lenardič, J.}, } @INCOLLECTION{MONACHINI_2019_INCOLLECTION_MQ_407476, AUTHOR = {Monachini, M. and Quochi, V.}, TITLE = {Tour de CLARIN: Italy}, YEAR = {2019}, ABSTRACT = {Il Tour de CLARIN è un'iniziativa di CLARIN ERIC che mira a evidenziare periodicamente importanti attività di coinvolgimento degli utenti di un particolare consorzio nazionale CLARIN. Dopo aver visitato 11 paesi, a febbraio e marzo 2019 il Tour de CLARIN si ferma in Italia per visitare CLARIN-IT. La tappa italiana del Tour de CLARIN è iniziata con un post sul blog di CLARIN che ha presentato il consorzio italiano: "Tour de CLARIN: Italia" (01/03/2019)}, KEYWORDS = {CLARIN, CLARIN Consortia}, PAGES = {40-42}, URL = {https://office.clarin.eu/v/CE-2019-1537-Tour-de-CLARIN-volume-II-2019.pdf}, DOI = {10.5281/zenodo.3754164}, PUBLISHER = {CLARIN-Common language resources technology infrastructure (Utrecht, NLD)}, ISSN = {2019-1537}, ISBN = {9789082990911}, CONFERENCE_PLACE = {Utrecht}, BOOKTITLE = {Tour de CLARIN Volume Two}, EDITOR = {Fišer, D. and Lenardič, J.}, } @INPROCEEDINGS{DOMINUTTI_2019_INPROCEEDINGS_DPDMQ_390432, AUTHOR = {Dominutti, E. and Pifferi, L. and Dell'Orletta, F. and Montemagni, S.
and Quochi, V.}, TITLE = {Building an Italian written-spoken parallel corpus: A pilot study}, YEAR = {2019}, ABSTRACT = {This paper presents a pilot study towards the creation of a monolingual written-spoken parallel corpus in Italian, featuring two main novelties in the general landscape of spoken corpora: the alignment with the written counterpart of the same content and the spoken variety dealt with, represented by transcriptions of radio news broadcasting}, URL = {https://iris.cnr.it/handle/20.500.14243/390432}, } @INCOLLECTION{NICOLAS_2018_INCOLLECTION_NKMDCAEBQV_371916, AUTHOR = {Nicolas, L. and König, A. and Monachini, M. and Del Gratta, R. and Calamai, S. and Abel, A. and Enea, A. and Biliotti, F. and Quochi, V. and Vincenzo Stella, F.}, TITLE = {CLARIN-IT: State of Affairs, Challenges and Opportunities}, YEAR = {2018}, ABSTRACT = {This paper gives an overview on the Italian national CLARIN consortium as it currently stands two years after its creation at the end of 2015. It thus discusses the current state of affairs of the consortium on several aspects, especially with regards to members. It also discusses the events and initiatives that have been undertaken, as well as the ones that are planned in the close future. It finally outlines the conclusions of a user survey performed to understand the expectations of a targeted user population and provides indications regarding the next steps planned}, KEYWORDS = {CLARIN-IT Consortium Pisa Bolzano Siena}, PAGES = {1-14}, URL = {http://www.ep.liu.se/ecp/contents.asp?issue=147}, ISBN = {978-91-7685-273-6}, BOOKTITLE = {Selected papers from the CLARIN Annual Conference 2017, Budapest, 18-20 September 2017}, } @INPROCEEDINGS{SORIA_2018_INPROCEEDINGS_SQR_371917, AUTHOR = {Soria, C. and Quochi, V.
and Russo, I.}, TITLE = {The DLDP Survey on Digital Use and Usability of EU Regional and Minority Languages}, YEAR = {2018}, ABSTRACT = {This paper reports about the design, the results and the key findings of a survey launched by the Digital Language Diversity Project about the digital use and usability of regional and minority languages. The aim of the survey-the first of this kind-was to investigate the real needs and expectations of European minority language speakers regarding digital opportunities. The focus on four languages (Basque, Breton, Karelian and Sardinian) at different stages of digital development offers a starting point to develop strategies for assessing digital vitality of these languages and overcoming specific difficulties}, KEYWORDS = {minority languages, digital survival, electronic communication}, PAGES = {4155-4160}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/684.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @TECHREPORT{BARONI_2018_TECHREPORT_BQRSCGHKSS_462551, AUTHOR = {Baroni, P. and Quochi, V. and Russo, I. and Soria, C. and Ceberio Berger, K. and Gurrutxaga Hernaiz, A. and Hicks, D. and Kruse, E. and Salonen, T. 
and Sarhimaa, A.}, TITLE = {Kit per la sopravvivenza digitale della lingua sarda-Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua sarda}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua sarda (versione italiana)}, KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Sardinian}, PAGES = {12}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Sardinian_IT.pdf}, } @TECHREPORT{CEBERIOBERGER_2018_TECHREPORT_CGBHKQRSSS_428764, AUTHOR = {Ceberio Berger, K. and Gurrutxaga Hernaiz, A. and Baroni, P. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {Kit de Supervivencia Lingüística Digital del Euskera-Recomendaciones del DLDP para mejorar la Vitalidad Digital del euskera}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del basco (versione spagnola)}, KEYWORDS = {digital diversity, digital vitality, recommendations, Basque, digital survival}, PAGES = {28}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Basque_ES.pdf}, } @TECHREPORT{CEBERIOBERGER_2018_TECHREPORT_CGBHKQRSSS_428763, AUTHOR = {Ceberio Berger, K. and Gurrutxaga Hernaiz, A. and Baroni, P. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.
and Soria, C.}, TITLE = {Euskarak Mundu Digitalean Bizirauteko Kita-DLDPren gomendioak, euskararen bizitasun digitala hobetu dadin}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale della lingua basca (versione basca)}, KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Basque}, PAGES = {27}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Basque_EU.pdf}, } @TECHREPORT{CEBERIOBERGER_2018_TECHREPORT_CGBHKQRSSS_428733, AUTHOR = {Ceberio Berger, K. and Gurrutxaga Hernaiz, A. and Baroni, P. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {The DLDP Digital Language Survival Kit}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale delle lingue (versione inglese integrale)}, KEYWORDS = {sopravvivenza digitale, lingue minoritarie, less-resourced languages}, PAGES = {38}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_460815, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio Berger, K. and Gurrutxaga Hernaiz, A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {Diversità Linguistica Digitale: la Roadmap-Raccomandazioni strategiche \& Sequenza}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione italiana sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_IT.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_428760, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio Berger, K. and Gurrutxaga Hernaiz, A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.
and Soria, C.}, TITLE = {The DLDP Roadmap}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione inglese integrale)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {19}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Roadmap.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_460811, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio Berger, K. and Gurrutxaga Hernaiz, A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {DLDP etenemissuunnitelma-Toimenpidesuunnitelmat ja aikajana}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione finlandese sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_FI.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_462989, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio Berger, K. and Gurrutxaga Hernaiz, A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {La Roadmap DLDP-Recommandations de politique et calendrier}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione francese sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_FR.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_460807, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. 
and Soria, C.}, TITLE = {The DLDP Roadmap-Policy Recommendations \& Timeline}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione inglese sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_EN.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_460814, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {La DLDP Hoja de Ruta-Políticas recomendadas \& Cronograma}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione spagnola sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_ES.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_462988, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {Die DLDP Roadmap-Strategieempfehlungen \& Zeitplan}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione tedesca sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_DE.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_460816, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.
and Soria, C.}, TITLE = {DLDP Bide Orria-Gomendatutako politikak \& Kronograma}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione basca sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_EU.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HSBCGKQRSS_421382, AUTHOR = {Hicks, D. and Soria, C. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.}, TITLE = {Pak treuzveviñ ar Brezhoneg niverel-Erbedoù an DLDP evit gwellaat buhezegezh niverel ar brezhoneg}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del bretone (versione bretone)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Breton_BR.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HSBCGKQRSS_421387, AUTHOR = {Hicks, D. and Soria, C. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.}, TITLE = {Kit de survie numerique pour la langue bretonne-Les recommandations du DLDP pour améliorer la vitalité numérique du Breton}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del bretone (versione francese)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Breton_FR.pdf}, } @TECHREPORT{SALONEN_2018_TECHREPORT_SBCGHKQRSS_462987, AUTHOR = {Salonen, T. and Baroni, P. and Ceberio Berger, K. and Gurrutxaga Hernaiz, A. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Sarhimaa, A.
and Soria, C.}, TITLE = {Karjalan digitualine hengihjiämispakkavus-DLDP-rekomendatsiet karjalan kielen digitualizen elinvoimazuon kohendamizeh}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua careliana (versione careliana)}, KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Karelian}, PAGES = {12}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Karelian_KRL.pdf}, } @TECHREPORT{SALONEN_2018_TECHREPORT_SBCGHKQRSS_421393, AUTHOR = {Salonen, T. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Sarhimaa, A. and Soria, C.}, TITLE = {Karjalan digitaalinen kielenselviytymispakkaus-DLDP-suositukset karjalan kielen digitaalisen elinvoimaisuuden parantamiseksi}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale della lingua careliana (versione finlandese)}, KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Karelian}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Karelian_FI.pdf}, } @MISC{CEBERIO_2018_MISC_CGSRQ_420185, AUTHOR = {Ceberio, K. and Gurrutxaga, A. and Soria, C. and Russo, I. and Quochi, V.}, TITLE = {How to Use the Digital Language Vitality Scale}, YEAR = {2018}, ABSTRACT = {The Digital Language Vitality Scale is an instrument developed within the framework of the Digital Language Diversity Project (www.dldp.eu) for estimating the degree of digital vitality of any given language. It aims to be an instrument for self-assessment of the digital vitality of any language, although it is aimed in particular at identifying current gaps, needs and requirements regarding the extent to which a language community is active/vital on digital media and devices so that adequate digital language planning can be done.
This document instructs prospective adopters on how to best use it}, KEYWORDS = {Diversità Linguistica, BLARK, Sopravvivenza linguistica digitale}, PAGES = {18}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Vitality-Scale.pdf}, } @INPROCEEDINGS{NICOLAS_2017_INPROCEEDINGS_NKMDCAEBQ_330362, AUTHOR = {Nicolas, L. and König, A. and Monachini, M. and Del Gratta, R. and Calamai, S. and Abel, A. and Enea, A. and Biliotti, F. and Quochi, V.}, TITLE = {CLARIN-IT: State of Affairs, Challenges and Opportunities}, YEAR = {2017}, ABSTRACT = {This paper provides an overview on the Italian national CLARIN consortium and the status of CLARIN-IT in general. It thus discusses the current state of affairs of the consortium and provides information on the members, especially with regards to what they offer to CLARIN in terms of resources, services and expertise, and what CLARIN offers them to further their own research}, KEYWORDS = {Italian CLARIN consortium, CLARIN-IT}, PAGES = {4}, URL = {https://www.clarin.eu/event/2017/clarin-annual-conference-2017-budapest-hungary}, CONFERENCE_NAME = {CLARIN Annual Conference 2017}, } @MISC{SORIA_2017_MISC_SRQ_334735, AUTHOR = {Soria, C. and Russo, I.
and Quochi, V.}, TITLE = {Reports on Digital Language Diversity in Europe}, YEAR = {2017}, ABSTRACT = {In these reports we present the results of the first survey about the actual needs of European minority languages speakers in terms of digital opportunities}, KEYWORDS = {regional languages, minority languages, digital vitality, digital use}, URL = {http://www.dldp.eu/content/reports-digital-language-diversity-europe}, } @INCOLLECTION{QUOCHI_2016_INCOLLECTION_Q_320606, AUTHOR = {Quochi, V.}, TITLE = {Development and representation of Italian light-fare constructions}, YEAR = {2016}, ABSTRACT = {The essay describes the study of the development and use of light fare 'do' constructions in Child-directed Speech and in Child Language with the twofold goal of showing that a Construction Grammar approach is viable, and of providing support to usage-based, functional predictions on language acquisition. The analysis of naturalistic data derived from the CHILDES database led to two main findings: first, a representation of fare Light Verb Constructions as a family of constructions organized like a radial category is not only possible but more explicative, second, there exists a 'fare' pivot schema that children generalize at an early stage because it serves the purpose of naming new events, activities or situations}, KEYWORDS = {Corpus linguistic, Language Acquisition, Construction Grammar, phraseology}, PAGES = {39-64}, URL = {https://benjamins.com/#catalog/books/cal.19.03quo/details}, DOI = {10.1075/cal.19.03quo}, PUBLISHER = {John Benjamins Publishing Company (Amsterdam/Philadelphia, USA)}, ISBN = {9789027204417}, CONFERENCE_PLACE = {Amsterdam/Philadelphia}, BOOKTITLE = {Corpus-based Approaches to Construction Grammar}, EDITOR = {Yoon, J. and Gries, S. Th.}, } @INPROCEEDINGS{SORIA_2016_INPROCEEDINGS_SRQHGST_324686, AUTHOR = {Soria, C. and Russo, I. and Quochi, V. and Hicks, D. and Gurrutxaga, A. and Sarhimaa, A.
and Tuomisto, M.}, TITLE = {Fostering digital representation of EU regional and minority languages: the Digital Language Diversity Project}, YEAR = {2016}, ABSTRACT = {Poor digital representation of minority languages further prevents their usability on digital media and devices. The Digital Language Diversity Project, a three-year project funded under the Erasmus programme, aims at addressing the problem of low digital representation of EU regional and minority languages by giving their speakers the intellectual and practical skills to create, share, and reuse online digital content. Availability of digital content and technical support to use it are essential prerequisites for the development of language-based digital applications, which in turn can boost digital usage of these languages. In this paper we introduce the project, its aims, objectives and current activities for sustaining digital usability of minority languages through adult education}, KEYWORDS = {Less-resourced languages, Language Technology, digital language vitality, digital language diversity}, PAGES = {3256-3260}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Paris}, } @INPROCEEDINGS{DELGRATTA_2015_INPROCEEDINGS_DFMPRBGKQSC_307390, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Goggi, S. and Khan, F. and Quochi, V. and Soria, C. and Calzolari, N.}, TITLE = {Visualising Italian Language Resources: a Snapshot}, YEAR = {2015}, ABSTRACT = {This paper aims to provide a first snapshot of Italian Language Resources (LRs) and their uses by the community, as documented by the papers presented at two different conferences, LREC2014 and CLiC-it 2014.
The data of the former were drawn from the LOD version of the LRE Map, while those of the latter come from manually analyzing the proceedings. The results are presented in the form of visual graphs and confirm the initial hypothesis that Italian LRs require concrete actions to enhance their visibility}, KEYWORDS = {Italian Language Resources}, PAGES = {100-104}, URL = {https://books.openedition.org/aaccademia/1277?lang=it}, ISBN = {978-88-99200-62-6}, CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics CLiC-it 2015}, BOOKTITLE = {Proceedings of the Second Italian Conference on Computational Linguistics CLiC-it 2015}, EDITOR = {Bosco, C. and Tonelli, S. and Zanzotto, F. M.}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FQM_267184, AUTHOR = {Frontini, F. and Quochi, V. and Monachini, M.}, TITLE = {Generative Lexicon and polysemy: inducing logical alternations}, YEAR = {2015}, ABSTRACT = {The current paper brings together the results of a series of experiments for inducing regular sense alternations, or regular/ logical polysemy, from a computational lexicon based on the Generative Lexicon theory. The results are discussed in light of the potential benefits and uses of the amended algorithm}, KEYWORDS = {Polysemy, Generative Lexicon, Logical Alternations}, PAGES = {7}, URL = {https://iris.cnr.it/handle/20.500.14243/267184}, PUBLISHER = {MAPLEX2015 Multiple Approaches to Lexicon Conference (Yamagata, JPN)}, CONFERENCE_NAME = {MAPLEX2015 Multiple Approaches to Lexicon Conference}, CONFERENCE_PLACE = {Yamagata}, EDITOR = {Hsieh, S. K. and Kanzaki, K.}, } @ARTICLE{SORIA_2014_ARTICLE_SCMQBCMOP_260814, AUTHOR = {Soria, C. and Calzolari, N. and Monachini, M. and Quochi, V. and Bel, N. and Choukri, K. and Mariani, J. and Odijk, J. 
and Piperidis, S.}, TITLE = {The language resource Strategic Agenda: the FLaReNet synthesis of community recommendations}, YEAR = {2014}, ABSTRACT = {The main purpose of this paper is to serve as a landmark for future research and in particular for future strategic, infrastructural and coordination initiatives. It presents a preliminary plan for actions and infrastructures that could become the basis for future initiatives in the sector of Language Resources and Technologies (LRTs). The FLaReNet Language Resource Strategic Agenda presents a set of recommendations for the development and progress of LRT in Europe, as issued from a three-year consultation of the FLaReNet European project. Recommendations cover a broad range of topics and activities, spanning over production and use of language resources, licensing, maintenance and preservation issues, infrastructures for language resources, resource identification and sharing, evaluation and validation, interoperability and policy issues. The intended recipients belong to a large set of players and stakeholders in LRT, ranging from individuals to research and education institutions, to policy-makers, funding agencies, SMEs and large companies, service and media providers}, KEYWORDS = {Strategic agenda, Language resources planning, Recommended priority actions}, PAGES = {753-775}, URL = {https://iris.cnr.it/handle/20.500.14243/260814}, VOLUME = {48 (4)}, DOI = {10.1007/s10579-014-9279-y}, ISSN = {1574-020X}, JOURNAL = {LANGUAGE RESOURCES AND EVALUATION}, } @INCOLLECTION{CALZOLARI_2014_INCOLLECTION_CMQST_259412, AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {Lexicons, Terminologies, Ontologies: Reflections from Experiences in Resource Construction}, YEAR = {2014}, ABSTRACT = {This contribution aims at highlighting the strong interconnection between lexicons, terminologies and ontologies and especially the fundamental role that ontologies and lexica mutually play.
Our view is that lexical resources are evolving in nature, from ontologically based lexicons we are going towards lexically based ontologies. We explore different instantiations of the current trend of using formal ontologies as a core module of computational lexicons, presenting the advantages especially in multilingual and terminological contexts. We present work showing that the lexical knowledge already present in non formal computational lexicons can be exploited to derive or enrich a formal ontology without much manual effort. In the terminology domain, we describe the construction of a resource for biology, directly linked to a parallel domain-ontology, that combines characteristics of both lexicons and terminologies, so that it can allow for intelligent access to content. Finally, we describe our experience in two projects in which formal ontologies play a central role in the context of multilingual computational lexicons, where the ontology is what acts as the glue among the different monolingual lexicons and what provides cross-lingual reasoning capabilities}, KEYWORDS = {Computational Lexicons, Ontology, Terminology, Interoperability, Standards}, PAGES = {103-121}, URL = {http://www.springer.com/computer/ai/book/978-3-642-45326-7}, DOI = {10.1007/978-3-642-45327-4_7}, PUBLISHER = {Springer (Berlin Heidelberg, DEU)}, ISBN = {978-3-642-45326-7}, CONFERENCE_PLACE = {Berlin Heidelberg}, BOOKTITLE = {Language, Culture, Computation. Computational Linguistics and Linguistics. Essays Dedicated to Yaacov Choueka on the Occasion of His 75th Birthday, Part III}, EDITOR = {Dershowitz, N. and Nissan, E.}, } @INPROCEEDINGS{ANTICO_2014_INPROCEEDINGS_AQMM_259425, AUTHOR = {Antico, G. and Quochi, V. and Monachini, M.
and Martinelli, M.}, TITLE = {Marrying Technical Writing with LRT}, YEAR = {2014}, ABSTRACT = {In the last years the Technical Writer operational scenarios and the workflow sensibly changed; specifically, "free style" writing-or manual writing-has become outdated and technical writing is now much more concerned with structured management of content than in the past. Technical writing has become more demanding due to a number of factors among which the rise and spread of mobile devices usage. This paper discusses the new needs of technical writing and content management business and how LRT can help it improve quality and productivity}, KEYWORDS = {controlled language, technical writing, content management systems}, PAGES = {19-25}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation (LREC)}, CONFERENCE_PLACE = {Paris}, EDITOR = {Isahara, H. and Lee, K. S. C. S. and Nam, S.}, } @INPROCEEDINGS{BARTOLINI_2014_INPROCEEDINGS_BQDRM_228452, AUTHOR = {Bartolini, R. and Quochi, V. and De Felice, I. and Russo, I. and Monachini, M.}, TITLE = {From Synsets to Videos: Enriching ItalWordNet Multimodally}, YEAR = {2014}, ABSTRACT = {The paper describes the multimodal enrichment of ItalWordNet action verbs' entries by means of an automatic mapping with a conceptual ontology of action types instantiated by video scenes (ImagAct). The two resources present significative differences as well as interesting complementary features, such that a mapping of these two resources can lead to an enrichment of IWN, through the connection between synsets and videos apt to illustrate the meaning described by glosses. Here, we describe an approach inspired by ontology matching methods for the automatic mapping of ImagAct video scenes onto ItalWordNet.
The experiments described in the paper are conducted on Italian, but the same methodology can be extended to other languages for which WordNets have been created, since ImagAct is available also for English, Chinese and Spanish. This source of multimodal information can be exploited to design second language learning tools, as well as for language grounding in action recognition in video sources and potentially for robotics}, KEYWORDS = {Action ontology, Multimodality, WordNet}, PAGES = {3110-3117}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {LREC 2014. European Language Resources Association ELRA: Paris (Francia)}, CONFERENCE_PLACE = {Paris}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{DEFELICE_2014_INPROCEEDINGS_DBRQM_224510, AUTHOR = {De Felice, I. and Bartolini, R. and Russo, I. and Quochi, V. and Monachini, M.}, TITLE = {Evaluating ImagAct-WordNet mapping for English and Italian through videos}, YEAR = {2014}, ABSTRACT = {In this paper we present the results of the evaluation of an automatic mapping between two lexical resources, WordNet/ItalWordNet and ImagAct, a conceptual ontology of action types instantiated by video scenes. Results are compared with those obtained from a previous experiment performed only on Italian data. 
Differences between the two evaluation strategies, as well as between the quality of the mappings for the two languages considered in this paper, are iscussed}, KEYWORDS = {Language Resources (LRs)}, PAGES = {128-131}, URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-CLICit-2014.pdf}, DOI = {10.12871/CLICIT2014126}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-88-67-41472-7}, CONFERENCE_NAME = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 \& the Fourth International Workshop EVALITA 2014. Pisa University Press srl: Pisa (Italia)}, CONFERENCE_PLACE = {Pisa}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{FRONTINI_2014_INPROCEEDINGS_FQM_259129, AUTHOR = {Frontini, F. and Quochi, V. and Monachini, M.}, TITLE = {Polysemy alternations extraction using the PAROLE SIMPLE CLIPS Italian lexicon}, YEAR = {2014}, ABSTRACT = {This paper presents the results of an experiment of polysemy alternations induction from a lexicon (Utt and Pad´o, 2011; Frontini et al., 2014), discussing the results and proposing an amendment in the original algorithm}, KEYWORDS = {Language Resources and Technologies}, PAGES = {175-179}, URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-CLICit-2014.pdf}, DOI = {10.12871/CLICIT2014134}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-88-67-41472-7}, CONFERENCE_NAME = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 \& the Fourth International Workshop EVALITA 2014}, CONFERENCE_PLACE = {Pisa}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{FRONTINI_2014_INPROCEEDINGS_FQPUM_222781, AUTHOR = {Frontini, F. and Quochi, V. and Padó, S. and Utt, J. 
and Monachini, M.}, TITLE = {Polysemy Index for Nouns: an Experiment on Italian using the PAROLE SIMPLE CLIPS Lexical Database}, YEAR = {2014}, ABSTRACT = {An experiment is presented to induce a set of polysemous basic type alternations (such as ANIMAL-FOOD, or BUILDING-INSTITUTION) by deriving them from the sense alternations found in an existing lexical resource. The paper builds on previous work and applies those results to the Italian lexicon PAROLE SIMPLE CLIPS. The new results show how the set of frequent type alternations that can be induced from the lexicon is partly different from the set of polysemy relations selected and explicitly applied by lexicographers when building it. The analysis of mismatches shows that frequent type alternations do not always correspond to prototypical polysemy relations, nevertheless the proposed methodology represents a useful tool offered to lexicographers to systematically check for possible gaps in their resource}, KEYWORDS = {Polysemy, lexical resources, semantics}, PAGES = {2955-2963}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {LREC 2014 Ninth International Conference on Language Resources and Evaluation Proceedings}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{PANUNZI_2014_INPROCEEDINGS_PDGJMMQR_257368, AUTHOR = {Panunzi, A. and De Felice, I. and Gregori, L. and Jacoviello, S. and Monachini, M. and Moneglia, M. and Quochi, V. and Russo, I.}, TITLE = {Translating action verbs using a dictionary of images: the IMAGACT ontology}, YEAR = {2014}, ABSTRACT = {Action verbs have many meanings, covering actions in different ontological types. 
Moreover, each language categorizes action in its own way. One verb can refer to many different actions and one action can be identified by more than one verb. The range of variations within and across languages is largely unknown, causing trouble in all translation tasks. IMAGACT is a corpus-based ontology of action concepts, derived from English and Italian spontaneous speech corpora, which makes use of the universal language of images to identify the different action types extended by verbs referring to action in English, Italian, Chinese and Spanish. This paper presents the IMAGACT search interface and the various kinds of linguistic information the user can derive from it. IMAGACT makes explicit the variation of meaning of action verbs within one language and allows comparisons of verb variations within and across languages. Because the action concepts are represented with videos, extension into new languages beyond those presently implemented in IMAGACT is done using competence-based judgments by mother-tongue informants, without intense lexicographic work involving underdetermined semantic descriptions}, KEYWORDS = {Action verbs, Image ontology, Multilingual dictionary, Computer-aided translation}, PAGES = {1163-1170}, URL = {http://euralex2014.eurac.edu/en/callforpapers/Documents/EURALEX%202014_gesamt.pdf}, DOI = {10.13140/2.1.3719.2320}, PUBLISHER = {EURAC (Bolzano, ITA)}, ISBN = {978-88-88906-97-3}, CONFERENCE_NAME = {XVI EURALEX International Congress: The User in Focus}, CONFERENCE_PLACE = {Bolzano}, BOOKTITLE = {Proceedings of the XVI EURALEX International Congress: The User in Focus}, EDITOR = {Abel, A. and Vettori, C. and Ralli, N.}, } @INPROCEEDINGS{KHAN_2013_INPROCEEDINGS_KFDMQ_259365, AUTHOR = {Khan, F. and Frontini, F. and Del Gratta, R. and Monachini, M. 
and Quochi, V.}, TITLE = {Generative Lexicon Theory and Linguistic Linked Open Data}, YEAR = {2013}, ABSTRACT = {In this paper we look at how Generative Lexicon theory can assist in providing a more thorough definition of word senses as links between items in a RDF-based lexicon and concepts in an ontology. We focus on the definition of lexical sense in lemon and show its limitations before defining a new model based on lemon and which we term lemonGL. This new model is an initial attempt at providing a way of structuring lexico-ontological resources as linked data in such a way as to allow a rich representation of word meaning (following the GL theory) while at the same time (attempting to) re-main faithful to the separation between the lexicon and the ontology as recommended by the lemon model}, URL = {https://iris.cnr.it/handle/20.500.14243/259365}, } @INPROCEEDINGS{CASELLI_2012_INPROCEEDINGS_CFQRR_222834, AUTHOR = {Caselli, T. and Frontini, F. and Quochi, V. and Rubino, F. and Russo, I.}, TITLE = {Flexible Acquisition of Subcategorization Frames in Italian}, YEAR = {2012}, ABSTRACT = {Lexica of predicate-argument structures constitute a useful tool for several tasks in NLP. This paper describes a web-service system for automatic acquisition of verb subcategorization frames (SCFs) from parsed data in Italian. The system acquires SCFs in an unsupervised manner. We created two gold standards for the evaluation of the system, the first by mixing together information from two lexica (one manually created and the second automatically acquired) and manual exploration of corpus data and the other annotating data extracted from a specialized corpus (environmental domain). Data filtering is accomplished by means of the maximum likelihood estimate (MLE). The evaluation phase has allowed us to identify the best empirical MLE threshold for the creation of a lexicon (P=0. 653, R=0. 557, F1=0. 601). 
In addition to this, we assigned to the extracted entries of the lexicon a confidence score based on the relative frequency and evaluated the extractor on domain specific data. The confidence score will allow the final user to easily select the entries of the lexicon in terms of their reliability: one of the most interesting feature of this work is the possibility the final users have to customize the results of the SCF extractor, obtaining different SCF lexica in terms of size and accuracy}, KEYWORDS = {lexicon, automatic acquisition, subcategorisation frames}, PAGES = {2842-2848}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/summaries/390.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {9782951740877}, CONFERENCE_NAME = {Eight International Conference on Language Resources and Evaluation (LREC'12)}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {Proceedings of the Eight International Conference on Language Resources and Evaluation (LREC'12)}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{DELGRATTA_2012_INPROCEEDINGS_DFMQRAL_117790, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Quochi, V. and Rubino, F. and Abrate, M. and Lo Duca, A.}, TITLE = {L-LEME: an Automatic Lexical Merger based on the LMF Standard}, YEAR = {2012}, ABSTRACT = {The present paper describes LMF LExical MErger (L-LEME), an architecture to combine two lexicons in order to obtain new resource(s). L-LEME relies on standards, thus exploiting the benefits of the ISO Lexical Markup Framework (LMF) to ensure interoperability. L-LEME is meant to be dynamic and heavily adaptable: it allows the users to configure it to meet their specific needs. 
The L-LEME architecture is composed of two main modules: the Mapper, which takes in input two lexicons A and B and a set of user-defined rules and instructions to guide the mapping process (Directives D) and gives in output all matching entries. The algorithm also calculates a cosine similarity score. The Builder takes in input the previous results, a set of Directives D1 and produces a new LMF lexicon C. The Directives allow the user to define its own building rules and different merging scenarios. L-LEME is applied to a specific concrete task within the PANACEA project, namely the merging of two Italian SubCategorization Frame (SCF) lexicons. The experiment is interesting in that A and B have different philosophies behind, being A built by human introspection and B automatically extracted. Ultimately, L-LEME has interesting repercussions in many language technology applications}, KEYWORDS = {LMF, Lexicon mapping, similarity score}, PAGES = {31-40}, URL = {https://iris.cnr.it/handle/20.500.14243/117790}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {The Eight International Conference on Language Resources and Evaluation (LREC) 2012}, BOOKTITLE = {Proceedings of the LREC 2012 Workshop on Language Resource Merging}, EDITOR = {Bel, N. and Gavrilidou, M. and Monachini, M. and Quochi, V. and Rimell, L.}, } @INPROCEEDINGS{FRONTINI_2012_INPROCEEDINGS_FQR_128272, AUTHOR = {Frontini, F. and Quochi, V. and Rubino, F.}, TITLE = {Automatic Creation of Quality Multi-Word Lexica from Noisy Text Data}, YEAR = {2012}, ABSTRACT = {This paper describes the design of a tool for the automatic creation of multi-word lexica that is deployed as a web service and runs on automatically web-crawled data within the framework of the PANACEA platform. The main purpose of our task is to provide a (computationally "light") tool that creates a full high quality lexical resource of multi-word items. 
Within the platform, this tool is typically inserted in a work flow whose first step is automatic web-crawling. Therefore, the input data of our lexical extractor is intrinsically noisy. The paper evaluates the capacity of the tool to deal with noisy data, and in particular with texts containing a significant amount of duplicated paragraphs. The accuracy of the extraction of multi-word expressions from the original crawled corpus is compared to the accuracy of the extraction from a later "de-duplicated" version of the corpus. The paper shows how our method can extract with sufficiently good precision also from the original, noisy crawled data. The output of our tool is a multi-word lexicon formatted and encoded in XML according to the Lexical Mark-up Framework}, KEYWORDS = {Lexical induction, multi-word extraction, web-based distributed platform, noisy data}, URL = {http://www.kde.cs.tut.ac.jp/~aono/pdf/COLING2012/AND/pdf/AND04.pdf}, PUBLISHER = {ACM, Association for computing machinery (New York, USA)}, ISBN = {978-1-4503-1919-5}, CONFERENCE_NAME = {AND 2012}, CONFERENCE_PLACE = {New York}, BOOKTITLE = {Proceedings of the Sixth Workshop on Analytics for Noisy Unstructured Text Data}, } @INPROCEEDINGS{POCH_2012_INPROCEEDINGS_PTHQB_259420, AUTHOR = {Poch, M. and Toral, A. and Hamon, O. and Quochi, V. and Bel, N.}, TITLE = {Towards a User-Friendly Platform for Building Language Resources based on Web Services}, YEAR = {2012}, ABSTRACT = {This paper presents the platform developed in the PANACEA project, a distributed factory that automates the stages involved in the acquisition, production, updating and maintenance of Language Resources required by Machine Translation and other Language Technologies. We adopt a set of tools that have been successfully used in the Bioinformatics field, they are adapted to the needs of our field and used to deploy web services, which can be combined to build more complex processing chains (workflows). 
This paper describes the platform and its different components (web services, registry, workflows, social network and interoperability). We demonstrate the scalability of the platform by carrying out a set of massive data experiments. Finally, a validation of the platform across a set of required criteria proves its usability for different types of users (non-technical users and providers)}, KEYWORDS = {service platform, workflow, interoperability}, PAGES = {1156-1163}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/543_Paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {Eighth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {Proceedings of the Eighth International Conference on Language Resources and Evaluation, LREC 2012}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{QUOCHI_2012_INPROCEEDINGS_QFR_128266, AUTHOR = {Quochi, V. and Frontini, F. and Rubino, F.}, TITLE = {A MWE Acquisition and Lexicon Builder Web Service}, YEAR = {2012}, ABSTRACT = {This paper describes the development of a web-service tool for the automatic extraction of Multi-word expressions lexicons, which has been integrated in a distributed platform for the automatic creation of linguistic resources. The main purpose of the work described is thus to provide a (computationally "light") tool that produces a full lexical resource: multi-word terms/items with relevant and useful attached information that can be used for more complex processing tasks and applications (e. g. parsing, MT, IE, query expansion, etc.). The output of our tool is a MW lexicon formatted and encoded in XML according to the Lexical Mark-up Framework. The tool is already functional and available as a service. 
Evaluation experiments show that the tool precision is of about 80%}, KEYWORDS = {Multiword extraction, lexical resources, LMF, web services.}, PAGES = {2291-2306}, URL = {http://aclweb.org/anthology/C/C12/C12-1140.pdf}, PUBLISHER = {Curran Associates (Red Hook, NY 12571, USA)}, ISBN = {9781627483896}, CONFERENCE_NAME = {International Conference on Computational Linguistics (COLING)}, CONFERENCE_PLACE = {Red Hook, NY 12571}, BOOKTITLE = {Proceedings of COLING 2012: Technical Papers}, EDITOR = {Kay, M. and Boitet, C.}, } @INPROCEEDINGS{RUBINO_2012_INPROCEEDINGS_RFQ_128261, AUTHOR = {Rubino, F. and Frontini, F. and Quochi, V.}, TITLE = {Integrating NLP Tools in a Distributed Environment: A Case Study Chaining a Tagger with a Dependency Parser}, YEAR = {2012}, ABSTRACT = {The present paper tackles the issue of PoS tag conversion within the framework of a distributed web service platform for the automatic creation of language resources. PoS tagging is now considered a "solved problem"; yet, because of the differences in the tagsets, interchange of the various PoS taggers vailable is still hampered. In this paper we describe the implementation of a PoS-tagged-corpus converter, which is needed for chaining together in a workflow the FreeLing PoS tagger for Italian and the DESR dependency parser, given that these two tools have been developed independently. The conversion problems experienced during the implementation, related to the properties of the different tagsets and of tagset conversion in general, are discussed together with the solutions adopted. Finally, the converter is evaluated by assessing the impact of conversion on the performance of the dependency parser by comparing with the outcome of the native pipeline. From this we learn that in most cases parsing errors are due to actual tagging errors, and not to conversion itself. 
Besides, information on accuracy loss is an important feature in a distributed environment of (NLP) services, where users need to decide which services best suit their needs}, KEYWORDS = {PoS tag conversion, interoperability, NLP pipelines}, PAGES = {2125-2131}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/summaries/726.html}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, ISBN = {9782951740877}, CONFERENCE_NAME = {Language Resources and Evaluation Conference 2012}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {Proceedings of the Eight International Conference on Language Resources and Evaluation (LREC'12)}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{SORIA_2012_INPROCEEDINGS_SBCMMOPQC_5324, AUTHOR = {Soria, C. and Bel, N. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Calzolari, N.}, TITLE = {The FLaReNet Strategic Language Resource Agenda}, YEAR = {2012}, ABSTRACT = {The FLaReNet Strategic Agenda highlights the most pressing needs for the sector of Language Resources and Technologies and presents a set of recommendations for its development and progress in Europe, as issued from a three-year consultation of the FLaReNet European project. The FLaReNet recommendations are organised around nine dimensions: a) documentation b) interoperability c) availability, sharing and distribution d) coverage, quality and adequacy e) sustainability f) recognition g) development h) infrastructure and i) international cooperation. As such, they cover a broad range of topics and activities, spanning over production and use of language resources, licensing, maintenance and preservation issues, infrastructures for language resources, resource identification and sharing, evaluation and validation, interoperability and policy issues. 
The intended recipients belong to a large set of players and stakeholders in Language Resources and Technology, ranging from individuals to research and education institutions, to policy-makers, funding agencies, SMEs and large companies, service and media providers. The main goal of these recommendations is to serve as an instrument to support stakeholders in planning for and addressing the urgencies of the Language Resources and Technologies of the future}, KEYWORDS = {strategic agenda, language resources planning, recommended priority actions}, PAGES = {1379-1386}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/index.html}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {The Eight International Conference on Language Resources and Evaluation (LREC'12)}, BOOKTITLE = {Proceedings of the 8th international conference on Language Resources and Evaluation (LREC2012)}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Dogan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{QUOCHI_2012_INPROCEEDINGS_Q_128313, AUTHOR = {Quochi, V.}, TITLE = {How predictive are grammatical constructions in Italian? The case of the caused-motion construction}, YEAR = {2012}, ABSTRACT = {Differently from English, Italian has a rich morpho logical system and a relative free word-order. For these reasons, the suitability of a "full-scope" constructional approach to Italian is not given. Although Goldberg's (1995, 2006) version of Construction grammar language is constructions all the way down (or up), one could still argue that in Italian, i. e. a language rich in morphology, abstract, grammatical constructions do not play a role, thus weakening the constructionist view. One of the strong points in favour of goldberg's approach is that argument structure constructions in English have been found to be highly predictive of sentence meaning (Goldberg et al. 2005), which provides a motivationfor their early acquisition by children. 
Many of such studies and evidences are still missing for Italian. This contribution will therefore attempt to start filling this gap by testing the predictive power of the Italian Caused Motion Construction. Data is taken from the CHILDES database (MacWhinney 2000) and annotated according to constructional properties and verb meaning. The annotation is then used to calculate the Cue and Category Validity (Murphy 2002) of both the Construction and the main verbs, which measures their predictive power (i. e respectively their reliability and availability) in relation to the overall sentence meaning. Results show that the Italian Caused Motion Construction is not only more reliable than verbs as a predictor of overall sentence meaning, but it is also more available}, KEYWORDS = {Construction Grammar, Psicolinguistica, Linguistica del corpus}, PAGES = {265-265}, URL = {http://www.sle2012.eu/downloads/Book_abstracts_SLE2012_23aug_final.pdf}, CONFERENCE_NAME = {45th Annual Meeting of the Societas Linguistica Europaea (SLE2012)}, } @TECHREPORT{POCH_2012_TECHREPORT_PHQDTTPB_129399, AUTHOR = {Poch, M. and Hamon, O. and Quochi, V. and Del Gratta, R. and Toral, A. and Thurmair, G. and Prokopidis, P. and Bel, N.}, TITLE = {D3. 4 Third version (v4) of the integrated platform and documentation}, YEAR = {2012}, ABSTRACT = {The deliverable describes the third and final version of the PANACEA platform}, KEYWORDS = {infrastrutture, Trattamento del linguaggio naturale}, URL = {https://iris.cnr.it/handle/20.500.14243/129399}, } @TECHREPORT{QUOCHI_2012_TECHREPORT_QFBHPPBTTK_130130, AUTHOR = {Quochi, V. and Frontini, F. and Bartolini, R. and Hamon, O. and Poch Riera, M. and Padro, M. and Bel, N. and Thurmair, G. and Toral, A. and Kamran, A.}, TITLE = {D7. 4 Third evaluation report. Evaluation of PANACEA v3 and produced resources}, YEAR = {2012}, ABSTRACT = {D7. 
4 reports on the evaluation of the different components integrated in the PANACEA third cycle of development as well as the final validation of the platform itself. All validation and evaluation experiments follow the evaluation criteria already described in D7. 1. The main goal of WP7 tasks was to test the (technical) functionalities and capabilities of the middleware that allows the integration of the various resource-creation components into an interoperable distributed environment (WP3) and to evaluate the quality of the components developed in WP5 and WP6. The content of this deliverable is thus complementary to D8. 2 and D8. 3 that tackle advantages and usability in industrial scenarios. It has to be noted that the PANACEA third cycle of development addressed many components that are still under research. The main goal for this evaluation cycle thus is to assess the methods experimented with and their potentials for becoming actual production tools to be exploited outside research labs. For most of the technologies, an attempt was made to re-interpret standard evaluation measures, usually in terms of accuracy, precision and recall, as measures related to a reduction of costs (time and human resources) in the current practices based on the manual production of resources. In order to do so, the different tools had to be tuned and adapted to maximize precision and for some tools the possibility to offer confidence measures that could allow a separation of the resources that still needed manual revision has been attempted. Furthermore, the extension to other languages in addition to English, also a PANACEA objective, has been evaluated. The main facts about the evaluation results are now summarized}, KEYWORDS = {PANACEA, evaluation, machine translation}, URL = {https://iris.cnr.it/handle/20.500.14243/130130}, } @TECHREPORT{RIMELL_2012_TECHREPORT_RBPFMQ_130143, AUTHOR = {Rimell, L. and Bel, N. and Padró, M. and Frontini, F. and Monachini, M. 
and Quochi, V.}, TITLE = {D6. 2 Integrated Final Version of the Components for Lexical Acquisition}, YEAR = {2012}, ABSTRACT = {The PANACEA project has addressed one of the most critical bottlenecks that threaten the development of technologies to support multilingualism in Europe, and to process the huge quantity of multilingual data produced annually. Any attempt at automated language processing, particularly Machine Translation (MT), depends on the availability of language-specific resources. Such Language Resources (LR) contain information about the language's lexicon, i. e. the words of the language and the characteristics of their use. In Natural Language Processing (NLP), LRs contribute information about the syntactic and semantic behaviour of words-i. e. their grammar and their meaning-which inform downstream applications such as MT. To date, many LRs have been generated by hand, requiring significant manual labour from linguistic experts. However, proceeding manually, it is impossible to supply LRs for every possible pair of European languages, textual domain, and genre, which are needed by MT developers. Moreover, an LR for a given language can never be considered complete nor final because of the characteristics of natural language, which continually undergoes changes, especially spurred on by the emergence of new knowledge domains and new technologies. PANACEA has addressed this challenge by building a factory of LRs that progressively automates the stages involved in the acquisition, production, updating and maintenance of LRs required by MT systems. The existence of such a factory will significantly cut down the cost, time and human effort required to build LRs. WP6 has addressed the lexical acquisition component of the LR factory, that is, the techniques for automated extraction of key lexical information from texts, and the automatic collation of lexical information into LRs in a standardized format. 
The goal of WP6 has been to take existing techniques capable of acquiring syntactic and semantic information from corpus data, improving upon them, adapting and applying them to multiple languages, and turning them into powerful and flexible techniques capable of supporting massive applications. One focus for improving the scalability and portability of lexical acquisition techniques has been to extend exiting techniques with more powerful, less "supervised" methods. In NLP, the amount of supervision refers to the amount of manual annotation which must be applied to a text corpus before machine learning or other techniques are applied to the data to compile a lexicon. More manual annotation means more accurate training data, and thus a more accurate LR. However, given that it is impractical from a cost and time perspective to manually annotate the vast amounts of data required for multilingual MT across domains, it is important to develop techniques which can learn from corpora with less supervision. Less supervised methods are capable of supporting both large-scale acquisition and efficient domain adaptation, even in the domains where data is scarce. Another focus of lexical acquisition in PANACEA has been the need of LR users to tune the accuracy level of LRs. Some applications may require increased precision, or accuracy, where the application requires a high degree of confidence in the lexical information used. At other times a greater level of coverage may be required, with information about more words at the expense of some degree of accuracy. Lexical acquisition in PANACEA has investigated confidence thresholds for lexical acquisition to ensure that the ultimate users of LRs can generate lexical data from the PANACEA factory at the desired level of accuracy}, KEYWORDS = {Lexical Acquisition}, URL = {http://www.panacea-lr.eu/system/deliverables/PANACEA_D6.2.pdf}, } @TECHREPORT{RIMELL_2012_TECHREPORT_RBPFMQD_130256, AUTHOR = {Rimell, L. and Bel, N. 
and Padrò, M. and Frontini, F. and Monachini, M. and Quochi, V. and Del Gratta, R.}, TITLE = {D6. 3 Monolingual lexica for English, Spanish and Italian tuned for a particular domain (LAB and ENV)}, YEAR = {2012}, ABSTRACT = {This document presents the lexica acquired using PANACEA platform for Labour and Environment domains. The languages of the lexica are English, Spanish and Italian. The lexical information acquired depends on the language, according to the available tools in the platform}, KEYWORDS = {Lexicon Acqusition}, URL = {http://www.panacea-lr.eu/system/deliverables/PANACEA_D6.3.pdf}, } @TECHREPORT{RIMELL_2012_TECHREPORT_RBPFMQD_130161, AUTHOR = {Rimell, L. and Bel, N. and Padró, M. and Frontini, F. and Monachini, M. and Quochi, V. and Del Gratta, R.}, TITLE = {D6.5 Merged dictionaries}, YEAR = {2012}, ABSTRACT = {This document presents the merged dictionaries delivered in PANACEA. Those dictionaries result from merging already existing lexica, generally for general domain, with domain specific lexica acquired using PANACEA platform. The domain specific lexica are presented and delivered in D6.3 and the merging repository that allowed the multilevel merging in D6.4}, KEYWORDS = {merged dictionaries, computational lexicon}, URL = {http://www.panacea-lr.eu//en/deliverables/list}, } @ARTICLE{THOMPSON_2011_ARTICLE_TMMCDLMMPQRSVRA_175344, AUTHOR = {Thompson, P. and McNaught, J. and Montemagni, S. and Calzolari, N. and Del Gratta, R. and Lee, V. and Marchi, S. and Monachini, M. and Pezik, P. and Quochi, V. and Rupp, C. and Sasaki, Y. and Venturi, G. and Rebholz-Schuhmann, D. and Ananiadou, S.}, TITLE = {The BioLexicon: a large-scale terminological resource for biomedical text mining}, YEAR = {2011}, ABSTRACT = {Background Due to the rapidly expanding body of biomedical literature, biologists require increasingly sophisticated and efficient systems to help them to search for relevant information.
Such systems should account for the multiple written variants used to represent biomedical concepts, and allow the user to search for specific pieces of knowledge (or events) involving these concepts, e.g., protein-protein interactions. Such functionality requires access to detailed information about words used in the biomedical literature. Existing databases and ontologies often have a specific focus and are oriented towards human use. Consequently, biological knowledge is dispersed amongst many resources, which often do not attempt to account for the large and frequently changing set of variants that appear in the literature. Additionally, such resources typically do not provide information about how terms relate to each other in texts to describe events. Results This article provides an overview of the design, construction and evaluation of a large-scale lexical and conceptual resource for the biomedical domain, the BioLexicon. The resource can be exploited by text mining tools at several levels, e.g., part-of-speech tagging, recognition of biomedical entities, and the extraction of events in which they are involved. As such, the BioLexicon must account for real usage of words in biomedical texts. In particular, the BioLexicon gathers together different types of terms from several existing data resources into a single, unified repository, and augments them with new term variants automatically extracted from biomedical literature. Extraction of events is facilitated through the inclusion of biologically pertinent verbs (around which events are typically organized) together with information about typical patterns of grammatical and semantic behaviour, which are acquired from domain-specific texts. In order to foster interoperability, the BioLexicon is modelled using the Lexical Markup Framework, an ISO standard. Conclusions The BioLexicon contains over 2.2 M lexical entries and over 1.8 M terminological variants, as well as over
3.3 M semantic relations, including over 2 M synonymy relations. Its exploitation can benefit both application developers and users. We demonstrate some such benefits by describing integration of the resource into a number of different tools, and evaluating improvements in performance that this can bring}, KEYWORDS = {Text Mining, Information Extraction, Computational Lexicon}, PAGES = {1-29}, URL = {http://www.biomedcentral.com/1471-2105/12/397}, VOLUME = {12 (397)}, DOI = {10.1186/1471-2105-12-397}, ISSN = {1471-2105}, JOURNAL = {BMC BIOINFORMATICS}, } @EDITORIAL{CALZOLARI_2011_EDITORIAL_CBSGMQ_174748, AUTHOR = {Calzolari, N. and Baroni, P. and Soria, C. and Goggi, S. and Monachini, M. and Quochi, V.}, TITLE = {Proceedings of the 3rd European Language Resources and Technologies Forum: Language Resources in the Sharing Age-the Strategic Agenda}, YEAR = {2011}, ABSTRACT = {Proceedings of the third FLaReNet forum on the European Language Resources and Technologies, held in Venezia, at the Auditorium Santa Margherita of the Università Ca' Foscari, on 26-27 May 2011}, KEYWORDS = {Language Resources, Language Technologies}, PAGES = {86}, URL = {http://www.flarenet.eu/sites/default/files/FLaReNet_Forum_2011_Proceedings.pdf}, } @INPROCEEDINGS{CALZOLARI_2011_INPROCEEDINGS_CMQ_217943, AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V.}, TITLE = {Interoperability Framework: The FLaReNet action plan proposal}, YEAR = {2011}, ABSTRACT = {Standards are fundamental to exchange, preserve, maintain and integrate data and language resources, and as an essential basis of any language resource infrastructure. This paper promotes an Interoperability Framework as a dynamic environment of standards and guidelines, also intended to support the provision of language-(web)service interoperability. In the past two decades, the need to define common practices and formats for linguistic resources has been increasingly recognized and sought. 
Today open, collaborative, shared data is at the core of a sound language strategy, and standardisation is actively on the move. This paper first describes the current landscape of standards, and presents the major barriers to their adoption; then, it describes those scenarios that critically involve the use of standards and provide a strong motivation for their adoption; lastly, a series of actions and steps needed to operationalise standards and achieve a full interoperability for Language Resources and Technologies are proposed}, KEYWORDS = {Language Resources, standards}, PAGES = {41-49}, URL = {https://iris.cnr.it/handle/20.500.14243/217943}, ISBN = {978-974-466-564-5}, CONFERENCE_NAME = {Workshop on Language Resources, Technology and Services in the Sharing Paradigm}, } @INPROCEEDINGS{QUOCHI_2011_INPROCEEDINGS_Q_225688, AUTHOR = {Quochi, V.}, TITLE = {The development of Light-'do' Verb Constructions in Italian}, YEAR = {2011}, ABSTRACT = {This contribution presents the results of a study of the development of Light 'do' Verb Constructions in Italian based on naturalistic data. The claim is that there exists a Light Verb pivot schema that accounts for new productive formations and that this pattern is learnt by young children because it constitutes a labeling technique for naming new events, activities and situations. The findings of this research support two hypotheses of language acquisition. The results are based on analysis of longitudinal transcriptions of adult children interactions contained in the CHILDES databank (MacWhinney 2000)}, KEYWORDS = {child language, construction grammar, light verb constructions}, PAGES = {256-257}, URL = {http://sle2011.cilap.es/downloads/book_abstracts.pdf}, CONFERENCE_NAME = {SLE 2011-44 TH ANNUAL MEETING}, BOOKTITLE = {SLE 2011-44TH ANNUAL MEETING BOOK OF ABSTRACT}, EDITOR = {Arista, J. M.}, } @TECHREPORT{ARRANZ_2011_TECHREPORT_ABBCCDFGMQRR_231385, AUTHOR = {Arranz, V. and Bel, N. and Budin, G. and Caselli, T. 
and Choukri, K. and Del Gratta, R. and Frontini, F. and Goggi, S. and Monachini, M. and Quochi, V. and Rubino, F. and Russo, I.}, TITLE = {The FLaReNet Databook}, YEAR = {2011}, ABSTRACT = {The FLaReNet Databook is not only the collection of all the factual material collected during the activities of the project, but also a set on innovative initiatives and instruments that will remain in place for the continuous collection of such "facts". The purpose of the Databook is in fact, on one side, to consolidate the analyses carried out in the project and, at the same time, to set up the proper mechanisms that will enable the provision of a continuous stream of relevant factual material, also after the end of the project}, KEYWORDS = {Language Resources (LRs)}, PAGES = {1-8}, URL = {http://www.flarenet.eu/?q=FLaReNet_Databook}, } @TECHREPORT{CALZOLARI_2011_TECHREPORT_CBCMMOPQS_174181, AUTHOR = {Calzolari, N. and Bel, N. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C.}, TITLE = {Final FLaReNet deliverable: Language Resources for the Future-The Future of Language Resources}, YEAR = {2011}, ABSTRACT = {Language Technologies (LT), together with their backbone, Language Resources (LR), provide an essential support to the challenge of Multilingualism and ICT of the future. The main task of language technologies is to bridge language barriers and to help creating a new environment where information flows smoothly across frontiers and languages, no matter the country, and the language, of origin. To achieve this goal, all players involved need to act as a community able to join forces on a set of shared priorities. However, until now the field of Language Resources and Technology has long suffered from an excess of individuality and fragmentation, with a lack of coherence concerning the priorities for the field, the direction to move, not to mention a common timeframe. 
The context encountered by the FLaReNet project was thus represented by an active field needing a coherence that can only be given by sharing common priorities and endeavours. FLaReNet has contributed to the creation of this coherence by gathering a wide community of experts and making them participate in the definition of an exhaustive set of recommendations}, KEYWORDS = {language resources and technologies, infrastructures}, PAGES = {97}, URL = {https://iris.cnr.it/handle/20.500.14243/174181}, } @TECHREPORT{CALZOLARI_2011_TECHREPORT_CQS_174758, AUTHOR = {Calzolari, N. and Quochi, V. and Soria, C.}, TITLE = {FLaReNet Strategic Language Resource Agenda}, YEAR = {2011}, ABSTRACT = {Despite the complexity of handling its languages, the European Union has established that cultural and language differences are a unique asset to be preserved. Europe needs to find means-such as technological ones-to overcome the language barriers to support citizens and industry in a multilingual globalised world. The large majority of industrial technological applications that handle natural language, i.e. Machine Translation, Crosslingual Information Retrieval, Multilingual Information Extraction, Automatic Document Indexing, Question Answering, Natural Language Interfaces, etc., include Language Resources as critical components. Although Language Technologies may consist of language independent engines, they depend on the availability of language-dependent knowledge under the form of Language Resources for their real-life implementation. At the same time, it is proved that a critical mass of Language Resources can make advancement in research and technology development possible and quicker, making Europe the leader of the market related to multilingualism. 
Companies such as Google or Microsoft play a dominant role in this framework, as they have access to a huge amount of data in many different languages, devote considerable resources to Language Technologies, have massive computing power and a direct research-to-application pipeline using a new business model based on so-called "free" services. The fact that a US company like Google is delivering some of the most comprehensive Language Technology solutions to support multilingualism should raise concern among EU officials}, KEYWORDS = {Language resources, infrastructures}, PAGES = {23}, URL = {https://iris.cnr.it/handle/20.500.14243/174758}, } @TECHREPORT{MONACHINI_2011_TECHREPORT_MQCBBCCFHKLMOPPRSUW_177116, AUTHOR = {Monachini, M. and Quochi, V. and Calzolari, N. and Bel, N. and Budin, G. and Caselli, T. and Choukri, K. and Francopoulo, G. and Hinrichs, E. and Krauwer, S. and Lemnitzer, L. and Mariani, J. and Odijk, J. and Piperidis, S. and Przepiorkowski, A. and Romary, L. and Schmidt, H. and Uszkoreit, H. and Wittenburg, P.}, TITLE = {The Standards' Landscape Towards an Interoperability Framework}, YEAR = {2011}, ABSTRACT = {This document proposes an overview of the current scene towards an Interoperability Framework and acts as a reference point for the current standards that the community fosters and encourages to adopt/improve. This initiative is in close synchronization with other relevant initiatives such as CLARIN, ELRA, ISO and TEI and META-Share. The document builds on the CLARIN Standardisation Action Plan and adapts and extends it to the needs of the broader LT Community, beyond the SSH research areas including the industry. The main goal of this document is to give a practical orientation for various LT players, both commercial and academic; the main message being that a harmonized domain of language resources and technology can be achieved stepwise, but that an effort to adopt standards is necessary to overcome fragmentation. 
NB: This is to be intended by no means as a static, closed document, rather a dynamic one which needs to be constantly/periodically revised and updated by the community itself}, KEYWORDS = {Standards, interoperability}, PAGES = {23}, URL = {https://iris.cnr.it/handle/20.500.14243/177116}, } @INPROCEEDINGS{CALZOLARI_2010_INPROCEEDINGS_CSDGQRCMP_65174, AUTHOR = {Calzolari, N. and Soria, C. and Del Gratta, R. and Goggi, S. and Quochi, V. and Russo, I. and Choukri, K. and Mariani, J. and Piperidis, S.}, TITLE = {The LREC Map of Language Resources and Technologies}, YEAR = {2010}, ABSTRACT = {In this paper we present the LREC Map of Language Resources (data and tools), an innovative feature introduced in conjunction with the LREC 2010 Conference. The purpose of the Map is to shed light on the vast amount of resources that represent the background of the research presented at LREC, in the attempt to fill in a gap in the community knowledge about the resources that are used or created worldwide. It also aims at a change of culture in the field, actively engaging each researcher in the documentation task about resources. The Map has been developed on the basis of the information provided by LREC authors during the submission of papers to the LREC 2010 conference and the LREC workshops, and contains information about almost 2000 resources. 
The paper illustrates the motivation behind this initiative, its main characteristics, its relevance and future impact in the field, the metadata used to describe the resources, and finally presents some of the most relevant findings}, KEYWORDS = {LR national/international projects, organizational/policy issues}, PAGES = {949-956}, URL = {http://www.lrec-conf.org/proceedings/lrec2010/index.html}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {LREC 2010 Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {LREC'10-Seventh International Conference on Language Resources and Evaluation. Proceedings}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{DELGRATTA_2010_INPROCEEDINGS_DDBCEMQSTC_65149, AUTHOR = {Del Gratta, R. and D'Onofrio, L. and Bartolini, R. and Caselli, T. and Enea, A. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A. and Calzolari, N.}, TITLE = {A Web-based Architecture for Interoperability of Lexical Resources}, YEAR = {2010}, ABSTRACT = {In this paper we present a Web Service Architecture for managing high level interoperability of Language Resources (LRs) by means of a Service Oriented Architecture (SOA) and the use of ISO standards, such as ISO LMF. We propose a layered architecture which separates the management of legacy resources (data collection) from data aggregation (workflow) and data access (user requests). 
We provide a case study to demonstrate how the proposed architecture is capable of managing data exchange among different lexical services in a coherent way and show how the use of a lexical standard becomes of primary importance when a protocol of interoperability is defined}, KEYWORDS = {Interoperability, Web services, Lexical resources}, PAGES = {53-62}, URL = {http://weblab.iit.cnr.it/kyoto/www2.let.vu.nl/twiki/pub/Kyoto/Publications/icgl2010_DOnofrioetal.pdf}, PUBLISHER = {City university of Hong Kong press (Hong Kong, CHN)}, ISBN = {978-962-442-323-5}, CONFERENCE_NAME = {2nd International Conference on Global Interoperability for Language Resources}, CONFERENCE_PLACE = {Hong Kong}, BOOKTITLE = {2nd International Conference on Global Interoperability for Language Resources, ICGL 2010}, EDITOR = {Fang, A. C. and Ide, N. and Webster, J.}, } @INPROCEEDINGS{JEZEK_2010_INPROCEEDINGS_JQ_65150, AUTHOR = {Jezek, E. and Quochi, V.}, TITLE = {Capturing Coercions in Texts: a First Annotation Exercise}, YEAR = {2010}, ABSTRACT = {In this paper we report the first results of an annotation exercise of argument coercion phenomena performed on Italian texts. Our corpus consists of ca 4000 sentences from the PAROLE sottoinsieme corpus (Bindi et al. 2000) annotated with Selection and Coercion relations among verb-noun pairs formatted in XML according to the Generative Lexicon Mark-up Language (GLML) format (Pustejovsky et al., 2008). For the purposes of coercion annotation, we selected 26 Italian verbs that impose semantic typing on their arguments in either Subject, Direct Object or Complement position. Every sentence of the corpus is annotated with the source type for the noun arguments by two annotators plus a judge. An overall agreement of 0.87 kappa indicates that the annotation methodology is reliable. 
A qualitative analysis of the results allows us to outline some suggestions for improvement of the task: 1) a different account of complex types for nouns has to be devised and 2) a more comprehensive account of coercion mechanisms requires annotation of the deeper meaning dimensions that are targeted in coercion operations, such as those captured by Qualia relations}, KEYWORDS = {Corpus (creation, annotation, etc.), Knowledge Discovery/Representation, Semantics}, PAGES = {1464-1471}, URL = {http://www.lrec-conf.org/proceedings/lrec2010/summaries/713.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {Proceedings of the Seventh International Conference on Language Resources and Evaluation-LREC'10}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{PUSTEJOVSKY_2010_INPROCEEDINGS_PRPJBQ_65141, AUTHOR = {Pustejovsky, J. and Rumshisky, A. and Plotnick, A. and Jezek, E. and Batiukova, O. and Quochi, V.}, TITLE = {SemEval-2010 Task 7: Argument Selection and Coercion}, YEAR = {2010}, ABSTRACT = {The paper describes the Argument Selection and Coercion task for the SemEval-2010 evaluation exercise, which involves characterizing the type of compositional operation that exists between a predicate and the arguments it selects. 
Specifically, the goal is to identify whether the type that a verb selects is satisfied directly by the argument, or whether the argument must change type to satisfy the verb typing}, KEYWORDS = {semantic annotation, verb coercion}, URL = {http://www.aclweb.org/anthology/S10-1005}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-932432-70-1}, CONFERENCE_NAME = {Fifth International Workshop on Semantic Evaluation (SemEval 2010)}, CONFERENCE_PLACE = {Stroudsburg}, BOOKTITLE = {Proceedings of the 5th International Workshop on Semantic Evaluation}, EDITOR = {Erk, K. and Strapparava, C.}, } @TECHREPORT{CALZOLARI_2010_TECHREPORT_CSBQBBCMOP_183250, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Quochi, V. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 4}, YEAR = {2010}, KEYWORDS = {Language Resources}, URL = {https://iris.cnr.it/handle/20.500.14243/183250}, } @INCOLLECTION{QUOCHI_2009_INCOLLECTION_QDSBMC_50342, AUTHOR = {Quochi, V. and Del Gratta, R. and Sassolini, E. and Bartolini, R. and Monachini, M. and Calzolari, N.}, TITLE = {A Standard Lexical-Terminological Resource for the Bio Domain}, YEAR = {2009}, ABSTRACT = {The present paper describes a large-scale lexical resource for the biology domain designed both for human and for machine use. This lexicon aims at semantic interoperability and extendability, through the adoption of ISO-LMF standard for lexical representation and through a granular and distributed encoding of relevant information. The first part of this contribution focuses on three aspects of the model that are of particular interest to the biology community: the treatment of term variants, the representation on bio events and the alignment with a domain ontology. 
The second part of the paper describes the physical implementation of the model: a relational database equipped with a set of automatic uploading procedures. Peculiarity of the BioLexicon is that it combines features of both terminologies and lexicons. A set verbs relevant for the domain is also represented with full details on their syntactic and semantic argument structure}, KEYWORDS = {Lexical representation model, Lexical Database, Computational Lexicography, Special Domains, Standards}, PAGES = {325-335}, URL = {https://link.springer.com/chapter/10.1007/978-3-642-04235-5_28}, VOLUME = {5603}, DOI = {10.1007/978-3-642-04235-5_28}, PUBLISHER = {Springer (Berlin, Heidelberg, DEU)}, ISBN = {978-3-642-04235-5}, CONFERENCE_PLACE = {Berlin, Heidelberg}, BOOKTITLE = {Human Language Technology. Challenges of the Information Society}, } @EDITORIAL{CALZOLARI_2009_EDITORIAL_CBBBCGMMOPQST_1087, AUTHOR = {Calzolari, N. and Baroni, P. and Bel, N. and Budin, G. and Choukri, K. and Goggi, S. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {Proceedings of the 1st European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, ABSTRACT = {Proceedings of the first FLaReNet Forum on the European Language Resources and Technologies, held in Vienna, at the Austrian Academy of Science, on 12-13 February 2009}, KEYWORDS = {Language Resources, Language Technologies, Multilingual, Digital}, PAGES = {105}, URL = {http://www.flarenet.eu/sites/default/files/Vienna09_Proceedings.pdf}, } @INPROCEEDINGS{JEZEK_2009_INPROCEEDINGS_JQC_65125, AUTHOR = {Jezek, E. and Quochi, V. 
and Calzolari, N.}, TITLE = {Relevance of Qualia Relations in Coercive Contexts}, YEAR = {2009}, KEYWORDS = {annotation, annotation scheme, semantics, type shift}, URL = {https://iris.cnr.it/handle/20.500.14243/65125}, CONFERENCE_NAME = {5th International Conference on Generative Approaches to the Lexicon}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CBGMQST_183230, AUTHOR = {Calzolari, N. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 1}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://iris.cnr.it/handle/20.500.14243/183230}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CBGMQST_183227, AUTHOR = {Calzolari, N. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Dissemination Plan}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://iris.cnr.it/handle/20.500.14243/183227}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CMSBGQT_183229, AUTHOR = {Calzolari, N. and Monachini, M. and Soria, C. and Baroni, P. and Goggi, S. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 2}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://iris.cnr.it/handle/20.500.14243/183229}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBCGMQTBBCMOP_183224, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Caselli, T. and Goggi, S. and Monachini, M. and Quochi, V. and Toral, A. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Action Plan}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://iris.cnr.it/handle/20.500.14243/183224}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBGMQT_183228, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. 
and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Evaluation Plan for the functioning of the Network}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://iris.cnr.it/handle/20.500.14243/183228}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBMQ_183225, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Monachini, M. and Quochi, V.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Annual Report No. 1}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://iris.cnr.it/handle/20.500.14243/183225}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBMQT_183231, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Monachini, M. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Project Presentation}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://iris.cnr.it/handle/20.500.14243/183231}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBBCCMMOPQT_183226, AUTHOR = {Calzolari, N. and Soria, C. and Bel, N. and Budin, G. and Caselli, T. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Blueprint of actions and infrastructures No. 1}, YEAR = {2009}, KEYWORDS = {Language Resources, Infrastructures, Recommendations}, URL = {https://iris.cnr.it/handle/20.500.14243/183226}, } @MISC{CALZOLARI_2009_MISC_CBBBCGMMOPQST_183233, AUTHOR = {Calzolari, N. and Baroni, P. and Bel, N. and Budin, G. and Choukri, K. and Goggi, S. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, KEYWORDS = {Language Resources, Language Technologies}, URL = {https://iris.cnr.it/handle/20.500.14243/183233}, } @MISC{CALZOLARI_2009_MISC_CBBCMOPBGMQST_183219, AUTHOR = {Calzolari, N. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. 
and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {Extended Report of: The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, KEYWORDS = {Language Resources, Language Technologies}, URL = {https://iris.cnr.it/handle/20.500.14243/183219}, } @MISC{CALZOLARI_2009_MISC_CBBCMOPBGMQST_183222, AUTHOR = {Calzolari, N. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {Short Report of The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, KEYWORDS = {Language Resources, Language Technologies}, URL = {https://iris.cnr.it/handle/20.500.14243/183222}, } @MISC{QUOCHI_2009_MISC_Q_227948, AUTHOR = {Quochi, V.}, TITLE = {Usage scenarios and basic workflows}, YEAR = {2009}, PAGES = {5-5}, URL = {http://www.clarin.eu/sites/default/files/CLARIN_Newsletter_no_6.pdf}, VOLUME = {6}, } @INPROCEEDINGS{MONACHINI_2008_INPROCEEDINGS_MQDC_65105, AUTHOR = {Monachini, M. and Quochi, V. and Del Gratta, R. and Calzolari, N.}, TITLE = {Using LMF to Shape a Lexicon for the Biomedical Domain}, YEAR = {2008}, ABSTRACT = {This paper describes the design, implementation and population of the BioLexicon in the framework of BootStrep, an FP6 project. The BioLexicon (BL) is a lexical resource designed for text mining in the bio-domain. It has been conceived to meet both domain requirements and upcoming ISO standards for lexical representation. The data model and data categories are compliant to the ISO Lexical Markup Framework and the Data Category Registry. 
The BioLexicon integrates features of lexicons and terminologies: term entries (and variants) derived from existing resources are enriched with linguistic features, including sub-categorization and predicate-argument information, extracted from texts. Thus, it is an extendable resource. Furthermore, the lexical entries will be aligned to concepts in the BioOntology, the ontological resource of the project. The BL implementation is an extensible relational database with automatic population procedures. Population relies on a dedicated input data structure allowing to upload terms and their linguistic properties and "pull-and-push" them in the database. The BioLexicon teaches that the state-of-the-art is mature enough to aim at setting up a standard in this domain. Being conformant to lexical standards, the BioLexicon is interoperable and portable to other areas}, KEYWORDS = {Domain terminologies, Computational lexicons, Lexical standards, Lexical architectures}, PAGES = {153-157}, URL = {https://iris.cnr.it/handle/20.500.14243/65105}, CONFERENCE_NAME = {LangTech 2008-Tecnologia applicata alla linguistica}, BOOKTITLE = {LangTech 2008-Tecnologia applicata alla linguistica}, EDITOR = {Delogu, C. and Falcone, M.}, } @INPROCEEDINGS{QUOCHI_2008_INPROCEEDINGS_QC_227370, AUTHOR = {Quochi, V. and Calderone, B.}, TITLE = {Learning properties of Noun Phrases: from data to functions}, YEAR = {2008}, ABSTRACT = {The paper presents two experiments of unsupervised classification of Italian noun phrases. The goal of the experiments is to identify the most prominent contextual properties that allow for a functional classification of noun phrases. For this purpose, we used a Self Organizing Map is trained with syntactically-annotated contexts containing noun phrases. The contexts are defined by means of a set of features representing morpho-syntactic properties of both nouns and their wider contexts. 
Two types of experiments have been run: one based on noun types and the other based on noun tokens. The results of the type simulation show that when frequency is the most prominent classification factor, the network isolates idiomatic or fixed phrases. The results of the token simulation experiment, instead, show that, of the 36 attributes represented in the original input matrix, only a few of them are prominent in the re-organization of the map. In particular, key features in the emergent macro-classification are the type of determiner and the grammatical number of the noun. An additional but not less interesting result is an organization into semantic/pragmatic micro-classes. In conclusions, our result confirm the relative prominence of determiner type and grammatical number in the task of noun (phrase) categorization}, KEYWORDS = {cognitive linguistics, noun phrase}, PAGES = {2596-2602}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/summaries/644.html}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {Sixth International Conference on Language Resources and Evaluation (LREC'08)}, } @INPROCEEDINGS{QUOCHI_2008_INPROCEEDINGS_QMDC_65076, AUTHOR = {Quochi, V. and Monachini, M. and Del Gratta, R. and Calzolari, N.}, TITLE = {A lexicon for biology and bioinformatics: the BOOTStrep experience}, YEAR = {2008}, ABSTRACT = {This paper describes the design, implementation and population of a lexical resource for biology and bioinformatics (the BioLexicon) developed within an ongoing European project. The aim of this project is text-based knowledge harvesting for support to information extraction and text mining in the biomedical domain. The BioLexicon is a large-scale lexical-terminological resource encoding different information types in one single integrated resource. In the design of the resource we follow the ISO/DIS 24613 "Lexical Mark-up Framework" standard, which ensures reusability of the information encoded and easy exchange of both data and architecture. 
The design of the resource also takes into account the needs of our text mining partners who automatically extract syntactic and semantic information from texts and feed it to the lexicon. The present contribution first describes in detail the model of the BioLexicon along its three main layers: morphology, syntax and semantics; then, it briefly describes the database implementation of the model and the population strategy followed within the project, together with an example. The BioLexicon database in fact comes equipped with automatic uploading procedures based on a common exchange XML format, which guarantees that the lexicon can be properly populated with data coming from different sources}, KEYWORDS = {Lexicon, Ontologies, Lexical database}, PAGES = {2285-2292}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/576_paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, } @INPROCEEDINGS{TORALRUIZ_2008_INPROCEEDINGS_TQDMSC_65089, AUTHOR = {Toral Ruiz, A. and Quochi, V. and Del Gratta, R. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {Lexically-based Ontologies and Ontologically Based Lexicons}, YEAR = {2008}, ABSTRACT = {This paper deals with the relations between ontologies and lexicons. We study the role of these two components and their evolution during the last years in the field of Computational Linguistics. Subsequently, we survey the current lines of research at ILC-CNR which tackle this topic. 
They involve (I) the reuse of already existing Lexical Resources to derive formal ontologies, (II) the conversion and combination of terminologies into rich and formal Lexical Resources and (III) the use of formal ontologies as the backbone of multilingual Lexical Resources}, KEYWORDS = {Resource Infrastructure, UIMA, Clarin}, PAGES = {49-59}, URL = {https://iris.cnr.it/handle/20.500.14243/65089}, CONFERENCE_NAME = {AI*IA 2008-10th Congress of Italian Association for Artificial Intelligence}, BOOKTITLE = {AI*IA 2008-10th Congress of Italian Association for Artificial Intelligence}, } @EDITORIAL{NARDI_2007_EDITORIAL_NPQ_166678, AUTHOR = {Nardi, A. and Peters, C. and Quochi, V.}, TITLE = {CLEF 2007. Editorial}, YEAR = {2007}, ABSTRACT = {These Working Notes contain descriptions of the experiments conducted within CLEF 2007 organised by the Cross-Language evaluation Forum. The final papers-revised and extended as a result of the discussions at the Workshop-together with a comparative analysis of the results will appear in the CLEF 2007 Proceedings, to be published by Springer in their Lecture Notes for Computer Science series. CLEF organises a series of evaluation tracks designed to test different aspects of mono-and cross-language information retrieval system development. The intention is to encourage systems to move from monolingual text retrieval to the implementation of a full multilingual multimedia search service}, KEYWORDS = {Information Retrieval}, PAGES = {9}, URL = {https://iris.cnr.it/handle/20.500.14243/166678}, } @INPROCEEDINGS{CASELLI_2007_INPROCEEDINGS_CQ_65051, AUTHOR = {Caselli, T. 
and Quochi, V.},
  TITLE = {Inferring the semantics of temporal prepositions in {Italian}},
  YEAR = {2007},
  KEYWORDS = {italian, prepositions, computational linguistics},
  PAGES = {38--44},
  URL = {http://www.aclweb.org/anthology/W07-1606},
  PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)},
  CONFERENCE_NAME = {Fourth ACL-SIGSEM Workshop on Prepositions},
  CONFERENCE_PLACE = {Stroudsburg},
  BOOKTITLE = {Proceedings of the Fourth ACL-SIGSEM Workshop on Prepositions},
  EDITOR = {Costello, F. and Kelleher, J. and Volk, M.},
}
% NOTE(review): the editor list of the entry above was a mix of
% "First Last, Initials" forms that BibTeX parses as the wrong people; it is
% now three "Last, F." names. Confirm against the proceedings front matter.

% Conference paper (GL2007) relating the BioLexicon to the qualia structure.
@INPROCEEDINGS{MONACHINI_2007_INPROCEEDINGS_MQRC_65056,
  AUTHOR = {Monachini, M. and Quochi, V. and Ruimy, N. and Calzolari, N.},
  TITLE = {Lexical Relations and Domain Knowledge: The {BioLexicon} Meets the Qualia Structure},
  YEAR = {2007},
  URL = {https://iris.cnr.it/handle/20.500.14243/65056},
  CONFERENCE_NAME = {GL2007: Fourth International Conference on Generative Approaches to the Lexicon},
  BOOKTITLE = {GL2007: Fourth International Conference on Generative Approaches to the Lexicon},
  EDITOR = {Bouillon, P. and Danlos, L. and Kanzaki, K.},
}

% Conference paper describing a lexical resource for the biology domain.
@INPROCEEDINGS{QUOCHI_2007_INPROCEEDINGS_QDSMC_65109,
  AUTHOR = {Quochi, V. and Del Gratta, R. and Sassolini, E. and Monachini, M. and Calzolari, N.},
  TITLE = {Toward a Standard Lexical Resource in the Bio Domain},
  YEAR = {2007},
  ABSTRACT = {The present paper describes a large-scale lexical resource for the biology domain designed both for human and for machine use. This lexicon aims at semantic interoperability and extendability, through the adoption of ISO-LMF standard for lexical representation and through a granular and distributed encoding of relevant information. The first part of this contribution focuses on three aspects of the model that are of particular interest to the biology community: the treatment of term variants, the representation on bio events and the alignment with a domain ontology. The second part of the paper describes the physical implementation of the model: a relational database equipped with a set of automatic uploading procedures.
Peculiarity of the BioLexicon is that it combines features of both terminologies and lexicons. A set of verbs relevant for the domain is also represented with full details on their syntactic and semantic argument structure},
  KEYWORDS = {Lexical representation model, Lexical Database, Computational Lexicography, Special Domains, Standards},
  PAGES = {295--299},
  PUBLISHER = {Fundacja Uniwersytetu im A. Mickiewicza (Poznan, POL)},
  ISBN = {978-83-7177-413-3},
  CONFERENCE_NAME = {LTC07-3rd Language and Technology Conference: Human Language Technology. Challenges of the Information Society},
  CONFERENCE_PLACE = {Poznan},
  BOOKTITLE = {LTC07-3rd Language and Technology Conference: Human Language Technology. Challenges of the Information Society},
}

% Conference abstract (SLE 2007 Annual Meeting).
@INPROCEEDINGS{CALDERONE_2007_INPROCEEDINGS_CQ_225689,
  AUTHOR = {Calderone, B. and Quochi, V.},
  TITLE = {Emergent Cognitive Functions of the Noun Phrase},
  YEAR = {2007},
  KEYWORDS = {noun phrase, emergence of language},
  URL = {https://iris.cnr.it/handle/20.500.14243/225689},
  CONFERENCE_NAME = {SLE 2007 Annual Meeting},
  BOOKTITLE = {SLE 2007 Annual Meeting Book of Abstracts},
}

% Technical report on the FLaReNet project.
% NOTE(review): the technical reports below carry no INSTITUTION field, which
% report-type entries normally require; add it once the issuing body is confirmed.
@TECHREPORT{CALZOLARI_2007_TECHREPORT_CMQSGB_195954,
  AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V. and Soria, C. and Goggi, S. and Baroni, P.},
  TITLE = {{FLaReNet}: Fostering Language Resources Network. Grant Agreement n° 617001, {eContentPlus}},
  YEAR = {2007},
  URL = {https://iris.cnr.it/handle/20.500.14243/195954},
}

% Technical report on mapping TimeML onto the UIMA type systems.
% NOTE(review): the last author's initial was "V." here but is "E." in every
% other entry listing Sassolini; normalised to "E." -- confirm.
@TECHREPORT{DELGRATTA_2007_TECHREPORT_DBCEMQS_195953,
  AUTHOR = {Del Gratta, R. and Bartolini, R. and Caselli, T. and Enea, A. and Monachini, M. and Quochi, V. and Sassolini, E.},
  TITLE = {{TimeML}: An Ontological Mapping onto the {UIMA} Type Systems},
  YEAR = {2007},
  URL = {https://iris.cnr.it/handle/20.500.14243/195953},
}

% Technical report on the Bio-Lexicon database.
@TECHREPORT{DELGRATTA_2007_TECHREPORT_DMQSC_195940,
  AUTHOR = {Del Gratta, R. and Monachini, M. and Quochi, V. and Sassolini, E.
and Calzolari, N.},
  TITLE = {{Bio-Lexicon} {DataBase}: Architecture, Concepts and Loading Software},
  YEAR = {2007},
  URL = {https://iris.cnr.it/handle/20.500.14243/195940},
}

% Technical report on the LocalBioLex database framework.
% Braces in titles keep proper nouns capitalised under sentence-casing styles.
@TECHREPORT{DELGRATTA_2007_TECHREPORT_DTQM_195952,
  AUTHOR = {Del Gratta, R. and Toral, A. and Quochi, V. and Monachini, M.},
  TITLE = {{LocalBioLex}: A database framework for biolinguistic research on integrated databases},
  YEAR = {2007},
  URL = {https://iris.cnr.it/handle/20.500.14243/195952},
}

% Thesis on light verb constructions in Italian (the abstract is in Italian).
@THESIS{QUOCHI_2007_THESIS_Q_479321,
  AUTHOR = {Quochi, V.},
  TITLE = {A Usage-Based Approach to Light Verb Constructions in {Italian}: Development and Use},
  YEAR = {2007},
  ABSTRACT = {Oggetto di studio della ricerca è la costruzione a verbo supporto con verbo fare in ottica cognitiva e di apprendimento della prima lingua. L’ambito teorico di lavoro prescelto è la Linguistica Cognitiva e in particolar modo la Construction Grammar. La tesi che ha guidato questa ricerca è la convinzione che la Costruzione a Verbo Supporto (o leggero) costituisca un modello sintattico-semantico con funzione etichettante e che tale modello sia sfruttato dai bambini per nominare eventi per i quali non possiedono ancora un’etichetta. La costruzione a verbo supporto inoltre viene considerata come una struttura prototipica, una categoria radiale i cui confini non sono delineabili in maniera netta. Il primo capitolo presenta gli studi più importanti sul fenomeno delle costruzioni a verbo supporto e il background teorico sul quale si fonda la ricerca. Nel secondo capitolo si descrivono tre studi empirici volti a verificare per l’Italiano l’ipotesi di realtà psicologica delle Costruzioni (quali oggetti simbolici, coppie di forma e significato). Il terzo e quarto capitolo costituiscono il cuore della ricerca e sono dedicati allo studio empirico dei costrutti con verbo fare in un corpus di Italiano parlato L1 derivato dal database CHILDES.
Da una parte si è analizzata la lingua degli adulti per determinare le tipologie di costrutti rilevanti per il bambino e il ruolo della lingua di input nello sviluppo di questi costrutti nel bambino. Dall’altra si è studiato lo sviluppo dei costrutti fare Nome nella lingua del bambino per studiarne lo sviluppo e l’uso. I dati hanno confermato la tesi iniziale. Il bambino sfrutta lo schema della costruzione a verbo supporto per riferirsi ad azioni o eventi. Si sono individuate almeno due costruzioni per le quali il bambino mostra produttività. Il quinto, infine, è il capitolo conclusivo nel quale si dà una rappresentazione delle costruzioni identificate nel corpus nei termini di una famiglia di costruzioni al fine di mostrare come tale rappresentazione renda conto della vicinanza tra Costruzioni a Verbo Supporto e Costruzione Transitiva, e fornisca un buon modello esplicativo dei dati acquisizionali. Il capitolo si conclude con un riepilogo dei risultati ottenuti e un’indicazione degli sviluppi futuri},
  URL = {https://iris.cnr.it/handle/20.500.14243/479321},
}

% Miscellaneous item on the BioLexicon model and its implementation.
@MISC{QUOCHI_2007_MISC_QDMC_129309,
  AUTHOR = {Quochi, V. and Del Gratta, R. and Monachini, M. and Calzolari, N.},
  TITLE = {{BioLexicon} Model and Implementation},
  YEAR = {2007},
  URL = {https://iris.cnr.it/handle/20.500.14243/129309},
}

% Technical report on the Bio-Lexicon model and its data categories.
% NOTE(review): no INSTITUTION field, which report-type entries normally require.
@TECHREPORT{QUOCHI_2006_TECHREPORT_QMCDS_195923,
  AUTHOR = {Quochi, V. and Monachini, M. and Calzolari, N. and Del Gratta, R. and Sassolini, E.},
  TITLE = {{Bio-Lexicon} Model and Preliminary {ISO} Conformant Data Categories},
  YEAR = {2006},
  URL = {https://iris.cnr.it/handle/20.500.14243/195923},
}

% Book chapter on acquiring Italian complex nominals from text corpora.
@INCOLLECTION{QUOCHI_2005_INCOLLECTION_Q_134794,
  AUTHOR = {Quochi, V.},
  TITLE = {Issues on the acquisition of {Italian} complex nominals from text corpora: a computational approach combining syntactic and semantic information},
  YEAR = {2005},
  ABSTRACT = {The paper addressed the issue of Italian Complex Nominals from an (automatic) acquisition and representational perspective.
Just like English noun compounds, ICNs blur the distinction between the syntactic and the lexical component because they are (at least) partially non-transparent but, nevertheless, show regularities both at the syntactic and at the semantic level. This contribution reports on an experiment conducted to identify the highest possible number of productive syntactic-semantic patterns of ICN formation, and to make explicit the particular semantic relation that exists between the head of the phrase and its modifier(s). I rely on a non-traditional generative theory of the lexicon, namely the Generative Lexicon, as a model for the representation/interpretation of ICNs which provides us with a structured representation of the internal semantics of lexical items. The experiment explored the representational power of the qualia structure with respect to ICNs},
  KEYWORDS = {complex nominals, multiword expressions, lexicon, lexical representation, generative lexicon},
  PAGES = {153--174},
  URL = {https://iris.cnr.it/handle/20.500.14243/134794},
  PUBLISHER = {Edizioni Plus srl (Pisa, ITA)},
  ISBN = {9788884922366},
  CONFERENCE_PLACE = {Pisa},
  BOOKTITLE = {Studies in the Semantics of Lexical Combinatory Patterns},
  EDITOR = {Bertuccelli, M.},
}

% Conference paper (LREC 2004): pilot study on Italian complex nominals.
@INPROCEEDINGS{QUOCHI_2004_INPROCEEDINGS_Q_64237,
  AUTHOR = {Quochi, V.},
  TITLE = {Representing {Italian} Complex Nominals: A Pilot Study},
  YEAR = {2004},
  ABSTRACT = {A corpus-based investigation of Italian Complex Nominals (CNs), of the form N PP, which aims at clarifying their syntactic and semantic constitution, is presented. The main goal is to find out useful parameters for their representation in a computational lexicon. As a reference model we have taken an implementation of Pustejovsky's Generative Lexicon Theory (1995), the SIMPLE Italian Lexicon, and in particular the Extended Qualia Structure.
Italian CN formation mainly exploits post-modification; of particular interest here are CNs of the kind N PP since this syntactic pattern is highly productive in Italian and such CNs very often translate compound nouns of other languages. One of the major problems posed by CNs for interpretation is the retrieval or identification of the semantic relation linking their components, which is (at least partially) implicit on the surface. Studying a small sample, we observed some interesting facts that could be useful when setting up a larger experiment to identify semantic relations and/or automatically learn the syntactic peculiarities of given semantic paradigms. Finally, a set of representational features exploiting the results from our corpus is proposed},
  KEYWORDS = {Multiword expression, Complex Nominals, Italian language},
  PAGES = {1863--1866},
  URL = {https://iris.cnr.it/handle/20.500.14243/64237},
  ISBN = {2-9517408-1-6},
  CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation},
  BOOKTITLE = {Proceedings of the Fourth International Conference on Language Resources and Evaluation, LREC'04},
}

% Technical report (ELSNET-4 deliverable).
% NOTE(review): no INSTITUTION field, which report-type entries normally require.
@TECHREPORT{BARONI_2004_TECHREPORT_BCLQU_195897,
  AUTHOR = {Baroni, P. and Calzolari, N. and Lenci, A. and Quochi, V. and Ulivieri, M.},
  TITLE = {Final Resources Landscape},
  YEAR = {2004},
  ABSTRACT = {ELSNET-4 Deliverable D6.4},
  KEYWORDS = {Language Resources, Landscapes},
  PAGES = {11},
  URL = {https://iris.cnr.it/handle/20.500.14243/195897},
}

% Conference paper on multiword and multilingual lexicons.
% BOOKTITLE mirrors CONFERENCE_NAME so the required field is present.
@INPROCEEDINGS{CALZOLARI_2003_INPROCEEDINGS_CLQ_77203,
  AUTHOR = {Calzolari, N. and Lenci, A. and Quochi, V.},
  TITLE = {Towards Multiword and Multilingual Lexicons: Between Theory and Practice},
  YEAR = {2003},
  URL = {https://iris.cnr.it/handle/20.500.14243/77203},
  CONFERENCE_NAME = {Linguistics and Phonetics 2002 Conference},
  BOOKTITLE = {Linguistics and Phonetics 2002 Conference},
}

% Technical report: appendix on noun compounds and support verbs in MILE.
@TECHREPORT{QUOCHI_2003_TECHREPORT_QJ_195190,
  AUTHOR = {Quochi, V.
and Jan, O.},
  TITLE = {Appendix {F}: Representing noun compounds and support verbs in {MILE} ({PISA} \& {XMELLT})},
  YEAR = {2003},
  URL = {http://www.ilc.cnr.it/EAGLES96/isle/clwg_doc/ISLE_D2.2-D3.2.zip},
}
% NOTE(review): in the entry above the literal quotation marks were removed
% from the title (styles add their own) and the ampersand is escaped for LaTeX.
% The second author is recorded as surname "Jan", initial "O."; this may be an
% inverted name -- confirm against the deliverable before changing it.