% Proceedings volume of the SIGUL 2022 workshop (ACL Anthology event page in URL).
% NOTE(review): EDITORIAL is not one of the standard BibTeX entry types; standard
% styles will warn and fall back to a generic layout. Type left unchanged because
% the exporting tool may rely on it.
@EDITORIAL{MELERO_2022_EDITORIAL_MSS_472132,
  AUTHOR = {Melero, M. and Sakti, S. and Soria, C.},
  TITLE = {Proceedings of The 1st Annual Meeting of the ELRA/ISCA Special Interest Group on Under-Resourced Languages (SIGUL2022)},
  YEAR = {2022},
  ABSTRACT = {Proceedings of the SIGUL 2022 workshop.},
  KEYWORDS = {conference proceedings, less-resourced languages, language resources, NLP},
  URL = {https://aclanthology.org/events/lrec-2022/#2022-sigul-1},
  ISBN = {979-10-95546-91-7},
}

% Book chapter on the vitality of Kashubian and Piedmontese.
% NOTE(review): INCOLLECTION normally requires BOOKTITLE, which is absent here;
% the DOI also looks like a volume-level identifier - confirm the chapter DOI.
@INCOLLECTION{DOLOWYRYBINSKA_2021_INCOLLECTION_DS_443475,
  AUTHOR = {Dolowy Rybinska, N. and Soria, C.},
  TITLE = {Surveying the ethnolinguistic vitality of two contested languages. The case of Kashubian and Piedmontese},
  YEAR = {2021},
  ABSTRACT = {In this chapter we present the results of a Polish-Italian research project aimed at evaluating and comparing the vitality of two contested languages: Kashubian in Poland and Piedmontese in Italy.},
  KEYWORDS = {ethnolinguistic vitality, contested languages},
  PAGES = {125--142},
  URL = {https://publications.cnr.it/doc/443475},
  DOI = {10.1075/wlp.8},
  PUBLISHER = {John Benjamins (Amsterdam, NLD)},
  ISBN = {9789027208040},
}

% LITHME forecast report; the AUTHOR list continues on the next line of the file.
% The export listed "Galinski, C." three times in a row; it is kept once here.
% The citation key (which encodes one initial per exported author) is left
% unchanged so that existing citations still resolve.
@TECHREPORT{SAYERS_2021_TECHREPORT_SSHAAABBBCECDDDDDFFGGGGGGHLLJJKKMMMMMNRPSASSSTYBCCLKRP_472131,
  AUTHOR = {Sayers, D. and Sousa Silva, R. and Höhn, S. and Ahmedi, L. and Allkivi Metsoja, K. and Anastasiou, D. and Beňuš, Š. and Bowker, L. and Bytyçi, E. and Catala, A. and Çepani, A. and Chacón Beltrán, R. and Dadi, S. and Dalipi, F. and Despotovic, V. and Doczekalska, A. and Drude, S. and Fort, K. and Fuchs, R. and Galinski, C. and Gobbo, F. and Gungor, T. and Guo, S. and Höckner, K. and Láncos, P. and Libal, T. and Jantunen, T. and Jones, D. and Klimova, B. and Korkmaz, E. and Maučec, M. S. and Melo, M. and Meunier, F. and Migge, B. and Mititelu, V. B. and Névéol, A. and Rossi, A. and Pareja Lora, A. and Sanchez Stockhammer, C. and Şahin, A. and Soltan, A. and Soria, C. and Shaikh, S. and Turchi, M.
and Yildirim Yayilgan, S. and Bessa, M. and Cabral, L. and Coler, M. and Liebeskind, C. and Kernerman, I. and Rousi, R. and Prys, C.}, TITLE = {The Dawn of the Human-Machine Era: A forecast of new and emerging language technologies}, YEAR = {2021}, ABSTRACT = {New language technologies are coming, thanks to the huge and competing private investment fuelling rapid progress; we can either understand and foresee their effects, or be taken by surprise and spend our time trying to catch up. This report sketches out some transformative new technologies that are likely to fundamentally change our use of language. Some of these may feel unrealistically futuristic or far-fetched, but a central purpose of this report - and the wider LITHME network - is to illustrate that these are mostly just the logical development and maturation of technologies currently in prototype. But will everyone benefit from all these shiny new gadgets? Throughout this report we emphasise a range of groups who will be disadvantaged and issues of inequality. Important issues of security and privacy will accompany new language technologies. A further caution is to re-emphasise the current limitations of AI. Looking ahead, we see many intriguing opportunities and new capabilities, but a range of other uncertainties and inequalities. New devices will enable new ways to talk, to translate, to remember, and to learn. But advances in technology will reproduce existing inequalities among those who cannot afford these devices, among the world's smaller languages, and especially for sign language. Debates over privacy and security will flare and crackle with every new immersive gadget. We will move together into this curious new world with a mix of excitement and apprehension - reacting, debating, sharing and disagreeing as we always do.
Plug in, as the human-machine era dawns.},
  KEYWORDS = {language technologies, human-machine communication},
  URL = {https://doi.org/10.17011/jyx/reports/20210518/1},
  DOI = {10.17011/jyx/reports/20210518/1},
}

% Proceedings volume of the joint SLTU-CCURL 2020 workshop.
@EDITORIAL{BEERMANN_2020_EDITORIAL_BBSS_472133,
  AUTHOR = {Beermann, D. and Besacier, L. and Sakti, S. and Soria, C.},
  TITLE = {Proceedings of 1st Joint SLTU and CCURL Workshop (SLTU-CCURL 2020)},
  YEAR = {2020},
  ABSTRACT = {Proceedings of the 1st Joint SLTU and CCURL Workshop (SLTU-CCURL 2020)},
  KEYWORDS = {less-resourced languages, NLP, language resources},
  URL = {https://aclanthology.org/events/lrec-2020/#2020-sltu-1},
  ISBN = {979-10-95546-35-1},
}

% Proceedings volume of the CCURL 2018 workshop (front matter through page 75).
@EDITORIAL{SORIA_2018_EDITORIAL_SBP_387365,
  AUTHOR = {Soria, C. and Besacier, L. and Pretorius, L.},
  TITLE = {Proceedings of CCURL 2018-Sustaining knowledge diversity in the digital age},
  YEAR = {2018},
  ABSTRACT = {Proceedings of the CCURL 2018 workshop},
  KEYWORDS = {knowledge diversity, digital age, language resources, language technologies},
  PAGES = {i--75},
  URL = {http://lrec-conf.org/workshops/lrec2018/W26/pdf/book_of_proceedings.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {979-10-95546-22-1},
}

% Journal item in TAL, volume 59; TITLE carries the English and French titles
% separated by a vertical bar, as exported.
% The URL is stored raw: a backslash-escaped ampersand would be printed literally
% by url-aware styles and would break the link.
@EDITORIAL{BERNHARD_2018_EDITORIAL_BS_443019,
  AUTHOR = {Bernhard, D. and Soria, C.},
  TITLE = {Automatic processing of under-resourced languages|Traitement automatique des langues peu dotées},
  YEAR = {2018},
  KEYWORDS = {less-resourced languages, NLP},
  PAGES = {7--14},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85063404297&origin=inward},
  VOLUME = {59},
  PUBLISHER = {TAL (Saint-Cloud, Francia)},
  ISSN = {1248-9433},
  BOOKTITLE = {TAL. Traitement automatique des langues},
}

% LREC 2018 paper on the DLDP survey; the AUTHOR list continues on the next line of the file.
@INPROCEEDINGS{SORIA_2018_INPROCEEDINGS_SQR_387362,
  AUTHOR = {Soria, C. and Quochi, V.
and Russo, I.},
  TITLE = {The DLDP Survey on Digital Use and Usability of EU Regional and Minority Languages},
  YEAR = {2018},
  ABSTRACT = {This paper reports about the design, the results and the key findings of a survey launched by the Digital Language Diversity Project about the digital use and usability of regional and minority languages. The aim of the survey - the first of this kind - was to investigate the real needs and expectations of European minority language speakers regarding digital opportunities. The focus on four languages (Basque, Breton, Karelian and Sardinian) at different stages of digital development offers a starting point to develop strategies for assessing digital vitality of these languages and overcoming specific difficulties.},
  KEYWORDS = {minority languages, digital survival, electronic communication},
  PAGES = {4155--4160},
  URL = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/684.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {979-10-95546-00-9},
  CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  CONFERENCE_PLACE = {Miyazaki, Japan},
  CONFERENCE_DATE = {7-12/05/2018},
  BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.},
}

% DLDP survival kit for Sardinian; the AUTHOR list continues on the next line of the file.
% NOTE(review): TECHREPORT normally requires INSTITUTION, which the export does
% not provide - confirm the issuing body before adding one.
@TECHREPORT{BARONI_2018_TECHREPORT_BQRSCGHKSS_483257,
  AUTHOR = {Baroni, P. and Quochi, V. and Russo, I. and Soria, C. and Ceberio, B. K. and Gurrutxaga, H. A. and Hicks, D. and Kruse, E. and Salonen, T.
and Sarhimaa, A.},
  TITLE = {Kit per la sopravvivenza digitale della lingua sarda-Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua sarda},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua sarda (versione italiana)},
  KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Sardinian},
  PAGES = {12},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Sardinian_IT.pdf},
}

% DLDP survival kit for Basque, Basque-language edition (per ABSTRACT).
% PAGES holds a single number here, presumably the page count - confirm.
@TECHREPORT{CEBERIO_2018_TECHREPORT_CGBHKQRSSS_443050,
  AUTHOR = {Ceberio, B. K. and Gurrutxaga, H. A. and Baroni, P. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {Euskarak Mundu Digitalean Bizirauteko Kita-DLDPren gomendioak, euskararen bizitasun digitala hobetu dadin},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale della lingua basca (versione basca)},
  KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Basque},
  PAGES = {27},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Basque_EU.pdf},
}

% DLDP survival kit for Basque, Spanish-language edition; the AUTHOR list
% continues on the next line of the file.
@TECHREPORT{CEBERIO_2018_TECHREPORT_CGBHKQRSSS_443051,
  AUTHOR = {Ceberio, B. K. and Gurrutxaga, H. A. and Baroni, P. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.
and Soria, C.},
  TITLE = {Kit de Supervivencia Lingüística Digital del Euskera-Recomendaciones del DLDP para mejorar la Vitalidad Digital del euskera},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del basco (versione spagnola)},
  KEYWORDS = {digital diversity, digital vitality, recommendations, Basque, digital survival},
  PAGES = {28},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Basque_ES.pdf},
}

% Generic DLDP Digital Language Survival Kit, full English version (per ABSTRACT).
@TECHREPORT{CEBERIO_2018_TECHREPORT_CGBHKQRSSS_443020,
  AUTHOR = {Ceberio, B. K. and Gurrutxaga, H. A. and Baroni, P. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {The DLDP Digital Language Survival Kit},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale delle lingue (versione inglese integrale)},
  KEYWORDS = {sopravvivenza digitale, lingue minoritarie, less-resourced languages},
  PAGES = {38},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit.pdf},
}

% DLDP Roadmap for policy makers, full English version (per ABSTRACT).
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_443047,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {The DLDP Roadmap},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione inglese integrale)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {19},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Roadmap.pdf},
}

% DLDP Roadmap, short English version; the AUTHOR list continues on the next
% line of the file.
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483247,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.
and Soria, C.},
  TITLE = {The DLDP Roadmap-Policy Recommendations \& Timeline},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione inglese sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_EN.pdf},
}

% DLDP Roadmap, short Finnish version (per ABSTRACT).
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483251,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {DLDP etenemissuunnitelma-Toimenpidesuunnitelmat ja aikajana},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione finlandese sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_FI.pdf},
}

% DLDP Roadmap, short Spanish version (per ABSTRACT).
% The ampersand in TITLE is escaped because a bare one is a LaTeX alignment
% character and aborts typesetting of the bibliography.
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483254,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {La DLDP Hoja de Ruta-Políticas recomendadas \& Cronograma},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione spagnola sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_ES.pdf},
}

% DLDP Roadmap, short Italian version; the AUTHOR list continues on the next
% line of the file.
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483255,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.
and Soria, C.},
  TITLE = {Diversità Linguistica Digitale: la Roadmap-Raccomandazioni strategiche \& Sequenza},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione italiana sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_IT.pdf},
}

% DLDP Roadmap, short Basque version (per ABSTRACT).
% The ampersand in TITLE is escaped because a bare one is a LaTeX alignment
% character and aborts typesetting of the bibliography.
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483256,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {DLDP Bide Orria-Gomendatutako politikak \& Kronograma},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione basca sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_EU.pdf},
}

% DLDP Roadmap, short German version (per ABSTRACT); ampersand escaped as above.
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483262,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {Die DLDP Roadmap-Strategieempfehlungen \& Zeitplan},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione tedesca sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_DE.pdf},
}

% DLDP Roadmap, short French version; the AUTHOR list continues on the next
% line of the file.
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483263,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.
and Soria, C.},
  TITLE = {La Roadmap DLDP-Recommandations de politique et calendrier},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione francese sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_FR.pdf},
}

% DLDP survival kit for Breton, Breton-language edition (per ABSTRACT); no PAGES exported.
@TECHREPORT{HICKS_2018_TECHREPORT_HSBCGKQRSS_443354,
  AUTHOR = {Hicks, D. and Soria, C. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.},
  TITLE = {Pak treuzveviñ ar Brezhoneg niverel-Erbedoù an DLDP evit gwellaat buhezegezh niverel ar brezhoneg},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del bretone (versione bretone)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Breton_BR.pdf},
}

% DLDP survival kit for Breton, French-language edition (per ABSTRACT).
@TECHREPORT{HICKS_2018_TECHREPORT_HSBCGKQRSS_443359,
  AUTHOR = {Hicks, D. and Soria, C. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.},
  TITLE = {Kit de survie numerique pour la langue bretonne-Les recommandations du DLDP pour améliorer la vitalité numérique du Breton},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del bretone (versione francese)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Breton_FR.pdf},
}

% DLDP survival kit for Karelian, Finnish-language edition; the AUTHOR list
% continues on the next line of the file.
@TECHREPORT{SALONEN_2018_TECHREPORT_SBCGHKQRSS_443365,
  AUTHOR = {Salonen, T. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Sarhimaa, A.
and Soria, C.},
  TITLE = {Karjalan digitaalinen kielenselviytymispakkaus-DLDP-suositukset karjalan kielen digitaalisen elinvoimaisuuden parantamiseksi},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale della lingua careliana (versione finlandese)},
  KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Karelian},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Karelian_FI.pdf},
}

% DLDP survival kit for Karelian, Karelian-language edition (per ABSTRACT).
@TECHREPORT{SALONEN_2018_TECHREPORT_SBCGHKQRSS_483261,
  AUTHOR = {Salonen, T. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Sarhimaa, A. and Soria, C.},
  TITLE = {Karjalan digitualine hengihjiämispakkavus-DLDP-rekomendatsiet karjalan kielen digitualizen elinvoimazuon kohendamizeh},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua careliana (versione careliana)},
  KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Karelian},
  PAGES = {12},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Karelian_KRL.pdf},
}

% User guide for the Digital Language Vitality Scale; the ABSTRACT continues on
% the next line of the file.
@MISC{CEBERIO_2018_MISC_CGSRQ_440548,
  AUTHOR = {Ceberio, K. and Gurrutxaga, A. and Soria, C. and Russo, I. and Quochi, V.},
  TITLE = {How to Use the Digital Language Vitality Scale},
  YEAR = {2018},
  ABSTRACT = {The Digital Language Vitality Scale is an instrument developed within the framework of the Digital Language Diversity Project (www.dldp.eu) for estimating the degree of digital vitality of any given language. It aims to be an instrument for self-assessment of the digital vitality of any language, although it is aimed in particular at identifying current gaps, needs and requirements regarding the extent to which a language community is active/vital on digital media and devices so that adequate digital language planning can be done.
This document instructs prospective adopters on how to best use it.},
  KEYWORDS = {Diversità Linguistica, BLARK, Sopravvivenza linguistica digitale},
  PAGES = {18},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Vitality-Scale.pdf},
}

% Chapter on digital language diversity; the ABSTRACT continues on the next line
% of the file.
% NOTE(review): INCOLLECTION normally requires BOOKTITLE and PUBLISHER, neither
% of which is exported for this entry; the URL file name suggests the Linguapax
% Review 2016 - confirm before adding.
@INCOLLECTION{SORIA_2017_INCOLLECTION_S_382104,
  AUTHOR = {Soria, C.},
  TITLE = {What is Digital Language Diversity and why should we care?},
  YEAR = {2017},
  ABSTRACT = {The relationship between language and the Internet is a growing area of policy interest and academic study, see for instance (MAAYA 2012), (Paolillo et al. 2005), (Pimienta 2001), (Kornai 2013), (Pimienta et al. 2009), (Rehm and Uszkoreit 2012). The emerging picture is one where language profoundly affects a person's experience of the Internet. It determines how much - if any - information you can access on Wikipedia. It orients a person's choices and decisions by shaping the results of a search engine, depending on the language used. It determines the range of services that can be available over the Internet, and therefore the amount of everyday tasks (such as buying a ticket, reviewing opinions about hotel and restaurants, purchasing books or other goods, etc.) that can be carried out virtually. Far from infinite, the Internet, it seems, is only as big as one's language. Should this hold true, it would be at odds with the original spirit of the Internet, which - according to the words of Tim Berners-Lee - would be a place "to cross barriers and connect cultures". But it is safe to argue that the extent to which a language can be used over the Internet not only affects a person's experience and choice of opportunities; it also affects the language itself.
If a language is poorly or not supported to be used over digital devices, for instance if the keyboard of the PC is not equipped with the characters and diacritics necessary to write in the language, or if there is no spell checker for a language, then its usability becomes severely affected, and it might never be used online. The language could become "digitally endangered", and its value and profile could be lessened, especially in the eyes of the new generations. These considerations call for closer examination of a number of related issues. First, the "digital language diversity", i.e. the linguistic diversity of the Internet. Second, it is important to reflect on the conditions that make it possible for a language to be used over digital devices, and about what can be done in order to grant this possibility to languages other than so-called "major" ones.},
  KEYWORDS = {digital language diversity},
  PAGES = {13--28},
  URL = {http://www.linguapax.org/wp-content/uploads/2015/03/LinguapaxReview2016web.pdf},
}

% Guest-edited special issue of "Language Resources and Evaluation", volume 51.
% NOTE(review): DOI is identical to the one on the "Introduction to the Special
% Issue" entry elsewhere in this file - confirm which item it identifies.
@EDITORIAL{PRETORIUS_2017_EDITORIAL_PS_382299,
  AUTHOR = {Pretorius, L. and Soria, C.},
  TITLE = {Language Resources and Evaluation. Special Issue: Collaboration and Computing for Under-resourced Languages},
  YEAR = {2017},
  ABSTRACT = {Special issue of the journal "Language Resources and Evaluation", dedicated to under-resourced languages},
  KEYWORDS = {under-resourced languages, language resources, minority languages, endangered languages, small languages},
  PAGES = {891--1084},
  URL = {https://link.springer.com/journal/10579/51/4/page/1},
  VOLUME = {51},
  DOI = {10.1007/s10579-017-9405-8},
  PUBLISHER = {Springer (Berlin, DEU)},
}

% DLDP survey reports; the AUTHOR list continues on the next line of the file.
@EDITORIAL{SORIA_2017_EDITORIAL_SRQ_382301,
  AUTHOR = {Soria, C. and Russo, I.
and Quochi, V.},
  TITLE = {Reports on Digital Language Diversity in Europe},
  YEAR = {2017},
  ABSTRACT = {In these reports we present the results of the first survey about the actual needs of European minority languages speakers in terms of digital opportunities},
  KEYWORDS = {regional languages, minority languages, digital vitality, digital use},
  URL = {http://www.dldp.eu/content/reports-digital-language-diversity-europe},
}

% Introduction to the LRE special issue, volume 51.
% NOTE(review): the journal name is stored in BOOKTITLE rather than JOURNAL, so
% article-style layouts will not pick it up - left as exported.
@EDITORIAL{PRETORIUS_2017_EDITORIAL_PS_382062,
  AUTHOR = {Pretorius, L. and Soria, C.},
  TITLE = {Introduction to the Special Issue},
  YEAR = {2017},
  KEYWORDS = {language resources, under-resourced languages, minority languages},
  PAGES = {891--895},
  URL = {https://link.springer.com/article/10.1007%2Fs10579-017-9405-8},
  VOLUME = {51},
  DOI = {10.1007/s10579-017-9405-8},
  PUBLISHER = {Springer (Dordrecht, Paesi Bassi)},
  ISSN = {1574-020X},
  BOOKTITLE = {Language resources and evaluation (Print)},
}

% Seminar contribution on the DLDP survey; the ABSTRACT continues on the next
% line of the file.
@INPROCEEDINGS{RUSSO_2017_INPROCEEDINGS_RS_382094,
  AUTHOR = {Russo, I. and Soria, C.},
  TITLE = {Digital Language Diversity on New Media: the DLDP Survey about European Minority Languages Speakers},
  YEAR = {2017},
  ABSTRACT = {How does the linguistic diversity of Europe reflect in the New Media? Do regional and minority languages contribute to EU digital language diversity? In this paper we will present the results of the first survey about actual needs of European minority languages speakers regarding digital opportunities. The survey is part of the work carried out by the Digital Language Diversity Project (DLDP), a three-year Erasmus+ project started in September 2015. The goal of DLDP is helping minority languages speakers in the acquisition of intellectual and practical skills to create, share, and reuse online digital content, at the same time defining general guidelines and best practices for the promotion of minority languages with poor digital representation, a fact that further prevents their usability on digital media and devices.
The focus of the project is on four European minority languages at different stages of digital developments (Basque, Breton, Karelian and Sardinian), and this will enable a comparison about the role of the availability of digital content for promotion of digital usage of these languages and development of language-based digital applications. With the aim of understanding the specific needs and the peculiar behaviours of speakers of these languages, during Spring 2016 we conducted a survey focused on gathering information about their personal digital use of the language and about any known digital resource and services that make use of the language. We received feedback from almost 2000 speakers and we are now in the position of analysing results for future actions. In particular, taking into account media user typology elaborated by Brandtzæg (2010) (e.g. entertainment, instrumental and advanced users) we aim to profile speakers' answers according to these different classes, in order to better understand how to make speakers aware of the opportunities new media offer for preservation and revitalisation of minority languages.},
  KEYWORDS = {minority languages, regional languages, new media, digital language diversity, digital language development},
  URL = {https://minoritylanguagesnewmedia2017.files.wordpress.com/2017/03/final_abstracts-baal-cup-seminar-on-minority-languages-in-new-media.pdf},
  CONFERENCE_NAME = {BAAL-Cambridge University Press Seminar on Minority Languages in New Media},
  CONFERENCE_DATE = {27-28/4/2017},
}

% Conference abstract presenting the Digital Language Vitality Scale; the
% ABSTRACT continues on the next line of the file.
@INPROCEEDINGS{SORIA_2017_INPROCEEDINGS_S_382071,
  AUTHOR = {Soria, C.},
  TITLE = {The digital language vitality scale: a model for assessing digital vitality of languages},
  YEAR = {2017},
  ABSTRACT = {In this paper, we present the Digital Language Vitality Scale, a tool for measuring the degree of digital vitality of languages.
Digital vitality can be defined as the extent to which a language is present, used and usable over the Internet through digital devices. The scale is inspired by ethnolinguistic vitality assessment (such as GIDS, Fishman 2001), updated by (Lewis and Simons 2010) as EGIDS, and the UNESCO "nine factors" (UNESCO 2003), and is based on previous work in this area such as (Kornai 2013) and (Gibson 2015). Seven levels of digital vitality are identified, from "pre-digital" to "digitally thriving", and a set of associated indicators. The indicators associated with the scale are proxies representing both digital representation (presence) of a language and digital use. They are clustered into three groups: a first group of indicators refers to digital usability of a language, for instance, the existence of Internet connection or the availability of standardised fonts for writing the language. A second group of indicators is related to the quality and amount of digital use of a language: if and how much a language is used for texting and emailing, on websites, blogs, if there are e-books, Wikipedias, if the language is used on social media. The last group of indicators correlates with the digital prestige of a language; they are a sign of a language that not only is used on digital media and devices, but it is so in a full-fledged way, enjoying the widest possible ranges of uses and applications (e.g. localised digital services, machine translation, edu-tainment products and services).
The scale is currently being used in the context of the DLDP project (http://www.dldp.eu) as an assessing instrument for digital language planning, with particular reference to regional and minority languages.},
  KEYWORDS = {digital vitality, language vitality, digital language diversity},
  PAGES = {100},
  URL = {https://icriml.indiana.edu/conference-program/Abstractbook.pdf},
  CONFERENCE_NAME = {First International Conference on Revitalization of Indigenous and Minoritized Languages},
  CONFERENCE_PLACE = {Barcelona/Vic},
  CONFERENCE_DATE = {19-21/04/2017},
}

% Panel abstract on language policy and Italy's regional languages; the ABSTRACT
% continues on the next line of the file.
% NOTE(review): INPROCEEDINGS normally requires BOOKTITLE; only CONFERENCE_NAME
% is exported for this entry.
@INPROCEEDINGS{SORIA_2017_INPROCEEDINGS_S_382081,
  AUTHOR = {Soria, C.},
  TITLE = {Language policies and speakers' attitudes: evaluating the impact of official recognition on some of Italy's regional languages},
  YEAR = {2017},
  ABSTRACT = {The panel focuses on the minority (some of them highly endangered) languages of Italy, with a special attention to those which are not recognized (nor supported) by the Italian Government. Key points will be a. the official language policy of Italy, b. language discrimination, c. language ideology and d. the ambiguous role of academic institutions vis-à-vis languages and dialects, e. the effects (and results) of official support for recognized minority languages, as well as f.
grassroots approaches to the standardization and development of unrecognized languages and new developments on the net.},
  KEYWORDS = {minority languages, multilingualism, language policy},
  PAGES = {42},
  URL = {https://icriml.indiana.edu/conference-program/Abstractbook.pdf},
  CONFERENCE_NAME = {First International Conference on Revitalization of Indigenous and Minoritized Languages},
  CONFERENCE_DATE = {19-21/04/2017},
}

% Workshop abstract on the DLDP survey; the ABSTRACT continues on the next line
% of the file.
% NOTE(review): INPROCEEDINGS normally requires BOOKTITLE; only CONFERENCE_NAME
% is exported for this entry.
@INPROCEEDINGS{SORIA_2017_INPROCEEDINGS_S_382083,
  AUTHOR = {Soria, C.},
  TITLE = {Inquiring current digital use and usability of regional and minority languages: the DLDP survey},
  YEAR = {2017},
  ABSTRACT = {The Digital Language Diversity Project is a three-year project funded under the Erasmus+ programme that addresses the problem of low digital representation and use of EU regional and minority languages, a cause for their endangerment according to some scholars. One of the first actions of the project is to assess the current use and usability of four EU regional/minority languages, representing very different degrees of digital language representation and use: these languages are Sardinian, Karelian, Basque and Breton. From June to September 2016, the DLDP project has been spreading a survey, that was localized and translated into these languages. The survey is developed on the basis of previous work carried out in the area of ethnolinguistic vitality, such as the ELDIA Barometer, and other inquiries addressing specifically digital use of languages and availability and usability of digital resources and media. The DLDP survey consists of a general part collecting basic information on the informant (age, sex, proficiency level in the language, frequency of use, etc.). The second part is focused on gathering information about his/her personal digital use of the language and about any known digital resource and services that make use of the language. The survey is the first ever study of the digital needs of minority language speakers.
It will give stakeholders and academia a detailed view into what actual language speakers are thinking about in terms of how they want to develop provision for their languages in the digital sphere. Therefore, we strongly encourage wide adoption and dissemination of the survey to regional and minority languages beyond the four investigated. The workshop intends to illustrate and discuss the model survey, to share it with researchers interested in adopting it for other languages, and to discuss collaboration paths.},
  KEYWORDS = {digital language use, regional languages, minority languages, digital language vitality},
  PAGES = {44},
  URL = {https://icriml.indiana.edu/conference-program/Abstractbook.pdf},
  CONFERENCE_NAME = {First International Conference on Revitalization of Indigenous and Minoritized Languages},
  CONFERENCE_DATE = {19-21/04/2017},
}

% Linguapax roundtable contribution; the ABSTRACT continues on the next line of
% the file.
@INPROCEEDINGS{SORIA_2017_INPROCEEDINGS_S_382086,
  AUTHOR = {Soria, C.},
  TITLE = {Alliances for digital linguistic diversity},
  YEAR = {2017},
  ABSTRACT = {Linguapax proposes a complementary pair of Roundtable discussions in the Conference's thematic line 1: The value of linguistic diversity, from an operational perspective. This "diptych" will present different types of alliances that create contexts for the preservation and continuation of linguistic diversity (RT 1), and will show how a plural perspective on linguistic diversity can emerge, drawing on Linguapax's experience as an international network (RT 2). In the first Roundtable discussion we will tackle the generation of different contexts of interaction (networks and alliances) to enhance the presence and vitality of linguistic diversity - those contexts emerging from linguistic, cultural and digital networks (although should be - in theory - mostly overlapping, an overview of their specific potentialities is useful and needed): 1. "Linguistic cooperation": International networks of projects/exchange of experiences in language revitalisation.
Andoni Barreña (Garabide Elkartea, Basque Country); 2. Alliances for digital linguistic diversity. Claudia Soria. Consiglio Nazionale delle Ricerche. Pisa. 3. International cultural cooperation. Although "cultural cooperation" should be the common umbrella, it is usually driven by a specific logic in which linguistic diversity rarely comes consistently into play. On this occasion for dialogue, the Round Table will expose linguists/activists to a potentially useful vocabulary and world of networks and platforms. Jordi Pascual, expert on international cultural relations. This Roundtable aims to harness the relational potential of the Conference: On one hand, putting into dialogue two contributions already planning individual participation (1, 2), and on the other, inviting an external perspective to crossfertilise the debate and increase the scope for networking. Given the operational approach of this Roundtable, active interaction with the audience will be a key element for achieving the desired multiplier effect. Moderator: Alícia Fuentes-Calle. Linguapax (Barcelona). Departament de Lingüística. Universitat de Barcelona.}, KEYWORDS = {linguistic diversity, digital language diversity}, URL = {https://publications.cnr.it/doc/382086}, CONFERENCE_NAME = {ROUNDTABLE DISCUSSION-Linguapax-I. Generating contexts for linguistic diversity to thrive: networks of linguistic, cultural and digital cooperation. First International Conference on Revitalization of Indigenous and Minoritized Languages}, CONFERENCE_DATE = {19-21/04/2017}, } @TECHREPORT{RUSSO_2017_TECHREPORT_RS_382302, AUTHOR = {Russo, I. 
and Soria, C.}, TITLE = {Sardinian-a digital language?}, YEAR = {2017}, ABSTRACT = {In this report we present the results of the first survey about the actual needs of Sardinian speakers in terms of digital opportunities}, KEYWORDS = {digital use, digital language diversity, Sardinian}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Sardinian-Report.pdf}, } @EDITORIAL{SORIA_2016_EDITORIAL_SPDMSW_355531, AUTHOR = {Soria, C. and Pretorius, L. and Declerck, T. and Mariani, J. and Scannell, K. and Wandl Vogt, E.}, TITLE = {CCURL 2016 Collaboration and Computing for Under-Resourced Languages: Towards an Alliance for Digital Language Diversity}, YEAR = {2016}, ABSTRACT = {Atti del Workshop "CCURL 2016 Collaboration and Computing for Under-Resourced Languages: Towards an Alliance for Digital Language Diversity"}, KEYWORDS = {Less-resourced languages, Language Technology, digital language vitality, digital language diversity}, PAGES = {1-103}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/workshops/LREC2016Workshop-CCURL2016_Proceedings.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, } @INPROCEEDINGS{DELGRATTA_2016_INPROCEEDINGS_DFMPRBKSC_355425, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Khan, F. and Soria, C. and Calzolari, N.}, TITLE = {LREC as a Graph: People and Resources in a Network}, YEAR = {2016}, ABSTRACT = {This proposal describes a new way to visualise resources in the LREMap, a community-built repository of language resource descriptions and uses. The LREMap is represented as a force-directed graph, where resources, papers and authors are nodes. 
The analysis of the visual representation of the underlying graph is used to study how the community gathers around LRs and how LRs are used in research.}, KEYWORDS = {Language Resources, Resources Documentation, Data Visualisation}, PAGES = {2529-2532}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{SORIA_2016_INPROCEEDINGS_SRQHGST_355526, AUTHOR = {Soria, C. and Russo, I. and Quochi, V. and Hicks, D. and Gurrutxaga, A. and Sarhimaa, A. and Tuomisto, M.}, TITLE = {Fostering digital representation of EU regional and minority languages: the Digital Language Diversity Project}, YEAR = {2016}, ABSTRACT = {Poor digital representation of minority languages further prevents their usability on digital media and devices. The Digital Language Diversity Project, a three-year project funded under the Erasmus+ programme, aims at addressing the problem of low digital representation of EU regional and minority languages by giving their speakers the intellectual and practical skills to create, share, and reuse online digital content. Availability of digital content and technical support to use it are essential prerequisites for the development of language-based digital applications, which in turn can boost digital usage of these languages. 
In this paper we introduce the project, its aims, objectives and current activities for sustaining digital usability of minority languages through adult education.}, KEYWORDS = {Less-resourced languages, Language Technology, digital language vitality, digital language diversity}, PAGES = {3256-3260}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 may}, } @INPROCEEDINGS{DOLOWYRYBINSKA_2016_INPROCEEDINGS_DS_354799, AUTHOR = {Dolowy Rybinska, N. and Soria, C.}, TITLE = {Surveying the ethnolinguistic vitality of two regional collateral languages: the case of Kashubian and Piedmontese}, YEAR = {2016}, ABSTRACT = {The paper presents the results of a Polish-Italian research project concerning the vitality of two regional collateral languages: Kashubian in Poland and Piedmontese in Italy. Despite their different status (Kashubian is a language recognised under the Polish law while Piedmontese is not), they are both perceived as dialects of the State language by the inhabitants of Poland and Italy. The status and prestige of both languages in their respective countries are low; consciousness about the importance of their maintenance within the communities and outside them is weakening. As they belong to the same language family as the dominant language they were/are treated as dialects of the State languages not worth of preservation. Current accounts of language vitality for Kashubian and Piedmontese are not entirely satisfactory in that they seem to overestimate the importance of the number of speakers over speakers' attitudes and stigma. 
In this paper, we will present the preliminary results of the survey, focusing on the interdependence between actual and perceived use of the two languages on the one side, and different ethnolinguistic vitality parameters, such as self-assessment of language proficiency, awareness of the language institutional status and policies, attitudes towards the language, and language ideology. This research is a pilot study that aims to raise the discussion on current assessment of ethnolinguistic vitality and to broaden it to other languages that are contested, unrecognized or treated as dialects of the State languages.}, KEYWORDS = {regional and minority languages ethnolinguistic vitality study}, URL = {https://publications.cnr.it/doc/354799}, CONFERENCE_NAME = {Contested Languages in the Old World #2}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {5-6/05/2016}, } @INCOLLECTION{SORIA_2015_INCOLLECTION_S_333636, AUTHOR = {Soria, C.}, TITLE = {Assessing the effect of official recognition on the vitality of minority and regional languages: a case study from Italy}, YEAR = {2015}, ABSTRACT = {In 1999, a rather controversial Italian law granted official recognition to twelve endangered regional and minority languages but denied it to others that were nevertheless also classed as endangered by UNESCO and the Ethnologue. This turn of events has produced a perfect scenario to assess the impact of language policies on protected languages and, at the same time, the effects of lack of official protection and recognition for languages that are denied such institutional support. This chapter presents the results of a survey carried out among speakers of these endangered languages. 
It assesses their vitality in terms of speaker numbers, domains of use, intergenerational transmission and speaker attitudes, arguing that a correlation can be established, on the one hand, between positive speaker attitudes and favourable language policies and, on the other, between lack of policy support and negative language attitudes. The chapter further argues language policy can actually alter linguistic behaviour.}, KEYWORDS = {language policy, endangered languages, regional languages}, PAGES = {123-137}, URL = {https://publications.cnr.it/doc/333636}, PUBLISHER = {Cambridge University Press (Cambridge, GBR)}, ISBN = {978-1-107-09922-7}, BOOKTITLE = {Policy and Planning for Endangered Languages}, EDITOR = {Jones, M. C.}, } @INPROCEEDINGS{DELGRATTA_2015_INPROCEEDINGS_DFMPRBGKQSC_342213, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Goggi, S. and Khan, F. and Quochi, V. and Soria, C. and Calzolari, N.}, TITLE = {Visualising Italian Language Resources: a Snapshot}, YEAR = {2015}, ABSTRACT = {This paper aims to provide a first snapshot of Italian Language Resources (LRs) and their uses by the community, as documented by the papers presented at two different conferences, LREC2014 and CLiC-it 2014. The data of the former were drawn from the LOD version of the LRE Map, while those of the latter come from manually analyzing the proceedings. 
The results are presented in the form of visual graphs and confirm the initial hypothesis that Italian LRs require concrete actions to enhance their visibility.}, KEYWORDS = {Italian Language Resources}, PAGES = {100-104}, URL = {https://books.openedition.org/aaccademia/1277?lang=it}, ISBN = {978-88-99200-62-6}, CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics CLiC-it 2015}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {3-4 December 2015}, BOOKTITLE = {Proceedings of the Second Italian Conference on Computational Linguistics CLiC-it 2015}, EDITOR = {Bosco, C. and Tonelli, S. and Zanzotto, F. M.}, } @INPROCEEDINGS{SORIA_2015_INPROCEEDINGS_S_332517, AUTHOR = {Soria, C.}, TITLE = {Towards a notion of "Digital Language Diversity"}, YEAR = {2015}, ABSTRACT = {This paper introduces the concept of digital language diversity and advocates for its increase in order to foster the digital vitality of languages, and secure their overall vitality.}, KEYWORDS = {digital language diversity, NLP, less-resourced languages, regional languages, minority languages, digital rights}, PAGES = {111-125}, URL = {https://publications.cnr.it/doc/332517}, CONFERENCE_NAME = {3rd International Conference on Linguistic and Cultural Diversity in Cyberspace}, CONFERENCE_PLACE = {Yakutsk, Russian Federation}, CONFERENCE_DATE = {30/06/2014-03/07/2014}, BOOKTITLE = {Linguistic and Cultural Diversity in Cyberspace-Proceedings of the 3rd International Conference}, EDITOR = {Kuzmin, E. and Parshakova, A. and Ignatova, D.}, } @INPROCEEDINGS{SORIA_2015_INPROCEEDINGS_S_332521, AUTHOR = {Soria, C.}, TITLE = {Towards an Alliance for Digital Language Diversity: Vision, Goals, and Challenges}, YEAR = {2015}, ABSTRACT = {In order to foster the world's digital language diversity, and to ensure equal digital opportunities for languages, we encourage the creation of an Alliance for Digital Language Diversity, i.e. 
a network of different stakeholders involved in the creation and deployment of data. The Alliance needs - and presupposes - educational activities aimed at building the necessary digital skills and creating the psychological self-confidence necessary for speakers to produce data using their mother tongue.}, KEYWORDS = {digital language diversity, less-resourced languages, minority languages, digital rights, data production}, URL = {https://publications.cnr.it/doc/332521}, CONFERENCE_NAME = {Ugra Global Expert Meeting on Multilingualism in Cyberspace}, CONFERENCE_PLACE = {Khanty-Mansiysk, Russia}, CONFERENCE_DATE = {4-9/07/2015}, } @ARTICLE{SORIA_2014_ARTICLE_SCMQBCMOP_285553, AUTHOR = {Soria, C. and Calzolari, N. and Monachini, M. and Quochi, V. and Bel, N. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.}, TITLE = {The language resource Strategic Agenda: the FLaReNet synthesis of community recommendations}, YEAR = {2014}, ABSTRACT = {The main purpose of this paper is to serve as a landmark for future research and in particular for future strategic, infrastructural and coordination initiatives. It presents a preliminary plan for actions and infrastructures that could become the basis for future initiatives in the sector of Language Resources and Technologies (LRTs). The FLaReNet Language Resource Strategic Agenda presents a set of recommendations for the development and progress of LRT in Europe, as issued from a three-year consultation of the FLaReNet European project. Recommendations cover a broad range of topics and activities, spanning over production and use of language resources, licensing, maintenance and preservation issues, infrastructures for language resources, resource identification and sharing, evaluation and validation, interoperability and policy issues. 
The intended recipients belong to a large set of players and stakeholders in LRT, ranging from individuals to research and education institutions, to policy-makers, funding agencies, SMEs and large companies, service and media providers}, KEYWORDS = {Strategic agenda, Language resources planning, Recommended priority actions}, PAGES = {753-775}, URL = {https://publications.cnr.it/doc/285553}, VOLUME = {48}, DOI = {10.1007/s10579-014-9279-y}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @INCOLLECTION{CALZOLARI_2014_INCOLLECTION_CNMQST_286868, AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {Lexicons, Terminologies, Ontologies: Reflections from Experiences in Resource Construction}, YEAR = {2014}, ABSTRACT = {This contribution aims at highlighting the strong interconnection between lexicons, terminologies and ontologies and especially the fundamental role that ontologies and lexica mutually play. Our view is that lexical resources are evolving in nature, from ontologically based lexicons we are going towards lexically based ontologies. We explore different instantiations of the current trend of using formal ontologies as a core module of computational lexicons, presenting the advantages especially in multilingual and terminological contexts. We present work showing that the lexical knowledge already present in non formal computational lexicons can be exploited to derive or enrich a formal ontology without much manual effort. In the terminology domain, we describe the construction of a resource for biology, directly linked to a parallel domain-ontology, that combines characteristics of both lexicons and terminologies, so that it can allow for intelligent access to content. 
Finally, we describe our experience in two projects in which formal ontologies play a central role in the context of multilingual computational lexicons, where the ontology is what acts as the glue among the different monolingual lexicons and what provides cross-lingual reasoning capabilities.}, KEYWORDS = {Computational Lexicons, Ontology, Terminology, Interoperability, Standards}, PAGES = {103-121}, URL = {http://www.springer.com/computer/ai/book/978-3-642-45326-7}, VOLUME = {8003}, DOI = {10.1007/978-3-642-45327-4_7}, PUBLISHER = {Springer (Berlin Heidelberg, DEU)}, ISBN = {978-3-642-45326-7}, BOOKTITLE = {Language, Culture, Computation. Computational Linguistics and Linguistics. Essays Dedicated to Yaacov Choueka on the Occasion of His 75th Birthday, Part III}, EDITOR = {Dershowitz, N. and Nissan, E.}, } @EDITORIAL{PRETORIUS_2014_EDITORIAL_PSB_285396, AUTHOR = {Pretorius, L. and Soria, C. and Baroni, P.}, TITLE = {Proceedings of the Workshop on Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)}, YEAR = {2014}, ABSTRACT = {Proceedings del Workshop su Collaborazione e Computazione per le Lingue con Risorse Insufficienti nell'era dei Dati Aperti Collegati (CCURL 2014 | Reykjavik, 26/05/2014)}, KEYWORDS = {under-resourced languages}, PAGES = {107}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, } @INPROCEEDINGS{DELGRATTA_2014_INPROCEEDINGS_DFKMS_285395, AUTHOR = {Del Gratta, R. and Frontini, F. and Khan, F. and Mariani, J. and Soria, C.}, TITLE = {The LREMap for Under-Resourced Languages}, YEAR = {2014}, ABSTRACT = {A complete picture of currently available language resources and technologies for the under-resourced languages of Europe is still lacking. 
Yet this would help policy makers, researchers and developers enormously in planning a roadmap for providing all languages with the necessary instruments to act as fully equipped languages in the digital era. In this paper we introduce the LRE Map and show its utility for documenting available language resources and technologies for under-resourced languages. The importance of the serialization of the LREMap into (L)LOD along with the possibility of its connection to a wider world is also introduced.}, KEYWORDS = {language resources, less-resourced languages, linguistic linked open data}, PAGES = {78-83}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, CONFERENCE_NAME = {Workshop on Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)}, CONFERENCE_PLACE = {Reykjavik}, CONFERENCE_DATE = {26/05/2014}, BOOKTITLE = {Proceedings of the Workshop on Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)}, EDITOR = {Pretorius, L. and Soria, C. and Baroni, P.}, } @INPROCEEDINGS{RANDACCIO_2014_INPROCEEDINGS_RSZ_285389, AUTHOR = {Randaccio, S. and Soria, C. 
and Zoli, C.}, TITLE = {Standardized orthography: a shoe for barefoot}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/285389}, CONFERENCE_NAME = {Language Endangerment: Orthography Development for Language Maintenance and Revitalisation}, CONFERENCE_PLACE = {Cambridge, UK}, CONFERENCE_DATE = {04/07/2014}, } @INPROCEEDINGS{SORIA_2014_INPROCEEDINGS_S_285384, AUTHOR = {Soria, C.}, TITLE = {Regional and Minority Languages of Italy, and the importance of terminology}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/285384}, CONFERENCE_NAME = {ELEN General Assembly}, CONFERENCE_PLACE = {Helsinki, Finlandia}, CONFERENCE_DATE = {04/10/2014}, } @INPROCEEDINGS{SORIA_2014_INPROCEEDINGS_S_285385, AUTHOR = {Soria, C.}, TITLE = {Towards a notion of "Digital Language Diversity": the role of technologies for preserving multilingualism}, YEAR = {2014}, KEYWORDS = {digital language diversity, NLP, less-resourced languages, regional languages, minority languages, digital rights}, URL = {https://publications.cnr.it/doc/285385}, CONFERENCE_NAME = {Seminario tenuto presso Department of English and Linguistics, University of Mainz}, CONFERENCE_PLACE = {Mainz}, CONFERENCE_DATE = {10/06/2014}, } @INPROCEEDINGS{SORIA_2014_INPROCEEDINGS_S_285388, AUTHOR = {Soria, C.}, TITLE = {Towards a notion of Digital Language Diversity}, YEAR = {2014}, KEYWORDS = {digital language diversity, NLP, less-resourced languages, regional languages, minority languages, digital rights}, URL = {https://publications.cnr.it/doc/285388}, CONFERENCE_NAME = {International Conference Linguistic Diversity in Cyberspace}, CONFERENCE_PLACE = {Yakutsk, Federazione Russa}, CONFERENCE_DATE = {28/06/2014-03/07/2014}, } @INCOLLECTION{CALZOLARI_2013_INCOLLECTION_CMS_280537, AUTHOR = {Calzolari, N. and Monachini, M. 
and Soria, C.}, TITLE = {LMF-Historical Context and Perspectives}, YEAR = {2013}, ABSTRACT = {The importance of designing standards for language resources (LR) is firmly established, starting with the Expert Advisory Group for Language Engineering (EAGLES) and International Standards for Language Engineering (ISLE) initiatives. Both EAGLES and ISLE stress the importance of reaching a consensus on (linguistic and nonlinguistic) "content", in addition to agreement on formats and encoding issues, and also address the needs of content processing and Semantic Web technologies. The recommendations for standards and best practices issued within the projects became, through the INTERA and mainly the LIRICS project, the International Organization for Standardization (ISO) within the ISO TC37/SC4 committee, where Lexical Markup Framework (LMF) was developed. Standards are fundamental to exchange, preserve, maintain and integrate data and LRs, to achieve interoperability in general, and they are an essential basis of any LR infrastructure.}, KEYWORDS = {EAGLES, international standards for language engineering, interoperability, lexical markup framework (LMF)}, PAGES = {1-18}, URL = {http://dx.doi.org/10.1002/9781118712696.ch1}, DOI = {10.1002/9781118712696.ch1}, PUBLISHER = {John Wiley \& Sons, Inc (Hoboken, USA)}, ISBN = {978-1-118-71259-7}, BOOKTITLE = {LMF Lexical Markup Framework}, EDITOR = {Gil, F. and Patrick, P.}, } @INCOLLECTION{HAYASHI_2013_INCOLLECTION_HMSSC_285427, AUTHOR = {Hayashi, Y. and Monachini, M. and Savas, B. and Soria, C. and Calzolari, N.}, TITLE = {LMF as a Foundation for Servicized Lexical Resources}, YEAR = {2013}, ABSTRACT = {This chapter argues that the lexical markup framework (LMF) can play a significant role in realizing servicized lexical resources on the Web. 
To accomplish this goal, it begins with a brief introduction of the notion of servicized resources, and then presents a technical architecture of, what is called, LMF-aware lexicon access services. It presents two implementation showcases to demonstrate the applicability of the LMF and to discuss its possible extensions. The first example deals with WordNet-type computational semantic lexicons, while the other takes up a machine-readable bilingual dictionary primarily compiled for human usage. To conclude the chapter, the final sections summarize the results while reviewing related work.}, KEYWORDS = {lexical markup framework (LMF), LMF-aware lexicon access services, servicized lexical resources}, PAGES = {201-213}, URL = {http://onlinelibrary.wiley.com/doi/10.1002/9781118712696.ch14/references}, DOI = {10.1002/9781118712696.ch14}, PUBLISHER = {Wiley-ISTE (Hoboken, USA)}, ISBN = {9781118712696}, BOOKTITLE = {LMF-Lexical Markup Framework}, EDITOR = {Francopoulo, G.}, } @INCOLLECTION{VOSSEN_2013_INCOLLECTION_VSM_285402, AUTHOR = {Vossen, P. and Soria, C. and Monachini, M.}, TITLE = {Wordnet-LMF: A Standard Representation for Multilingual Wordnets}, YEAR = {2013}, ABSTRACT = {Wordnet-lexical markup framework (LMF) is an instantiation of LMF for representing Wordnet-like semantic dictionaries. Wordnet is a widely accepted resource and thus provides a good case for testing the viability of a representation in LMF and the acceptance by a wide range of users. Wordnet-LMF was developed in the framework of the EU project KYOTO for the specific purpose of endowing a set of wordnets with a standardized interoperability format allowing the interchange of semantic information. This chapter explains the choices that were made to model the wordnet information in LMF. 
It provides a preliminary assessment of LMF, by large-scale application to real lexical resources, endowing wordnet with a format representation that allows easier integration among resources sharing the same structure and, more importantly, across resources with different theoretical and implementation approaches.}, KEYWORDS = {KYOTO project, multilingual wordnets, Wordnet-lexical markup framework}, PAGES = {51-66}, URL = {http://dx.doi.org/10.1002/9781118712696.ch4}, DOI = {10.1002/9781118712696.ch4}, PUBLISHER = {Wiley-ISTE (Hoboken, USA)}, ISBN = {9781118712696}, BOOKTITLE = {LMF-Lexical Markup Framework}, EDITOR = {Francopoulo, G.}, } @INPROCEEDINGS{SORIA_2013_INPROCEEDINGS_SM_225750, AUTHOR = {Soria, C. and Mariani, J.}, TITLE = {Searching LTs for minority languages}, YEAR = {2013}, ABSTRACT = {Les Technologies de la Langue (TL) sont un instrument nécessaire pour toutes les langues, en particulier celles qui aspirent à conquérir un espace dans les dispositifs numériques. Les langues qui ne sont pas équipées de technologies sont sérieusement menacées d'extinction numérique dans le long terme. Le projet META-NET a évalué l'état actuel des TL pour 30 langues européennes. Mais qu'en est-il des langues régionales et minoritaires ? Très peu d'informations sont disponibles pour elles. Dans cet article, nous présentons d'abord les données disponibles dans la LRE Map. Nous plaidons ensuite en faveur d'une campagne donnant une image complète des TL existant pour les langues minoritaires et régionales d'Europe. Cela aidera les décideurs, les chercheurs et les développeurs à planifier une feuille de route pour doter toutes les langues des instruments nécessaires pour fonctionner comme des langues correctement équipées dans l'ère numérique. 
Un recensement des TL disponibles et nécessaires est ainsi proposé.}, KEYWORDS = {Lingue regionali e minoritarie, risorse linguistiche, tecnologie linguistiche}, PAGES = {235-247}, URL = {http://www.taln2013.org/actes/www/TALARE-2013/actes/talare-2013-long-005.pdf}, CONFERENCE_NAME = {TALARE 2013: Traitement Automatique des Langues Régionales de France et d'Europe}, CONFERENCE_PLACE = {Les Sables d'Olonne}, CONFERENCE_DATE = {17-21 giugno 2013}, BOOKTITLE = {Actes de TALARE 2013: Traitement Automatique des Langues Régionales de France et d'Europe}, EDITOR = {Morin, E. and Estève, Y.}, } @INPROCEEDINGS{SORIA_2013_INPROCEEDINGS_SMZ_285446, AUTHOR = {Soria, C. and Mariani, J. and Zoli, C.}, TITLE = {Dwarfs sitting on the giants' shoulders-how LTs for regional and minority languages can benefit from piggybacking major languages}, YEAR = {2013}, ABSTRACT = {LTs are a necessary instrument for all languages, especially for those aiming at conquering a space over digital devices. Languages that are not equipped with LT seriously face digital extinction in the long run. Many challenges are to be faced to equip minority languages with LTs (from basic to advanced): the almost complete lack of knowledge about available resources and technologies, the substantial delay in development of basic technologies, the lack of cooperation among minority languages communities, the chronic shortage of funding (in particular for minority languages not officially recognized, yet often the most vital ones over the Internet) and the limited economic value placed over LTs for minority languages by the digital market rules. 
In this paper we suggest how these challenges can be overcome, and how coordinated and standardized cooperation among all interested stakeholders can lead to better knowledge and awareness of the breadth and depth of available technologies.}, PAGES = {73-79}, URL = {https://publications.cnr.it/doc/285446}, ISBN = {978-0-9560210-5-2}, CONFERENCE_NAME = {XVII FEL Conference}, CONFERENCE_PLACE = {Ottawa, Canada}, CONFERENCE_DATE = {01/10/2013-04/10/2013}, BOOKTITLE = {Proceedings of the XVII FEL Conference}, EDITOR = {Norris, M. J. and Anonby, E. and Junker, M. and Ostler, N. and Patrick, D.}, } @INPROCEEDINGS{ZOLI_2013_INPROCEEDINGS_ZSR_285441, AUTHOR = {Zoli, C. and Soria, C. and Randaccio, S.}, TITLE = {The status, corpus planning and speakers' attitudes on Romagnol (ISO 639-3: rgn)}, YEAR = {2013}, ABSTRACT = {Even if recognized by Ethnologue with a clear ISO code, Romagnol is still underestimated and often considered an Italian dialect. One of the variants of Gallo-Italic languages, it has a strong linguistic identity and is well determined as Abstandsprache with regards to Veneto and Marchigiano; less clear the boundaries towards west, where there is a dialectal continuum with Emilian (egl). With respect to Emilian, Romagnol is developing as Ausbausprache, being strong and clear the perception the community has of a Romagnol history, territory, ethnicity, separated from that of Emilia. However, it lacks a relevant factor for its status: a written standard. Institute "Friedrich Schürr" is one of the most active associations in the field of language support, but it has to do a crucial step: from an amateurish and folkloric activism to a real effort for a shared recognition of the language. The creation of a standard spelling is the path to follow for public presence, but there is still big tension between the old and the new vision. 
The fragmentation of oral varieties of Romagnol has never been a communication barrier, but nowadays it is perceived as a problem in terms of a written standardization. The Italian equation that "the language is written as it is spoken" causes the misleading belief that every difference in pronunciation must be registered in writing. It is actually the opposite: after six years of age we do not read letter by letter but we register the entire word "photo-graphing" it. Therefore, it is not obviously necessary that the phonetics matches the spelling. But minority speakers often get confused between speaking and writing, and fear that the standardization of the language may harm their local dialect. A standardized spelling only makes sense for a written language. If there were, for example, a talk show in Romagna, the titles and explanatory signs would be in standard Romagnol, but the presenter and the guests would talk in their own dialects (as it happens in German Switzerland or in Norway). At the same time, the speakers who fear standardization, also reject the use of tools such as electronic instruments for spellchecking (according to the belief that everyone writes in his or her own way) and do not accept the creation of neologisms because they are alien to the traditional language these speakers learned as children. These attitudes contribute to relegate minority languages such as Romagnol to the status of dialects and prevent them to evolve and flourish. In our presentation we will briefly sketch the dialectal situation of Romagnol, the main standardization problems and the issues arisen in the last years among activists. 
We will try to demonstrate that standardization is not only necessary but also fundamental if we want to give minority languages such as Romagnol the same status and dignity of national languages.}, KEYWORDS = {orthography, standardization, minority language, regional language}, PAGES = {124-125}, URL = {http://icml14.uni-graz.at/etc/upload/ICML_XIV_programme.pdf}, CONFERENCE_NAME = {14th International Conference on Minority Languages (ICML XIV)}, CONFERENCE_PLACE = {Graz, Austria}, CONFERENCE_DATE = {11/09/2013-17/09/2014}, BOOKTITLE = {International Conference on Minority Languages XIV (ICML XIV)}, } @INPROCEEDINGS{SORIA_2013_INPROCEEDINGS_S_285397, AUTHOR = {Soria, C.}, TITLE = {Preserving Digital Language Diversity}, YEAR = {2013}, URL = {https://publications.cnr.it/doc/285397}, CONFERENCE_NAME = {All-Russia methodological seminar on protection of minority languages of peoples of Siberia and the Russian Far East}, CONFERENCE_PLACE = {Biysk, Altai Krai, Russian Federation}, CONFERENCE_DATE = {05/11/2013-08/11/2013}, } @INPROCEEDINGS{SORIA_2013_INPROCEEDINGS_S_285434, AUTHOR = {Soria, C.}, TITLE = {You can speak it now: assessing the effect of official recognition on vitality of minority languages}, YEAR = {2013}, ABSTRACT = {In 1999, an Italian controversial law granted official recognition to twelve regional and minority languages, but denied it to others such as Piedmontese, Venetan, Sicilian, Emilian, Romagnol, that nevertheless are rated as endangered by UNESCO and Ethnologue. This particular situation offers an ideal laboratory to assess the impact of language policies on protected languages and at the same time the effect of lack of official protection and recognition on languages denied of institutional support. 
In this research we have coupled official census data with the results of an extensive survey carried out among speakers of all endangered languages of Italy, recognised or not, to re-assess their vitality in terms of speakers' number, domains of use, intergenerational transmission and speakers' attitudes. After illustrating the methodology adopted for the survey, we will show how the law was largely ineffective in producing quantitatively significant changes, while had a sharp effect on speakers' attitudes. We will argue that official recognition generally improved self-esteem and generated a pride in the language that is severely lacking among speakers of other languages, while lack of top-down recognition can be a powerful element in reinforcing negative feelings and overestimation of the difficulties of language revitalization.}, KEYWORDS = {minority language, regional language, endangered language, institutional support, speakers' attitudes}, URL = {https://publications.cnr.it/doc/285434}, CONFERENCE_NAME = {Conference Language Endangerment: Language Policy and Planning}, CONFERENCE_PLACE = {Cambridge, UK}, CONFERENCE_DATE = {26/07/2013}, } @INPROCEEDINGS{SORIA_2013_INPROCEEDINGS_S_317649, AUTHOR = {Soria, C.}, TITLE = {Salviamo l'italiano dall'estinzione digitale}, YEAR = {2013}, URL = {https://publications.cnr.it/doc/317649}, CONFERENCE_NAME = {Salone del Libro di Torino, Convegno "D'Annunzio innovatore"}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {17/05/2013}, } @INPROCEEDINGS{SORIA_2013_INPROCEEDINGS_SZR_285444, AUTHOR = {Soria, C. and Zoli, C. and Randaccio, S.}, TITLE = {Why the Internet should speak minority languages-and how.
The role of Language Technologies for minority and contested languages}, YEAR = {2013}, ABSTRACT = {Many minority languages that are thriving to get a place in the digital space and are profiting of the new opportunities offered by the Internet and digital devices will seriously face digital extinction if they will not be supported by Language Technologies (Calzolari et al. 2012). LTs (spelling and grammar checkers, electronic dictionaries, localized interfaces, as well as search engines, language translators or information extraction tools) are a necessary instrument to secure usability of minority languages over the web (Soria et al. 2012, Krauwer 2003), thus ensuring those languages equal digital opportunities and raising their profile in the eyes of the younger, digitally-oriented generation. However, there are many challenges to be faced to equip minority languages with LTs (from basic to advanced): a substantial delay in development of basic technologies, a lack of cooperation among minority languages communities, a chronic shortage of funding (in particular for minority languages not officially recognized, yet often the most vital ones over the Internet) and the limited economic value placed over LTs for minority languages by the digital market rules. In this talk, on the basis of concrete examples and a survey about the digital use of minority languages of Italy, we will show how these challenges can be overcome and suggest a roadmap towards sustainable development of LTs for minority languages.}, URL = {https://publications.cnr.it/doc/285444}, CONFERENCE_NAME = {Conference Contested Languages in the Old World}, CONFERENCE_PLACE = {Bangor, UK}, CONFERENCE_DATE = {09/09/2013-10/09/2013}, } @ARTICLE{HAYASHI_2012_ARTICLE_HSMSC_218777, AUTHOR = {Hayashi, Y. and Savas, B. and Monachini, M. and Soria, C. 
and Calzolari, N.}, TITLE = {LMF-aware Web services for accessing semantic lexicons}, YEAR = {2012}, ABSTRACT = {This paper demonstrates that Wordnet-LMF, a version of ISO LMF, allows us to effectively design and implement Web services for accessing WordNet-type semantic lexicons that conform to the REST Web service architecture. The implemented prototype service currently provides access to native wordnets as well as to a bilingual concept dictionary. This paper thus describes slight revisions that were made to the Wordnet-LMF specifications to model and accommodate a non-wordnet-native bilingual concept dictionary.}, KEYWORDS = {Lexical markup framework, Semantic lexicons, Wordnets, Language services, RESTful Web service design}, PAGES = {253-264}, URL = {http://link.springer.com/content/pdf/10.1007%2Fs10579-012-9181-4.pdf}, VOLUME = {46}, DOI = {10.1007/s10579-012-9181-4}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @BOOK{CALZOLARI_2012_BOOK_CMSS_225736, AUTHOR = {Calzolari, N. and Magnini, B. and Speranza, M. and Soria, C.}, TITLE = {The Italian language in the digital age-La lingua italiana nell'era digitale}, YEAR = {2012}, ABSTRACT = {This white paper is part of a series that promotes knowledge about language technology and its potential. It addresses educators, journalists, politicians, language communities and others. The availability and use of language technology in Europe varies between languages. Consequently, the actions that are required to further support research and development of language technologies also differ for each language. The required actions depend on many factors, such as the complexity of a given language and the size of its community. META-NET, a Network of Excellence funded by the European Commission, has conducted an analysis of current language resources and technologies.
This analysis focused on the 23 official European languages as well as other important national and regional languages in Europe. The results of this analysis suggest that there are many significant research gaps for each language. A more detailed expert analysis and assessment of the current situation will help maximise the impact of additional research and minimize any risks. META-NET consists of 54 research centres from 33 countries that are working with stakeholders from commercial businesses, government agencies, industry, research organisations, software companies, technology providers and European universities. Together, they are creating a common technology vision while developing a strategic research agenda that shows how language technology applications can address any research gaps by 2020.}, KEYWORDS = {digital extinction, tecnologie del linguaggio, lingua italiana}, PAGES = {69}, URL = {http://www.springer.com/computer/ai/book/978-3-642-30775-1}, DOI = {10.1007/978-3-642-30776-8}, PUBLISHED = {White Paper Series}, PUBLISHER = {Springer (Berlin, DEU)}, ISBN = {978-3-642-30775-1}, EDITOR = {Rehm, G. and Uszkoreit, H.}, } @INPROCEEDINGS{CALZOLARI_2012_INPROCEEDINGS_CDFMRRS_220194, AUTHOR = {Calzolari, N. and Del Gratta, R. and Francopoulo, G. and Mariani, J. and Rubino, F. and Russo, I. and Soria, C.}, TITLE = {The LRE Map. Harmonising Community Descriptions of Resources}, YEAR = {2012}, ABSTRACT = {Accurate and reliable documentation of Language Resources is an undisputable need: documentation is the gateway to discovery of Language Resources, a necessary step towards promoting the data economy. Language resources that are not documented virtually do not exist: for this reason every initiative able to collect and harmonise metadata about resources represents a valuable opportunity for the NLP community. In this paper we describe the LRE Map, reporting statistics on resources associated with LREC2012 papers and providing comparisons with LREC2010 data. 
The LRE Map, jointly launched by FLaReNet and ELRA in conjunction with the LREC 2010 conference, is an instrument for enhancing availability of information about resources, either new or already existing ones, reinforcing and facilitating the use of standards in the community. The LRE Map web interface provides the possibility of searching according to a fixed set of metadata and to view the details of extracted resources. The LRE Map is continuing to collect bottom-up input about resources from authors of other conferences through standard submission process. This will help broadening the notion of "language resources" and attract to the field neighboring disciplines that so far have been only marginally involved by the standard notion of language resources.}, KEYWORDS = {Language resources, metadata, documentation}, PAGES = {1084-1089}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/index.html}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {The Eighth International Conference on Language Resources and Evaluation (LREC'12)}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {23-25 May 2012}, BOOKTITLE = {Proceedings of LREC'12-The Eighth International Conference on Language Resources and Evaluation}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{SORIA_2012_INPROCEEDINGS_SBCMMOPQC_219679, AUTHOR = {Soria, C. and Bel, N. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Calzolari, N.}, TITLE = {The FLaReNet Strategic Language Resource Agenda}, YEAR = {2012}, ABSTRACT = {The FLaReNet Strategic Agenda highlights the most pressing needs for the sector of Language Resources and Technologies and presents a set of recommendations for its development and progress in Europe, as issued from a three-year consultation of the FLaReNet European project.
The FLaReNet recommendations are organised around nine dimensions: a) documentation b) interoperability c) availability, sharing and distribution d) coverage, quality and adequacy e) sustainability f) recognition g) development h) infrastructure and i) international cooperation. As such, they cover a broad range of topics and activities, spanning over production and use of language resources, licensing, maintenance and preservation issues, infrastructures for language resources, resource identification and sharing, evaluation and validation, interoperability and policy issues. The intended recipients belong to a large set of players and stakeholders in Language Resources and Technology, ranging from individuals to research and education institutions, to policy-makers, funding agencies, SMEs and large companies, service and media providers. The main goal of these recommendations is to serve as an instrument to support stakeholders in planning for and addressing the urgencies of the Language Resources and Technologies of the future.}, KEYWORDS = {strategic agenda, language resources planning, recommended priority actions}, PAGES = {1379-1386}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/index.html}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {The Eighth International Conference on Language Resources and Evaluation (LREC'12)}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {23-25 May 2012}, BOOKTITLE = {Proceedings of the 8th international conference on Language Resources and Evaluation (LREC2012)}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Dogan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J.
and Piperidis, S.}, } @INPROCEEDINGS{SORIA_2012_INPROCEEDINGS_S_226389, AUTHOR = {Soria, C.}, TITLE = {Voices of Italy: a project for the preservation of Italian language diversity}, YEAR = {2012}, ABSTRACT = {This submission will bring to discussion a project idea that addresses documentation and preservation of regional languages of Italy. Italy holds a unique position in Europe, with around 40 languages spoken and 31 of them in danger. Yet, little is being done for fostering their preservation, and little (if any) is the perception by the general public of the proportions of the phenomenon of language endangerment. The result of the project should be an audiovisual digital archive for storing samples of regional languages. Users will interact with the archive through a range of so called new technologies: not only a web portal for accessing, browsing and searching information, but also mobile devices Apps, a YouTube channel, social networking platforms, etc. for contributing language material. Since these new media are widespread among the Italian population, the youngest one in particular (36 million people in Italy have a connection to the Internet, and 9,3 millions from a mobile device, with an increase of 74% in a year) it seems feasible to crowdsource the linguistic material to populate the archive, thereby offering an engaging experience that would boost involvement of speakers' communities into the documentation and preservation endeavour.
It would be interesting to discuss the feasibility of this project under the current technological and cultural constraints.}, PAGES = {17-17}, URL = {https://publications.cnr.it/doc/226389}, CONFERENCE_NAME = {Language Endangerment: Methodologies and New Challenges}, CONFERENCE_PLACE = {Cambridge (UK)}, CONFERENCE_DATE = {06/07/2012}, BOOKTITLE = {Language Endangerment: Methodologies and New Challenges}, } @INPROCEEDINGS{SORIA_2012_INPROCEEDINGS_S_226380, AUTHOR = {Soria, C.}, TITLE = {Voices of Italy: a project for the preservation of Italian language diversity}, YEAR = {2012}, URL = {https://publications.cnr.it/doc/226380}, CONFERENCE_NAME = {Language Endangerment: Methodologies and New Challenges}, CONFERENCE_PLACE = {Cambridge (UK)}, CONFERENCE_DATE = {06/07/2012}, } @INPROCEEDINGS{SORIA_2012_INPROCEEDINGS_SZ_317628, AUTHOR = {Soria, C. and Zoli, C.}, TITLE = {New markets for Language Technology for minority languages}, YEAR = {2012}, ABSTRACT = {Language Technology offers significant opportunities for minority languages and can be a major force in addressing and alleviating some of the difficulties they face. For minority languages in particular, speech and language technology are a powerful means to bring together speakers' communities, to have a major impact on language learning support, to promote inclusion of elderly or impaired people and to foster widespread use of a language through digital means. In this talk, we will be presenting first the main outcomes of the research carried out by the META-NET project, resulting in the publication of the White Paper Series "Europe's Languages in the Digital Age". The series, that reports on the state of each European language with respect to Language Technology, offers an updated synthesis of the most urgent risks and chances faced, in particular, by less-serviced languages. 
The presentation will then provide concrete examples of LT solutions for minority languages, discussing their potential impact on those languages, in particular with regard to their role for language maintenance and preservation in the eyes of the younger, digitally-oriented generation.}, KEYWORDS = {Language technology, NLP, minority languages}, URL = {https://publications.cnr.it/doc/317628}, CONFERENCE_NAME = {3e Symposium sur le Multilinguisme dans le Cyberespace}, CONFERENCE_PLACE = {Parigi}, CONFERENCE_DATE = {21/11/2012-23/11/2012}, } @INCOLLECTION{HAYASHI_2011_INCOLLECTION_HDCMSB_205409, AUTHOR = {Hayashi, Y. and Declerck, T. and Calzolari, N. and Monachini, M. and Soria, C. and Buitelaar, P.}, TITLE = {Language Service Ontology}, YEAR = {2011}, ABSTRACT = {The Language Grid is a distinctive language service infrastructure in the sense that it accommodates a wide variety of user needs, ranging from technical novices to experts; language resource consumers to language resource providers. As these language services are various in type and each of them can be idiosyncratic in many aspects, the service infrastructure has to address the issue of interoperability. A key to solve this issue is not only to build the services around standardized resources and interfaces, but also to establish a knowledge structure that copes effectively with a range of language services. Given this knowledge structure, referred to as a service ontology, each language service can be systematically classified and its usage specified by a corresponding API. 
This not only enables the utilization of existing language resources but facilitates the dissemination of newly created language resources as services.}, KEYWORDS = {Language grid, ontology}, PAGES = {85-100}, URL = {https://publications.cnr.it/doc/205409}, DOI = {10.1007/978-3-642-21178-2_6}, PUBLISHER = {Springer-Verlag (Berlin/Heidelberg, DEU)}, ISBN = {978-3-642-21177-5}, BOOKTITLE = {The Language Grid}, EDITOR = {Ishida, T.}, } @EDITORIAL{CALZOLARI_2011_EDITORIAL_CBSGMQ_206410, AUTHOR = {Calzolari, N. and Baroni, P. and Soria, C. and Goggi, S. and Monachini, M. and Quochi, V.}, TITLE = {Proceedings of the 3rd European Language Resources and Technologies Forum: Language Resources in the Sharing Age-the Strategic Agenda}, YEAR = {2011}, ABSTRACT = {Proceedings of the third FLaReNet forum on the European Language Resources and Technologies, held in Venezia, at the Auditorium Santa Margherita of the Università Ca' Foscari, on 26-27 May 2011.}, KEYWORDS = {Language Resources, Language Technologies}, PAGES = {86}, URL = {http://www.flarenet.eu/sites/default/files/FLaReNet_Forum_2011_Proceedings.pdf}, } @TECHREPORT{BARONI_2011_TECHREPORT_BSC_206273, AUTHOR = {Baroni, P. and Soria, C. and Calzolari, N.}, TITLE = {The FLaReNet Databook}, YEAR = {2011}, ABSTRACT = {A collection of all the factual material collected during the activities of the FLaReNet project and a set of innovative initiatives and instruments that will remain in place for the continuous collection of such "facts". Editors: Paola Baroni, Claudia Soria, Nicoletta Calzolari. 
Contributors: Victoria Arranz, Núria Bel, Gerhard Budin, Tommaso Caselli, Khalid Choukri, Riccardo Del Gratta, Elina Desypri, Gil Francopoulo, Francesca Frontini, Sara Goggi, Olivier Hamon, Erhard Hinrichs, Penny Labropoulou, Lothar Lemnitzer, Steven Krauwer, Valerie Mapelli, Joseph Mariani, Monica Monachini, Jan Odijk, Jungyeul Park, Stelios Piperidis, Adam Przepiorkowski, Valeria Quochi, Eva Revilla, Laurent Romary, Francesco Rubino, Irene Russo, Helmut Schmidt, Hans Uszkoreit, Peter Wittenburg.}, KEYWORDS = {Language Resources, Language Technologies}, URL = {http://www.flarenet.eu/sites/default/files/FLaReNet_Databook.pdf}, } @TECHREPORT{BARONI_2011_TECHREPORT_BSC_206324, AUTHOR = {Baroni, P. and Soria, C. and Calzolari, N.}, TITLE = {The FLaReNet Databook: http://www.flarenet.eu/?q=FLaReNet_Databook}, YEAR = {2011}, ABSTRACT = {A collection of all the factual material collected during the activities of the FLaReNet project and a set of innovative initiatives and instruments that will remain in place for the continuous collection of such "facts". Editors: Paola Baroni, Claudia Soria, Nicoletta Calzolari. Contributors: Victoria Arranz, Núria Bel, Gerhard Budin, Tommaso Caselli, Khalid Choukri, Riccardo Del Gratta, Elina Desypri, Gil Francopoulo, Francesca Frontini, Sara Goggi, Olivier Hamon, Erhard Hinrichs, Penny Labropoulou, Lothar Lemnitzer, Steven Krauwer, Valerie Mapelli, Joseph Mariani, Monica Monachini, Jan Odijk, Jungyeul Park, Stelios Piperidis, Adam Przepiorkowski, Valeria Quochi, Eva Revilla, Laurent Romary, Francesco Rubino, Irene Russo, Helmut Schmidt, Hans Uszkoreit, Peter Wittenburg.}, KEYWORDS = {Language Resources}, URL = {http://www.flarenet.eu/?q=FLaReNet_Databook}, } @TECHREPORT{CALZOLARI_2011_TECHREPORT_CBCMMOPQS_206397, AUTHOR = {Calzolari, N. and Bel, N. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V.
and Soria, C.}, TITLE = {Final FLaReNet deliverable: Language Resources for the Future-The Future of Language Resources}, YEAR = {2011}, ABSTRACT = {Language Technologies (LT), together with their backbone, Language Resources (LR), provide an essential support to the challenge of Multilingualism and ICT of the future. The main task of language technologies is to bridge language barriers and to help creating a new environment where information flows smoothly across frontiers and languages, no matter the country, and the language, of origin. To achieve this goal, all players involved need to act as a community able to join forces on a set of shared priorities. However, until now the field of Language Resources and Technology has long suffered from an excess of individuality and fragmentation, with a lack of coherence concerning the priorities for the field, the direction to move, not to mention a common timeframe. The context encountered by the FLaReNet project was thus represented by an active field needing a coherence that can only be given by sharing common priorities and endeavours. FLaReNet has contributed to the creation of this coherence by gathering a wide community of experts and making them participate in the definition of an exhaustive set of recommendations.}, KEYWORDS = {language resources and technologies, infrastructures}, PAGES = {97}, URL = {https://publications.cnr.it/doc/206397}, } @TECHREPORT{CALZOLARI_2011_TECHREPORT_CQS_206420, AUTHOR = {Calzolari, N. and Quochi, V. and Soria, C.}, TITLE = {FLaReNet Strategic Language Resource Agenda}, YEAR = {2011}, ABSTRACT = {Despite the complexity of handling its languages, the European Union has established that cultural and language differences are a unique asset to be preserved. Europe needs to find means - such as technological ones - to overcome the language barriers to support citizens and industry in a multilingual globalised world. 
The large majority of industrial technological applications that handle natural language, i.e. Machine Translation, Crosslingual Information Retrieval, Multilingual Information Extraction, Automatic Document Indexing, Question Answering, Natural Language Interfaces, etc., include Language Resources as critical components. Although Language Technologies may consist of language independent engines, they depend on the availability of language-dependent knowledge under the form of Language Resources for their real-life implementation. At the same time, it is proved that a critical mass of Language Resources can make advancement in research and technology development possible and quicker, making Europe the leader of the market related to multilingualism. Companies such as Google or Microsoft play a dominant role in this framework, as they have access to a huge amount of data in many different languages, devote considerable resources to Language Technologies, have massive computing power and a direct research-to-application pipeline using a new business model based on so-called "free" services. The fact that a US company like Google is delivering some of the most comprehensive Language Technology solutions to support multilingualism should raise concern among EU officials.}, KEYWORDS = {Language resources, infrastructures}, PAGES = {23}, URL = {https://publications.cnr.it/doc/206420}, } @TECHREPORT{CALZOLARI_2011_TECHREPORT_CSBG_206274, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Goggi, S.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Final Report}, YEAR = {2011}, ABSTRACT = {Final report of the FLaReNet project.}, KEYWORDS = {Language Resources}, URL = {http://www.flarenet.eu/sites/default/files/D1.15.pdf}, } @TECHREPORT{CALZOLARI_2011_TECHREPORT_CSBCMOPB_206254, AUTHOR = {Calzolari, N. and Soria, C. and Bel, N. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. and Baroni, P.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 
5}, YEAR = {2011}, ABSTRACT = {Fifth semestrial report on the progress of the FLaReNet project.}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/206254}, } @TECHREPORT{CALZOLARI_2011_TECHREPORT_CSBCMOPBG_206276, AUTHOR = {Calzolari, N. and Soria, C. and Bel, N. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. and Baroni, P. and Goggi, S.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 6}, YEAR = {2011}, ABSTRACT = {Sixth semestrial report on the progress of the FLaReNet project.}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/206276}, } @TECHREPORT{MARIANI_2011_TECHREPORT_MBS_206326, AUTHOR = {Mariani, J. and Baroni, P. and Soria, C.}, TITLE = {Feedback from Contact Points on National Initiatives in the Area of Language Resources}, YEAR = {2011}, ABSTRACT = {A survey of existing initiatives on language resources all over the world promoted by the FLaReNet WG7 and carried out with contributions from the 102 FLaReNet National Contact Points.}, KEYWORDS = {Language Resources}, URL = {http://www.flarenet.eu/?q=Feedback_from_Contact_Points_on_National_Initiatives_in_the_Area_of_Language_Resources}, } @TECHREPORT{MONACHINI_2011_TECHREPORT_MFS_206457, AUTHOR = {Monachini, M. and Frontini, F. and Soria, C.}, TITLE = {KYOTO-LMF WordNet Representation Format}, YEAR = {2011}, ABSTRACT = {The format described in the following pages is the final revised proposal for representing wordnets inside the Kyoto project (henceforth "Kyoto-LMF wordnet format"). The reference model is Lexical Markup Framework (LMF), version 16, probably one of the most widely recognized standards for the representation of NLP lexicons. The goals of LMF are to provide a common model for the creation and use of such lexical resources, to manage the exchange of data between and among them, and to enable the merging of a large number of individual resources to form extensive global electronic resources.
LMF was specifically designed to accommodate as many models of lexical representations as possible. Purposefully, it is designed as a meta-model, i.e. a high-level specification for lexical resources defining the structural constraints of a lexicon.}, KEYWORDS = {Wordnets, LMF, ISO, Representation formats, standards}, PAGES = {32}, URL = {https://publications.cnr.it/doc/206457}, } @TECHREPORT{SORIA_2011_TECHREPORT_SC_206389, AUTHOR = {Soria, C. and Calzolari, N.}, TITLE = {Project presentation-results}, YEAR = {2011}, ABSTRACT = {International cooperation and re-creation of a community are the most important drivers for a coherent evolution of the Language Resource (LR) area in the next years. FLaReNet has been a European forum to facilitate interaction among LR stakeholders and its structure took into account the fact that LRs present various dimensions and must be approached from many perspectives: technical, but also organisational, economic, legal, political. The Network addressed also multicultural and multilingual aspects, essential when facing access and use of digital content in today's Europe. FLaReNet consolidated existing knowledge, presenting it analytically and visibly, and contributed to structuring the area of LRs of the future by discussing new strategies to: convert existing and experimental technologies related to LRs into useful economic and societal benefits; integrate so far partial solutions into broader infrastructures; consolidate areas mature enough for recommendation of best practices; anticipate the needs of new types of LRs. The outcomes of FLaReNet has been of a directive nature, to help identify those priority areas of LRs of major interest for the public that need public funding to develop or improve.
A blueprint of actions has constituted the input to policy development both at EU and national level for identifying new language policies that support linguistic diversity in Europe, in combination with strengthening the language product market, e.g. for new products and innovative services, especially for less technologically advanced languages.}, KEYWORDS = {Language resources, infrastructures, international cooperation}, PAGES = {44}, URL = {https://publications.cnr.it/doc/206389}, } @TECHREPORT{SORIA_2011_TECHREPORT_SM_206391, AUTHOR = {Soria, C. and Mariani, J.}, TITLE = {Report on Existing Projects and Initiatives}, YEAR = {2011}, ABSTRACT = {It is of utmost importance for a project such as T4ME to get a comprehensive and reliable overview of the projects and initiatives addressing similar topics. Mainly in order to establish relationships, build on previous achievement, and get a reliable and up-to-date view about the current state of the art. This report surveys ongoing and recent projects and initiatives at the national, EU and transnational level addressing Machine Translation, multilingual issues, language resources and technologies, or infrastructural issues at large. Focus is on Europe but relevant initiatives outside Europe have been reviewed as well.}, KEYWORDS = {language resources and technologies, infrastructures}, PAGES = {134}, URL = {http://www.meta-net.eu/public_documents/t4me/META-NET-D11.3-Final.pdf}, } @ARTICLE{CALZOLARI_2010_ARTICLE_CSD_112952, AUTHOR = {Calzolari, N. and Soria, C. and Del Gratta, R.}, TITLE = {The LREC 2010 Map of Language Resources and Tools}, YEAR = {2010}, KEYWORDS = {Linguistic Tools, Language Resources}, URL = {https://publications.cnr.it/doc/112952}, } @INCOLLECTION{CALZOLARI_2010_INCOLLECTION_CS_30886, AUTHOR = {Calzolari, N.
and Soria, C.}, TITLE = {Planning the Future of Language Resources: The Role of the FLaReNet Network}, YEAR = {2010}, ABSTRACT = {In this paper we analyse the role of Language Resources (LR) and Language Technologies (LT) in today Human Language Technology field and try to speculate on some of the priorities for the next years, from the particular perspective of the FLaReNet project, that has been asked to act as an observatory to assess current status of the field on Language Resources and Technology and to indicate priorities of action for the future.}, KEYWORDS = {Language Resources and Technology, strategic initiatives, priorities}, PAGES = {1-11}, URL = {https://rdcu.be/dfs8V}, VOLUME = {6008}, DOI = {10.1007/978-3-642-12116-6_1}, PUBLISHER = {Springer (Berlin, DEU)}, ISBN = {978-3-642-12116-6}, BOOKTITLE = {Computational Linguistics and Intelligent Text Processing. CICLing 2010}, EDITOR = {Gelbukh, A.}, } @EDITORIAL{CALZOLARI_2010_EDITORIAL_CBMS_136417, AUTHOR = {Calzolari, N. and Baroni, P. and Monachini, M. and Soria, C.}, TITLE = {Proceedings of the 2nd European Language Resources and Technologies Forum: Language Resources of the future-the future of Language Resources}, YEAR = {2010}, ABSTRACT = {Proceedings of the second FLaReNet forum on the European Language Resources and Technologies, held in Barcelona, at the Institut d'Estudis Catalans, on 11-12 February 2010.}, KEYWORDS = {Language Resources, Language Technologies, Future}, PAGES = {120}, URL = {http://www.flarenet.eu/sites/default/files/FLaReNet_Forum_2010_Proceedings.pdf}, } @INPROCEEDINGS{BUNT_2010_INPROCEEDINGS_BACCFHPPRSTL_84794, AUTHOR = {Bunt, H. and Alexandersson, J. and Carletta, J. and Choe, J. and Fang, A. C. and Hasida, K. and Petukhova, V. and Popescu Belis, A. and Romary, L. and Soria, C. and Traum, D. 
and Lee, K.}, TITLE = {Towards an ISO Standard for Dialogue Act Annotation}, YEAR = {2010}, ABSTRACT = {This paper describes an ISO project developing an international standard for annotating dialogue with semantic information, in particular concerning the communicative functions of the utterances, the kind of content they address, and the dependency relations to what was said and done earlier in the dialogue. The project, registered as ISO 24617-2 "Semantic annotation framework, Part 2: Dialogue acts", is currently at DIS stage.}, KEYWORDS = {Dialogue, Corpus (creation, annotation, etc.), Semantics}, PAGES = {2548-2555}, URL = {https://publications.cnr.it/doc/84794}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {2010}, BOOKTITLE = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{CALZOLARI_2010_INPROCEEDINGS_CS_84792, AUTHOR = {Calzolari, N. and Soria, C.}, TITLE = {Preparing the field for an Open Resource Infrastructure: the role of the FLaReNet Network of Excellence}, YEAR = {2010}, ABSTRACT = {In order to overcome the fragmentation that affects the field of Language Resources and Technologies, an Open and Distributed Resource Infrastructure is the necessary step for building on each other achievements, integrating resources and technologies and avoiding dispersed or conflicting efforts.
Since this endeavour represents a true cultural turnpoint in the LRs field, it needs a careful preparation, both in terms of acceptance by the community and thoughtful investigation of the various technical, organisational and practical aspects implied. To achieve this, we need to act as a community able to join forces on a set of shared priorities and we need to act at a worldwide level. FLaReNet - Fostering Language Resources Network - is a Thematic Network funded under the EU eContent program that aims at developing the needed common vision and fostering a European and International strategy for consolidating the sector, thus enhancing competitiveness at EU level and worldwide. In this paper we present the activities undertaken by FLaReNet in order to prepare and support the establishment of such an Infrastructure, which is becoming now a reality within the new MetaNet initiative.}, KEYWORDS = {LR Infrastructures and Architectures, LR national/international projects, organizational/policy issues}, PAGES = {1295-1300}, URL = {https://publications.cnr.it/doc/84792}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {19-21/05/2010}, BOOKTITLE = {Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10)}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{CALZOLARI_2010_INPROCEEDINGS_CSDGQRCMP_84809, AUTHOR = {Calzolari, N. and Soria, C. and Del Gratta, R. and Goggi, S. and Quochi, V. and Russo, I. and Choukri, K. and Mariani, J. 
and Piperidis, S.}, TITLE = {The LREC Map of Language Resources and Technologies}, YEAR = {2010}, ABSTRACT = {In this paper we present the LREC Map of Language Resources (data and tools), an innovative feature introduced in conjunction with the LREC 2010 Conference. The purpose of the Map is to shed light on the vast amount of resources that represent the background of the research presented at LREC, in the attempt to fill in a gap in the community knowledge about the resources that are used or created worldwide. It also aims at a change of culture in the field, actively engaging each researcher in the documentation task about resources. The Map has been developed on the basis of the information provided by LREC authors during the submission of papers to the LREC 2010 conference and the LREC workshops, and contains information about almost 2000 resources. The paper illustrates the motivation behind this initiative, its main characteristics, its relevance and future impact in the field, the metadata used to describe the resources, and finally presents some of the most relevant findings.}, KEYWORDS = {LR national/international projects, organizational/policy issues}, PAGES = {949-956}, URL = {http://www.lrec-conf.org/proceedings/lrec2010/index.html}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {LREC 2010 Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {17-23 May 2010}, BOOKTITLE = {LREC'10-Seventh International Conference on Language Resources and Evaluation. Proceedings}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{DELGRATTA_2010_INPROCEEDINGS_DDBCEMQSTC_84782, AUTHOR = {Del Gratta, R. and D'Onofrio, L. and Bartolini, R. and Caselli, T. 
and Enea, A. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A. and Calzolari, N.}, TITLE = {A Web-based Architecture for Interoperability of Lexical Resources}, YEAR = {2010}, ABSTRACT = {In this paper we present a Web Service Architecture for managing high level interoperability of Language Resources (LRs) by means of a Service Oriented Architecture (SOA) and the use of ISO standards, such as ISO LMF. We propose a layered architecture which separates the management of legacy resources (data collection) from data aggregation (workflow) and data access (user requests). We provide a case study to demonstrate how the proposed architecture is capable of managing data exchange among different lexical services in a coherent way and show how the use of a lexical standard becomes of primary importance when a protocol of interoperability is defined.}, KEYWORDS = {Interoperability, Web services, Lexical resources}, PAGES = {53-62}, URL = {http://weblab.iit.cnr.it/kyoto/www2.let.vu.nl/twiki/pub/Kyoto/Publications/icgl2010_DOnofrioetal.pdf}, PUBLISHER = {City university of Hong Kong press (Hong Kong, CHN)}, ISBN = {978-962-442-323-5}, CONFERENCE_NAME = {2nd International Conference on Global Interoperability for Language Resources}, CONFERENCE_PLACE = {Hong Kong}, CONFERENCE_DATE = {18-20 January 2010}, BOOKTITLE = {2nd International Conference on Global Interoperability for Language Resources, ICGL 2010}, EDITOR = {Fang, A. C. and Ide, N. and Webster, J.}, } @INPROCEEDINGS{SAVAS_2010_INPROCEEDINGS_SHMSC_84807, AUTHOR = {Savas, B. and Hayashi, Y. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {An LMF-based Web Service for Accessing WordNet-type Semantic Lexicons}, YEAR = {2010}, ABSTRACT = {This paper describes a Web service for accessing WordNet-type semantic lexicons. 
The central idea behind the service design is: given a query, the primary functionality of lexicon access is to present a partial lexicon by extracting the relevant part of the target lexicon. Based on this idea, we implemented the system as a RESTful Web service whose input query is specified by the access URI and whose output is presented in a standardized XML data format. LMF, an ISO standard for modeling lexicons, plays the most prominent role: the access URI pattern basically reflects the lexicon structure as defined by LMF; the access results are rendered based on Wordnet-LMF, which is a version of LMF XML-serialization. The Web service currently provides accesses to Princeton WordNet, Japanese WordNet, as well as the EDR Electronic Dictionary as a trial. To accommodate the EDR dictionary within the same framework, we modeled it also as a WordNet-type semantic lexicon. This paper thus proposes modifications to LMF.}, KEYWORDS = {Standards for LRs, Lexicon, Lexical database, Web Services}, URL = {https://publications.cnr.it/doc/84807}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {17-21/05/2010}, } @INPROCEEDINGS{TORAL_2010_INPROCEEDINGS_TBMS_84773, AUTHOR = {Toral, A. and Bracale, S. and Monachini, M. and Soria, C.}, TITLE = {Rejuvenating the Italian WordNet: upgrading, standardising, extending}, YEAR = {2010}, ABSTRACT = {This paper reports on recent activities carried out within the KYOTO project aimed at enhancing the Italian WordNet Language Resource. On the one hand we study the formalisation of this lexicon according to the LMF ISO standard and explore its application into a real-world scenario by means of representing it in the WN-LMF dialect. 
On the other hand, we report on a semiautomatic procedure to upgrade the connections of the lexicon to WordNet, which obtains over 98% accuracy.}, KEYWORDS = {Lexical Resources, Standards for LRs}, URL = {http://www.globalwordnet-iitb2010.in/proceedings.php}, CONFERENCE_NAME = {5th Global Wordnet Conference}, CONFERENCE_PLACE = {Mumbai (India)}, CONFERENCE_DATE = {31/01-4/02-2010}, EDITOR = {Bhattacharyya, P. and Fellbaum, C. and Vossen, P.}, } @INPROCEEDINGS{TORAL_2010_INPROCEEDINGS_TMSCRBV_84801, AUTHOR = {Toral, A. and Monachini, M. and Soria, C. and Cuadros, M. and Rigau, G. and Bosma, W. and Vossen, P.}, TITLE = {Linking a domain thesaurus to WordNet and conversion to WordNet-LMF}, YEAR = {2010}, ABSTRACT = {We present a methodology to link domain thesauri to general-domain lexica. This is applied in the framework of the KYOTO project to link the Species2000 thesaurus to the synsets of the English WordNet. Moreover, we study the formalisation of this thesaurus according to the ISO LMF standard and its dialect WordNet-LMF. This conversion will allow Species2000 to communicate with the other resources available in the KYOTO architecture.}, KEYWORDS = {Lexical Resources, Thesaurus}, PAGES = {157-165}, URL = {https://publications.cnr.it/doc/84801}, ISBN = {978-962-442-323-5}, CONFERENCE_NAME = {ICGL 2010-The Second International Conference on Global Interoperability for Language Resources}, CONFERENCE_PLACE = {Hong Kong}, CONFERENCE_DATE = {18-20 January 2010}, BOOKTITLE = {ICGL2010-Proceedings of the Second International Conference on Global Interoperability for Language Resources-5th Joint ISO-ACL/SIGSEM Workshop on Interoperable Semantic Annotation}, EDITOR = {Fang, A. C. and Ide, N. and Webster, J.}, } @TECHREPORT{CALZOLARI_2010_TECHREPORT_CSB_157481, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Annual Report No. 
2}, YEAR = {2010}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157481}, } @TECHREPORT{CALZOLARI_2010_TECHREPORT_CSBBBCMOP_157478, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 3}, YEAR = {2010}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157478}, } @TECHREPORT{CALZOLARI_2010_TECHREPORT_CSBQBBCMOP_157488, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Quochi, V. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 4}, YEAR = {2010}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157488}, } @ARTICLE{FRANCOPOULO_2009_ARTICLE_FBGCMPS_30882, AUTHOR = {Francopoulo, G. and Bel, N. and George, M. and Calzolari, N. and Monachini, M. and Pet, M. and Soria, C.}, TITLE = {Multilingual resources for NLP in the Lexical Markup Framework (LMF)}, YEAR = {2009}, ABSTRACT = {Optimizing the production, maintenance and extension of lexical resources is one the crucial aspects impacting Natural Language Processing (NLP). A second aspect involves optimizing the process leading to their integration in applications. With this respect, we believe that a consensual specification on monolingual, bilingual and multilingual lexicons can be a useful aid for the various NLP actors. 
Within ISO, one purpose of Lexical Markup Framework (LMF, ISO-24613) is to define a standard for lexicons that covers multilingual lexical data.}, KEYWORDS = {LMF, Standardization, ISO-TC37}, PAGES = {57-70}, URL = {https://publications.cnr.it/doc/30882}, VOLUME = {43}, DOI = {10.1007/s10579-008-9077-5}, PUBLISHER = {Springer (Dordrecht, NLD)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @ARTICLE{SORIA_2009_ARTICLE_SMBCHHMT_170611, AUTHOR = {Soria, C. and Monachini, M. and Bertagna, F. and Calzolari, N. and Huang, C. and Hsieh, S. and Marchetti, A. and Tesconi, M.}, TITLE = {Exploring Interoperability of Language Resources: the Case of Cross-lingual Semi-automatic Enrichment of Wordnets}, YEAR = {2009}, ABSTRACT = {In this paper we present an application fostering the integration and interoperability of computational lexicons, focusing on the particular case of mutual linking and cross-lingual enrichment of two wordnets, the ItalWordNet and Sinica BOW lexicons. This is intended as a case study investigating the needs and requirements of semi-automatic integration and interoperability of lexical resources, in the view of developing a prototype web application to support the GlobalWordNet Grid Initiative.}, KEYWORDS = {H. 3 INFORMATION STORAGE AND RETRIEVAL. Linguistic processing, Distributed language resources, Interoperable lexical resources, Language services}, PAGES = {87-96}, URL = {https://publications.cnr.it/doc/170611}, VOLUME = {43}, DOI = {10.1007/s10579-009-9082-3}, PUBLISHER = {Springer (Dordrecht, NLD)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @EDITORIAL{CALZOLARI_2009_EDITORIAL_CBBBCGMMOPQST_183877, AUTHOR = {Calzolari, N. and Baroni, P. and Bel, N. and Budin, G. and Choukri, K. and Goggi, S. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C. 
and Toral, A.}, TITLE = {Proceedings of the 1st European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, ABSTRACT = {Proceedings of the first FLaReNet Forum on the European Language Resources and Technologies, held in Vienna, at the Austrian Academy of Science, on 12-13 February 2009.}, KEYWORDS = {Language Resources, Language Technologies, Multilingual, Digital}, PAGES = {105}, URL = {http://www.flarenet.eu/sites/default/files/Vienna09_Proceedings.pdf}, } @INPROCEEDINGS{CALZOLARI_2009_INPROCEEDINGS_CS_84758, AUTHOR = {Calzolari, N. and Soria, C.}, TITLE = {The FLaReNet Thematic Network: a Global Forum for Cooperation}, YEAR = {2009}, KEYWORDS = {Language Resources, Language Technologies}, URL = {https://publications.cnr.it/doc/84758}, CONFERENCE_NAME = {ACL-IJCNLP 2009-7th Workshop on Asian Language Resources}, CONFERENCE_PLACE = {Suntec, Singapore}, CONFERENCE_DATE = {2009}, } @INPROCEEDINGS{IDE_2009_INPROCEEDINGS_IPCS_84760, AUTHOR = {Ide, N. and Pustejovsky, J. and Calzolari, N. and Soria, C.}, TITLE = {The SILT and FLaReNet International Collaboration for Interoperability}, YEAR = {2009}, KEYWORDS = {Interoperability, Language Resources, Language Technologies}, URL = {https://publications.cnr.it/doc/84760}, CONFERENCE_NAME = {ACL-IJCNLP 2009-3rd Linguistic Annotation Workshop}, CONFERENCE_PLACE = {Suntec, Singapore}, CONFERENCE_DATE = {2009}, } @INPROCEEDINGS{SORIA_2009_INPROCEEDINGS_SMV_84764, AUTHOR = {Soria, C. and Monachini, M. and Vossen, P.}, TITLE = {Wordnet-LMF: Fleshing out a Standardized Format for Wordnet Interoperability}, YEAR = {2009}, ABSTRACT = {In this paper we present Wordnet-LMF, a dialect of ISO Lexical Markup Framework that instantiates LMF for representing wordnets. 
Wordnet-LMF was developed in the framework of the EU KYOTO project for the specific purpose of endowing a set of wordnets with a standardized interoperability format allowing the interchange of lexicosemantic information encoded in each of them. The aim of this format is twofold: a) to give a preliminary assessment of LMF, by large-scale application to real lexical resources; b) to endow WordNet with a format representation that will allow easier integration among resources sharing the same structure (i.e. other wordnets) and, more importantly, across resources with different theoretical and implementation approaches.}, KEYWORDS = {Standards, Lexical Markup Framework, Lexical resources, Wordnets, Intercultural collaboration}, PAGES = {139-146}, URL = {https://publications.cnr.it/doc/84764}, PUBLISHER = {ACM, Association for computing machinery (New York, USA)}, ISBN = {978-1-60558-198-9}, CONFERENCE_NAME = {International Workshop on Intercultural Collaboration}, CONFERENCE_PLACE = {Palo Alto, California, USA}, CONFERENCE_DATE = {20-21 February 2009}, BOOKTITLE = {Proceedings of the International Workshop on Intercultural Collaboration}, } @INPROCEEDINGS{TOKUNAGA_2009_INPROCEEDINGS_TKCMSSCTXHHK_84751, AUTHOR = {Tokunaga, T. and Kaplan, D. and Calzolari, N. and Monachini, M. and Soria, C. and Sornlertlamvanich, V. and Charoenporn, T. and Tesconi, M. and Xia, Y. and Huang, C. and Hsieh, S. and Kiyoaki, S.}, TITLE = {Query Expansion using LMF-Compliant Lexical Resources}, YEAR = {2009}, ABSTRACT = {This paper reports prototype multilingual query expansion system relying on LMF compliant lexical resources. The system is one of the deliverables of a three-year project aiming at establishing an international standard for language resources which is applicable to Asian languages. 
Our important contributions to ISO 24613, standard Lexical Markup Framework (LMF) include its robustness to deal with Asian languages, and its applicability to cross-lingual query tasks, as illustrated by the prototype introduced in this paper.}, KEYWORDS = {Lexical resources, Lexical Markup Framework (LMF), Standards}, PAGES = {145-152}, URL = {https://publications.cnr.it/doc/84751}, ISBN = {978-1-932432-56-5}, CONFERENCE_NAME = {ACL-IJCNLP 2009-7th Workshop on Asian Language Resources}, CONFERENCE_PLACE = {Singapore}, CONFERENCE_DATE = {6-7 August 2009}, } @TECHREPORT{ALIPRANDI_2009_TECHREPORT_ANMRTSMVBAADRS_262195, AUTHOR = {Aliprandi, C. and Neri, F. and Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C. and Monachini, M. and Vossen, P. and Bosma, W. and Agirre, E. and Artola, X. and De Ilarraza, A. D. and Rigau, G. and Soroa, A.}, TITLE = {Database Models and Data Formats DELIVERABLE NR. 1/WP NR. 2}, YEAR = {2009}, ABSTRACT = {This deliverable describes data structure and XML formats that have been investigated and defined for data representation of linguistic and semantic resources underlying the KYOTO system.}, URL = {https://publications.cnr.it/doc/262195}, } @TECHREPORT{ALIPRANDI_2009_TECHREPORT_ANMRTSMVBAADRS_157459, AUTHOR = {Aliprandi, C. and Neri, F. and Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C. and Monachini, M. and Vossen, P. and Bosma, W. and Agirre, E. and Artola, X. and Diaz, D. I. A. and Rigau, G. and Soroa, A.}, TITLE = {Database Models and Data Formats}, YEAR = {2009}, KEYWORDS = {XML data format, TMF, SEMAF, OWL/KIF, FACTAF}, URL = {https://publications.cnr.it/doc/157459}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CBGMQST_157465, AUTHOR = {Calzolari, N. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. 
and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Dissemination Plan}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157465}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CBGMQST_157468, AUTHOR = {Calzolari, N. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 1}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157468}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CMSBGQT_157467, AUTHOR = {Calzolari, N. and Monachini, M. and Soria, C. and Baroni, P. and Goggi, S. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 2}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157467}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBCGMQTBBCMOP_157462, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Caselli, T. and Goggi, S. and Monachini, M. and Quochi, V. and Toral, A. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Action Plan}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157462}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBGMQT_157466, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Evaluation Plan for the functioning of the Network}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157466}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBMQ_157463, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Monachini, M. and Quochi, V.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Annual Report No. 
1}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157463}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBMQT_157469, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Monachini, M. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Project Presentation}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157469}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBBCCMMOPQT_157464, AUTHOR = {Calzolari, N. and Soria, C. and Bel, N. and Budin, G. and Caselli, T. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Blueprint of actions and infrastructures No. 1}, YEAR = {2009}, KEYWORDS = {Language Resources, Infrastructures, Recommendations}, URL = {https://publications.cnr.it/doc/157464}, } @TECHREPORT{HEROLD_2009_TECHREPORT_HHSVRALMTS_157472, AUTHOR = {Herold, A. and Hicks, A. and Segers, R. and Vossen, P. and Rigau, G. and Agirre, E. and Laparra, E. and Monachini, M. and Toral, A. and Soria, C.}, TITLE = {WordNets mapped to Central Ontology}, YEAR = {2009}, KEYWORDS = {Ontology}, URL = {https://publications.cnr.it/doc/157472}, } @MISC{CALZOLARI_2009_MISC_CBBBCGMMOPQST_157471, AUTHOR = {Calzolari, N. and Baroni, P. and Bel, N. and Budin, G. and Choukri, K. and Goggi, S. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, KEYWORDS = {Language Resources, Language Technologies}, URL = {https://publications.cnr.it/doc/157471}, } @MISC{CALZOLARI_2009_MISC_CBBCMOPBGMQST_157457, AUTHOR = {Calzolari, N. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. 
and Toral, A.}, TITLE = {Extended Report of: The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, KEYWORDS = {Language Resources, Language Technologies}, URL = {https://publications.cnr.it/doc/157457}, } @MISC{CALZOLARI_2009_MISC_CBBCMOPBGMQST_157460, AUTHOR = {Calzolari, N. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {Short Report of The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, KEYWORDS = {Language Resources, Language Technologies}, URL = {https://publications.cnr.it/doc/157460}, } @INPROCEEDINGS{DELGRATTA_2008_INPROCEEDINGS_DBCMSC_84729, AUTHOR = {Del Gratta, R. and Bartolini, R. and Caselli, T. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {UFRA: a UIMA-based Approach to Federated Language Resource Architecture}, YEAR = {2008}, ABSTRACT = {In this paper we address the issue of developing an interoperable infrastructure for language resources and technologies. In our approach, called UFRA, we extend the Federate Database Architecture System adding typical functionalities caming from UIMA. In this way, we capitalize the advantages of a federated architecture, such as autonomy, heterogeneity and distribution of components, monitored by a central authority responsible for checking both the integration of components and user rights on performing different tasks. We use the UIMA approach to manage and define one common front-end, enabling users and clients to query, retrieve and use language resources and technologies. 
The purpose of this paper is to show how UIMA leads from a Federated Database Architecture to a Federated Resource Architecture, adding to a registry of available components both static resources such as lexicons and corpora and dynamic ones such as tools and general purpose language technologies. At the end of the paper, we present a case-study that adopts this framework to integrate the SIMPLE lexicon and TIMEML annotation guidelines to tag natural language texts.}, KEYWORDS = {LR Infrastructures and Architectures, LR web services, Lexicon, Lexical database}, PAGES = {2634-2639}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/656_paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Morocco}, CONFERENCE_DATE = {26 May-1 June 2008}, } @INPROCEEDINGS{HAYASHI_2008_INPROCEEDINGS_HNMSC_84725, AUTHOR = {Hayashi, Y. and Narawa, C. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {Ontologizing Lexicon Access Functions based on an LMF-based Lexicon Taxonomy}, YEAR = {2008}, ABSTRACT = {This paper discusses ontologization of lexicon access functions in the context of a service-oriented language infrastructure, such as the Language Grid. In such a language infrastructure, an access function to a lexical resource, embodied as an atomic Web service, plays a crucially important role in composing a composite Web service tailored to a user's specific requirement. To facilitate the composition process involving service discovery, planning and invocation, the language infrastructure should be ontology-based; hence the ontologization of a range of lexicon functions is highly required. In a service-oriented environment, lexical resources however can be classified from a service-oriented perspective rather than from a lexicographically motivated standard. 
Hence to address the issue of interoperability, the taxonomy for lexical resources should be ground to principled and shared lexicon ontology. To do this, we have ontologized the standardized lexicon modeling framework LMF, and utilized it as a foundation to stipulate the service-oriented lexicon taxonomy and the corresponding ontology for lexicon access functions. This paper also examines a possible solution to fill the gap between the ontological descriptions and the actual Web service API by adopting a W3C recommendation SAWSDL, with which Web service descriptions can be linked with the domain ontology.}, KEYWORDS = {Lexicon, LR web services, Standards for LRs, Lexical database}, PAGES = {916-922}, URL = {https://publications.cnr.it/doc/84725}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Morocco}, CONFERENCE_DATE = {26-05/1-06-2008}, } @INPROCEEDINGS{TAKENOBU_2008_INPROCEEDINGS_TKHHCMSSSCY_84701, AUTHOR = {Takenobu, T. and Kaplan, D. and Huang, C. and Hsieh, S. and Calzolari, N. and Monachini, M. and Soria, C. and Shirai, K. and Sornlertlamvanich, V. and Charoenporn, T. and Yingju, X.}, TITLE = {Adapting International Standard for Asian Language Technologies}, YEAR = {2008}, ABSTRACT = {Corpus-based approaches and statistical approaches have been the main stream of natural language processing research for the past two decades. Language resources play a key role in such approaches, but there is an insufficient amount of language resources in many Asian languages. In this situation, standardisation of language resources would be of great help in developing resources in new languages. This paper presents the latest development efforts of our project which aims at creating a common standard for Asian language resources that is compatible with an international standard. 
In particular, the paper focuses on i) lexical specification and data categories relevant for building multilingual lexical resources for Asian languages; ii) a core upper-layer ontology needed for ensuring multilingual interoperability and iii) the evaluation platform used to test the entire architectural framework.}, KEYWORDS = {LR national/international projects, Organizational/policy issues, LR Infrastructures and Architectures, Lexicon, Lexical database}, PAGES = {1663}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/422_paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Morocco}, CONFERENCE_DATE = {26-05/1-06-2008}, } @INPROCEEDINGS{TORAL_2008_INPROCEEDINGS_TQDMSC_84714, AUTHOR = {Toral, R. A. and Quochi, V. and Del Gratta, R. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {Lexically-based Ontologies and Ontologically Based Lexicons}, YEAR = {2008}, ABSTRACT = {This paper deals with the relations between ontologies and lexicons. We study the role of these two components and their evolution during the last years in the field of Computational Linguistics. Subsequently, we survey the current lines of research at ILC-CNR which tackle this topic. 
They involve (I) the reuse of already existing Lexical Resources to derive formal ontologies, (II) the conversion and combination of terminologies into rich and formal Lexical Resources and (III) the use of formal ontologies as the backbone of multilingual Lexical Resources.}, KEYWORDS = {Resource Infrastructure, UIMA, Clarin}, PAGES = {49-59}, URL = {https://publications.cnr.it/doc/84714}, CONFERENCE_NAME = {AI*IA 2008-10th Congress of Italian Association for Artificial Intelligence}, CONFERENCE_PLACE = {Cagliari}, CONFERENCE_DATE = {11-13 September 2008}, } @TECHREPORT{ALIPRANDI_2008_TECHREPORT_ANMRTSMVBAAARS_157449, AUTHOR = {Aliprandi, C. and Neri, F. and Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C. and Monachini, M. and Vossen, P. and Bosma, W. and Agirre, E. and Artola, X. and Arantza, D. and Rigau, G. and Soroa, A.}, TITLE = {Database models and data formats}, YEAR = {2008}, KEYWORDS = {XML data format, TMF, SEMAF, OWL/KIF, FACTAF}, URL = {https://publications.cnr.it/doc/157449}, } @TECHREPORT{MARCHETTI_2008_TECHREPORT_MRTSMVB_262193, AUTHOR = {Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C. and Monachini, M. and Vossen, P. and Bosma, W.}, TITLE = {XML Schema for Wordnet and Ontology: DELIVERABLE NR. 1 /WP NR. 7}, YEAR = {2008}, ABSTRACT = {This deliverable describes the XML schemata adopted to represent all the data related to the management of the multi-language wordnets and the ontology; they constitute the set of linguistic and semantic resources of KYOTO system.}, URL = {http://www2.let.vu.nl/twiki/pub/Kyoto/WP07:DatabaseSystemsAndWiki/D7.1_XML_Schema_for_Wordnet_and_Ontology_v2.0.pdf}, } @TECHREPORT{MARCHETTI_2008_TECHREPORT_MRTSMVB_157455, AUTHOR = {Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C. and Monachini, M. and Vossen, P. 
and Bosma, W.}, TITLE = {XML Schema for Wordnet and Ontology}, YEAR = {2008}, ABSTRACT = {This deliverable describes the XML schema adopted to represent all the data related to the management of the multi-language wordnets and the ontology; they constitute the set of linguistic and semantic resources of KYOTO system.}, KEYWORDS = {XML Schema, Wordnet, Ontology, LMF, TMF}, URL = {https://publications.cnr.it/doc/157455}, } @TECHREPORT{MONACHINI_2008_TECHREPORT_MS_157451, AUTHOR = {Monachini, M. and Soria, C.}, TITLE = {Report on use of LMF for representing WordNets}, YEAR = {2008}, KEYWORDS = {WordNets}, URL = {https://publications.cnr.it/doc/157451}, } @TECHREPORT{MONACHINI_2008_TECHREPORT_MS_157452, AUTHOR = {Monachini, M. and Soria, C.}, TITLE = {Report on use of TMF and LMF for representing raw terms}, YEAR = {2008}, KEYWORDS = {Terminological Data Collection, Terminological Markup Framework, Terminological Markup Language}, URL = {https://publications.cnr.it/doc/157452}, } @TECHREPORT{MONACHINI_2008_TECHREPORT_MSC_157454, AUTHOR = {Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {The Lexical Grid: Lexical Resources in Language Infrastructures}, YEAR = {2008}, ABSTRACT = {Language Resources are recognized as central and strategic for the development of any Human Language Technology system and application product. They play a critical role as horizontal technology and have been recognized in many occasions as a priority also by national and supra-national funding a number of initiatives (such as EAGLES, ISLE, ELRA) to establish some sort of coordination of LR activities, and a number of large LR creation projects, both in the written and in the speech areas.}, KEYWORDS = {Human Language Technology, Language Resources}, URL = {https://publications.cnr.it/doc/157454}, } @TECHREPORT{SORIA_2008_TECHREPORT_SM_157450, AUTHOR = {Soria, C. 
and Monachini, M.},
  TITLE = {KYOTO-LMF WordNet Representation Format},
  YEAR = {2008},
  KEYWORDS = {Ontology linked to wordnets},
  URL = {https://publications.cnr.it/doc/157450},
}

@TECHREPORT{TOKUNAGA_2008_TECHREPORT_TCHKSYCCHKMPS_157453,
  AUTHOR = {Tokunaga, T. and Calzolari, N. and Huang, C. and Kiyoaki, S. and Sornlertlamvanich, V. and Yingju, X. and Charoenporn, T. and Chung, S. and Hsieh, S. and Kaplan, D. and Monachini, M. and Prévot, L. and Soria, C.},
  TITLE = {Developing International Standards of Language Resources for Semantic Web Applications-Research Report of the International Joint Research Program NEDO},
  YEAR = {2008},
  ABSTRACT = {This report describes a three-year project aiming at an international standard for language resources that includes Asian languages. We summarise our contribution to an international standard of lexical markup framework (LMF) and introduce a prototype query expansion system using LMF-compliant lexical resources. Since ISO 24613 was in the FDIS stage and fairly stable, we built sample lexicons in Chinese, English, Italian, Japanese, and Thai based on ISO24613. At the same time, we implemented a query expansion system utilising rich linguistic resources including lexicons described in the ISO 24613 framework. We confirmed that a system was feasible which worked on the tested languages (including both Western and Eastern languages) when given lexicons are compliant with the framework.},
  KEYWORDS = {International standards, Language resources, Semantic web applications},
  URL = {https://publications.cnr.it/doc/157453},
}

@ARTICLE{BERTAGNA_2007_ARTICLE_BMSCHHMT_30874,
  AUTHOR = {Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N. and Huang, C. and Hsieh, S. and Marchetti, A.
and Tesconi, M.},
  TITLE = {Fostering Intercultural Collaboration: a Web Service Architecture for Cross-Fertilization of Distributed Wordnets},
  YEAR = {2007},
  ABSTRACT = {Enhancing the development of multilingual lexicons is of foremost importance for intercultural collaboration to take place, as multilingual lexicons are the cornerstone of several multilingual applications. However, the development and maintenance of large-scale, robust multilingual dictionaries is a tantalizing task. In this paper we present a tool, based on a web service architecture, enabling semi-automatic generation of bilingual lexicons through linking of distributed monolingual lexical resources. In addition to lexicon development, the architecture also allows enrichment of monolingual source lexicons through exploitation of the semantic information encoded in corresponding entries. In the paper we describe our case study applied to the Italian and Chinese wordnets, and we illustrate how the architecture can be extended to access distributed multilingual WordNets over the Internet, paving the way to exploitation in a cross-lingual framework of the wealth of information built over the last decade.},
  PAGES = {146--158},
  URL = {https://publications.cnr.it/doc/30874},
  VOLUME = {4568},
  PUBLISHER = {Springer (Berlin, Germania)},
  ISSN = {0302-9743},
  JOURNAL = {Lecture notes in computer science},
}

@INPROCEEDINGS{BERTAGNA_2007_INPROCEEDINGS_BMSCRTM_172595,
  AUTHOR = {Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N. and Ronzano, F. and Tesconi, M. and Marchetti, A.},
  TITLE = {Cooperative Building of Semantic Resources},
  YEAR = {2007},
  ABSTRACT = {In this paper we present LexFlow, a framework for the automatic and cooperative enrichment, integration and exploitation of semantic resources.
Borrowing from techniques used in the domain of document workflows, we model the activity of lexicon management as a particular case of workflow instance, where lexical entries move across agents and become dynamically updated. We also give an important exploitation example of the semantic resources managed or built thanks to LexFlow, describing its integration with SemKey, a system for semantic collaborative tagging.},
  KEYWORDS = {semantic resources, cooperative knowledge definition, semantic tagging},
  URL = {https://publications.cnr.it/doc/172595},
  ISBN = {3-540-74781-8},
  CONFERENCE_NAME = {10th Congress of Italian Association for Artificial Intelligence-Cooperative construction of linguistic knowledge bases Workshop},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {10-13 September 2007},
}

@INPROCEEDINGS{BERTAGNA_2007_INPROCEEDINGS_BMSMTHH_173656,
  AUTHOR = {Bertagna, F. and Monachini, M. and Soria, C. and Marchetti, A. and Tesconi, M. and Huang, C. and Hsieh, S.},
  TITLE = {Fostering Intercultural Collaboration: a Web Service Architecture for Cross-Fertilization of Distributed Wordnets},
  YEAR = {2007},
  ABSTRACT = {Enhancing the development of multilingual lexicons is of foremost importance for intercultural collaboration to take place, as multilingual lexicons are the cornerstone of several multilingual applications. However, the development and maintenance of large-scale, robust multilingual dictionaries is a tantalizing task. In this paper we present a tool, based on a web service architecture, enabling semi-automatic generation of bilingual lexicons through linking of distributed monolingual lexical resources. In addition to lexicon development, the architecture also allows enrichment of monolingual source lexicons through exploitation of the semantic information encoded in corresponding entries.
In the paper we describe our case study applied to the Italian and Chinese wordnets, and we illustrate how the architecture can be extended to access distributed multilingual WordNets over the Internet, paving the way to exploitation in a cross-lingual framework of the wealth of information built over the last decade.},
  KEYWORDS = {distributed language resources, interoperable lexical resources, integration of WordNets},
  PAGES = {185--198},
  URL = {https://publications.cnr.it/doc/173656},
  VOLUME = {4568},
  DOI = {10.1007/978-3-540-74000-1_11},
  PUBLISHER = {Springer (Berlin, DEU)},
  ISBN = {978-1-60558-198-9},
  CONFERENCE_NAME = {IWIC 2007-The First International Workshop on Intercultural Collaboration},
  CONFERENCE_PLACE = {Kyoto, Japan},
  CONFERENCE_DATE = {25-26 Gennaio 2007},
}

@INPROCEEDINGS{FRANCOPOULO_2007_INPROCEEDINGS_FBGCMPS_84673,
  AUTHOR = {Francopoulo, G. and Bel, N. and George, M. and Calzolari, N. and Monachini, M. and Pet, M. and Soria, C.},
  TITLE = {Lexical Markup Framework: an ISO Standard for Semantic Information in NLP Lexicons},
  YEAR = {2007},
  ABSTRACT = {Lexical Markup Framework (LMF) is a model that provides a common standardized framework for Natural Language Processing (NLP) lexicons. The goals of LMF are to provide a common model for the creation and use of such lexical resources to manage the exchange of data between and among these resources, and to enable the merging of a large number of individual resources to form extensive global electronic resources.},
  URL = {https://publications.cnr.it/doc/84673},
  ISBN = {978-3-8233-6314-9},
  CONFERENCE_NAME = {GLDV2007-Lexical-Semantic and Ontological Resources of the GLDV Working Group on Lexicography at the Biennial Spring Conference},
  CONFERENCE_PLACE = {Tubingen},
  CONFERENCE_DATE = {13-14/04/2007},
}

@INPROCEEDINGS{SORIA_2007_INPROCEEDINGS_SBLMP_84682,
  AUTHOR = {Soria, C. and Bartolini, R. and Lenci, A. and Montemagni, S.
and Pirrelli, V.},
  TITLE = {Automatic Extraction of Semantics in Law Documents},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/84682},
  CONFERENCE_NAME = {V Legislative XML Workshop},
  CONFERENCE_PLACE = {Firenze},
  CONFERENCE_DATE = {2007},
}

@TECHREPORT{CALZOLARI_2007_TECHREPORT_CMQSGB_157444,
  AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V. and Soria, C. and Goggi, S. and Baroni, P.},
  TITLE = {FLaReNet: Fostering Language Resources Network. Grant Agreement n° 617001, eContentPlus},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157444},
}

@TECHREPORT{SORIA_2007_TECHREPORT_ST_157437,
  AUTHOR = {Soria, C. and Thorleifsdottir, A.},
  TITLE = {eParticipation: the potential of new and emerging technologies},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157437},
}

@MISC{TESCONI_2007_MISC_TMBMSC_157409,
  AUTHOR = {Tesconi, M. and Marchetti, A. and Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N.},
  TITLE = {LeXFlow: a Prototype Supporting Collaborative Lexicon Development and Cross-fertilization},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157409},
}

@INPROCEEDINGS{CALZOLARI_2006_INPROCEEDINGS_CSSCPBEMSC_84625,
  AUTHOR = {Calzolari, F. and Sassolini, E. and Sassi, M. and Cucurullo, S. and Picchi, E. and Bertagna, F. and Enea, A. and Monachini, M. and Soria, C. and Calzolari, N.},
  TITLE = {Next Generation Language Resources using Grid},
  YEAR = {2006},
  ABSTRACT = {This paper presents a case study concerning the challenges and requirements posed by next generation language resources, realized as an overall model of open, distributed and collaborative language infrastructure. If a sort of "new paradigm" for language resource sharing is required, we think that the emerging and still evolving technology connected to Grid computing is a very interesting and suitable one for a concrete realization of this vision.
Given the current limitations of Grid computing, it is very important to test the new environment on basic language analysis tools, in order to get the feeling of what are the potentialities and possible limitations connected to its use in NLP. For this reason, we have done some experiments on a module of the Linguistic Miner, i.e. the extraction of linguistic patterns from restricted domain corpora. The Grid environment has produced the expected results (reduction of the processing time, huge storage capacity, data redundancy) without any additional cost for the final user.},
  KEYWORDS = {grid, acquisition, topic classification},
  PAGES = {1858--1861},
  URL = {https://publications.cnr.it/doc/84625},
  ISBN = {2-9517408-2-4},
  CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Genova},
  CONFERENCE_DATE = {24-26 Maggio 2006},
}

@INPROCEEDINGS{FRANCOPOULO_2006_INPROCEEDINGS_FBGCMPS_84649,
  AUTHOR = {Francopoulo, G. and Bel, N. and George, M. and Calzolari, N. and Monachini, M. and Pet, M. and Soria, C.},
  TITLE = {Lexical markup framework (LMF) for NLP multilingual resources},
  YEAR = {2006},
  ABSTRACT = {Optimizing the production, maintenance and extension of lexical resources is one of the crucial aspects impacting Natural Language Processing (NLP). A second aspect involves optimizing the process leading to their integration into applications. In this respect, we believe that the production of a consensual specification on multilingual lexicons can be a useful aid for the various NLP actors.
Within ISO, one purpose of LMF (ISO-24613) is to define a standard for lexicons that covers multilingual data.},
  PAGES = {1--8},
  URL = {https://publications.cnr.it/doc/84649},
  ISBN = {1-932432-69-8},
  CONFERENCE_NAME = {COLING-ACL Workshop on Multilingual Lexical Resources and Interoperability},
  CONFERENCE_PLACE = {Sydney (Australia)},
  CONFERENCE_DATE = {2006},
  BOOKTITLE = {Proceedings of the Workshop on Multilingual Language Resources and Interoperability},
}

@INPROCEEDINGS{FRANCOPOULO_2006_INPROCEEDINGS_FGCMBPS_84632,
  AUTHOR = {Francopoulo, G. and George, M. and Calzolari, N. and Monachini, M. and Bel, N. and Pet, M. and Soria, C.},
  TITLE = {LMF for multilingual, specialized lexicons},
  YEAR = {2006},
  PAGES = {27--32},
  URL = {https://publications.cnr.it/doc/84632},
  ISBN = {2-9517408-2-4},
  CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Genova},
  CONFERENCE_DATE = {24-26 Maggio 2006},
}

@INPROCEEDINGS{FRANCOPOULO_2006_INPROCEEDINGS_FGCMBPS_84633,
  AUTHOR = {Francopoulo, G. and George, M. and Calzolari, N. and Monachini, M. and Bel, N. and Pet, M. and Soria, C.},
  TITLE = {Lexical Markup Framework (LMF)},
  YEAR = {2006},
  ABSTRACT = {Optimizing the production, maintenance and extension of lexical resources is one of the crucial aspects impacting Natural Language Processing (NLP). A second aspect involves optimizing the process leading to their integration in applications. With this respect, we believe that the production of a consensual specification on lexicons can be a useful aid for the various NLP actors. Within ISO, the purpose of LMF is to define a standard for lexicons. LMF is a model that provides a common standardized framework for the construction of NLP lexicons.
The goals of LMF are to provide a common model for the creation and use of lexical resources, to manage the exchange of data between and among these resources, and to enable the merging of a large number of individual electronic resources to form extensive global electronic resources. In this paper, we describe the work in progress within the sub-group ISO-TC37/SC4/WG4. Various experts from a lot of countries have been consulted in order to take into account best practices in a lot of languages for (we hope) all kinds of NLP lexicons.},
  PAGES = {233--236},
  URL = {https://publications.cnr.it/doc/84633},
  PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)},
  ISBN = {2-9517408-2-4},
  CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Genoa},
  CONFERENCE_DATE = {2006},
}

@INPROCEEDINGS{GIOULI_2006_INPROCEEDINGS_GLGPMSCC_84634,
  AUTHOR = {Giouli, V. and Labropoulou, P. and Gavrilidou, M. and Piperidis, S. and Monachini, M. and Soria, C. and Calzolari, N. and Choukri, K.},
  TITLE = {Language Resources Production Models: the Case of the INTERA Multilingual Corpus and Terminology},
  YEAR = {2006},
  ABSTRACT = {This paper reports on the multilingual Language Resources (MLRs), i.e. parallel corpora and terminological lexicons for less widely digitally available languages, that have been developed in the INTERA project and the methodology adopted for their production. Special emphasis is given to the reality factors that have influenced the MLRs development approach and their final constitution.
Building on the experience gained in the project, a production model has been elaborated, suggesting ways and techniques that can be exploited in order to improve LRs production taking into account realistic issues.},
  KEYWORDS = {multilingual parallel corpora, language resources production models, less widely digitally available languages},
  PAGES = {609--614},
  URL = {https://publications.cnr.it/doc/84634},
  ISBN = {2-9517408-2-4},
  CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Genova},
  CONFERENCE_DATE = {24-26 Maggio 2006},
}

@INPROCEEDINGS{MARCHETTI_2006_INPROCEEDINGS_MTRRBMSCHH_84652,
  AUTHOR = {Marchetti, A. and Tesconi, M. and Ronzano, F. and Rosella, M. and Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N. and Huang, C. R. and Hsieh, S. K.},
  TITLE = {Towards an Architecture for the GlobalWordNet Initiative},
  YEAR = {2006},
  URL = {https://publications.cnr.it/doc/84652},
  CONFERENCE_NAME = {SWAP-06, the 3rd Italian Semantic Web Workshop},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {2006},
}

@INPROCEEDINGS{MARCHETTI_2006_INPROCEEDINGS_MTRRBMSCHH_263653,
  AUTHOR = {Marchetti, A. and Tesconi, M. and Ronzano, F. and Rosella, M. and Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N. and Huang, C. and Hsieh, S.},
  TITLE = {Toward an Architecture for the Global Wordnet Initiative},
  YEAR = {2006},
  ABSTRACT = {Enhancing the development of multilingual lexicons is of foremost importance for intercultural collaboration to take place, as multilingual lexicons are the cornerstone of several multilingual applications. However, the development and maintenance of large-scale, robust multilingual dictionaries is a tantalizing task. Moreover, Semantic Web's growing interest towards the availability of high-quality lexical resources and their multilingual interoperability, is focusing more and more attention on this topic.
In this paper we present a tool, based on a web service architecture, enabling semi-automatic generation of bilingual lexicons through linking of distributed monolingual lexical resources. In addition to lexicon development, the architecture also allows enrichment of monolingual source lexicons through exploitation of the semantic information encoded in corresponding entries. In the paper we describe our case study applied to the Italian and Chinese wordnets, and we illustrate how the architecture can be extended to access distributed multilingual WordNets over the Internet, paving the way to exploitation in a cross-lingual framework of the wealth of information built over the last decade.},
  KEYWORDS = {Lexical resource, wordnet, multilingual interoperability, semantic web},
  PAGES = {7--35},
  URL = {http://ceur-ws.org/Vol-201/35.pdf},
  CONFERENCE_NAME = {SWAP 2006-Semantic Web Applications and Perspectives},
  CONFERENCE_PLACE = {Pisa, Italy},
  CONFERENCE_DATE = {18-20 December, 2006},
}

@INPROCEEDINGS{SORIA_2006_INPROCEEDINGS_STBCMM_171465,
  AUTHOR = {Soria, C. and Tesconi, M. and Bertagna, F. and Calzolari, N. and Marchetti, A. and Monachini, M.},
  TITLE = {Moving to dynamic computational lexicons with LeXFlow},
  YEAR = {2006},
  ABSTRACT = {LeXFlow is a framework for semi-automatic integration of lexicons, already expressed in standardized format. LeXFlow is intended as a tool for, on the one hand, paving the way to the development of dynamic multi-source lexicons; and on the other, for fostering the adoption of standards. Borrowing from techniques used in the domain of document workflows, we model the activity of lexicon management as a particular case of workflow instance, where lexical entries move across agents and become dynamically updated. To this end, we have designed a lexical flow (LF) corresponding to the scenario where an entry of a lexicon A becomes enriched via basically two steps.
First, by virtue of being mapped onto a corresponding entry belonging to a lexicon B, the entry(LA) inherits the semantic relations available in B. Second, by resorting to an automatic application that acquires information about semantic relations from corpora, the relations acquired are integrated into the entry and proposed to the human encoder. As a result of the lexical flow, in addition, for each starting lexical entry(LA) mapped onto a corresponding entry(LB) the flow produces a new entry representing the merging of the original two entries.},
  KEYWORDS = {computational lexicons, collaborative authoring},
  PAGES = {12},
  URL = {https://publications.cnr.it/doc/171465},
  ISBN = {2-9517408-2-4},
  CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Genova},
  CONFERENCE_DATE = {24-26 Maggio 2006},
}

@INPROCEEDINGS{TESCONI_2006_INPROCEEDINGS_TMBMHCS_173931,
  AUTHOR = {Tesconi, M. and Marchetti, A. and Bertagna, F. and Monachini, M. and Huang, C. and Calzolari, N. and Soria, C.},
  TITLE = {Towards agent-based cross-lingual interoperability of distributed lexical resources},
  YEAR = {2006},
  ABSTRACT = {In this paper we present an application fostering the integration and interoperability of computational lexicons, focusing on the particular case of mutual linking and cross-lingual enrichment of two wordnets, ItalWordNet and Sinica-BOW lexicons.
This is intended as a case-study investigating the needs and requirements of semi-automatic integration and interoperability of lexical resources.},
  KEYWORDS = {wordnet, multilingual computational lexicons, collaborative authoring},
  PAGES = {17--24},
  URL = {https://publications.cnr.it/doc/173931},
  ISBN = {1-932432-69-8},
  CONFERENCE_NAME = {ACL Workshop on Multilingual Lexical Resources and Interoperability},
  CONFERENCE_PLACE = {Sydney (Australia)},
  CONFERENCE_DATE = {15-23 Luglio 2006},
  BOOKTITLE = {Proceedings of the Workshop on Multilingual Language Resources and Interoperability},
}

@INPROCEEDINGS{TESCONI_2006_INPROCEEDINGS_TMBMSC_83590,
  AUTHOR = {Tesconi, M. and Marchetti, A. and Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N.},
  TITLE = {LeXFlow: a system for cross-fertilization of computational lexicons},
  YEAR = {2006},
  ABSTRACT = {This demo presents LeXFlow, a workflow management system for cross-fertilization of computational lexicons. Borrowing from techniques used in the domain of document workflows, we model the activity of lexicon management as a set of workflow types, where lexical entries move across agents in the process of being dynamically updated. A prototype of LeXFlow has been implemented with extensive use of XML technologies (XSLT, XPath, XForms, SVG) and open-source tools (Cocoon, Tomcat, MySQL). LeXFlow is a web-based application that enables the cooperative and distributed management of computational lexicons.},
  KEYWORDS = {computational lexicons, collaborative authoring},
  URL = {https://publications.cnr.it/doc/83590},
  DOI = {10.3115/1225403.1225406},
  CONFERENCE_NAME = {COLING-ACL '06 Proceedings of the COLING/ACL on Interactive presentation sessions},
  CONFERENCE_PLACE = {Sydney (Australia)},
  CONFERENCE_DATE = {16-23 luglio 2006},
}

@INPROCEEDINGS{TESCONI_2006_INPROCEEDINGS_TMBMSC_84656,
  AUTHOR = {Tesconi, M. and Marchetti, A. and Bertagna, F. and Monachini, M. and Soria, C.
and Calzolari, N.},
  TITLE = {LeXFlow: a framework for cross-fertilization of computational lexicons},
  YEAR = {2006},
  ABSTRACT = {This demo presents LeXFlow, a workflow management system for cross-fertilization of computational lexicons. Borrowing from techniques used in the domain of document workflows, we model the activity of lexicon management as a set of workflow types, where lexical entries move across agents in the process of being dynamically updated. A prototype of LeXFlow has been implemented with extensive use of XML technologies (XSLT, XPath, XForms, SVG) and open-source tools (Cocoon, Tomcat, MySQL). LeXFlow is a web-based application that enables the cooperative and distributed management of computational lexicons.},
  PAGES = {9--12},
  URL = {https://publications.cnr.it/doc/84656},
  ISBN = {1-932432-69-8},
  CONFERENCE_NAME = {COLING/ACL 2006},
  CONFERENCE_PLACE = {Sydney (Australia)},
  CONFERENCE_DATE = {15-23 Luglio 2006},
  BOOKTITLE = {Proceedings of the COLING/ACL 2006 Interactive Presentation Sessions},
}

@INPROCEEDINGS{TOKUNAGA_2006_INPROCEEDINGS_TSCCMSHPXYK_84657,
  AUTHOR = {Tokunaga, T. and Sornlertlamvanich, V. and Charoenporn, T. and Calzolari, N. and Monachini, M. and Soria, C. and Huang, C. and Prevot, L. and Xia, Y. and Yu, H. and Kiyoaki, S.},
  TITLE = {Infrastructure for standardization of Asian language resources},
  YEAR = {2006},
  ABSTRACT = {As an area of great linguistic and cultural diversity, Asian language resources have received much less attention than their western counterparts. Creating a common standard for Asian language resources that is compatible with an international standard has at least three strong advantages: to increase the competitive edge of Asian countries, to bring Asian countries closer to their western counterparts, and to bring more cohesion among Asian countries. To achieve this goal, we have launched a two year project to create a common standard for Asian language resources.
The project is comprised of four research items, (1) building a description framework of lexical entries, (2) building sample lexicons, (3) building an upper-layer ontology and (4) evaluating the proposed framework through an application. This paper outlines the project in terms of its aim and approach.},
  PAGES = {827--834},
  URL = {https://publications.cnr.it/doc/84657},
  ISBN = {1-932432-69-8},
  CONFERENCE_NAME = {COLING/ACL 2006},
  CONFERENCE_PLACE = {Sydney (Australia)},
  CONFERENCE_DATE = {15-26 luglio 2006},
  BOOKTITLE = {Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions},
}

@TECHREPORT{MONACHINI_2006_TECHREPORT_MSCFB_157402,
  AUTHOR = {Monachini, M. and Soria, C. and Calzolari, N. and Francopoulo, G. and Bel, N.},
  TITLE = {WD of Lexica standard for CD ballot},
  YEAR = {2006},
  URL = {https://publications.cnr.it/doc/157402},
}

@ARTICLE{MONACHINI_2005_ARTICLE_MS_64515,
  AUTHOR = {Monachini, M. and Soria, C.},
  TITLE = {Building Multilingual Terminological Lexicons for Less Widely Available Languages},
  YEAR = {2005},
  ABSTRACT = {Availability of Linguistic Resources for the development of Human Language Technology applications is nowadays recognized as a critical issue with both political and economic impact and implications on the sphere of cultural identity. This paper reports about the experience gained during the INTERA European project for the production of multilingual terminological lexicons for less widely available languages, i.e. those languages that suffer from poor representation over the net and from scarce computational resources, but yet are requested by the market.
It discusses the procedure followed within the project, focuses on the problems faced which had an impact on the initial goals, presents the necessary modifications that resulted from these problems, evaluates the market needs as attested by various surveys, and describes the methodology that is proposed for the efficient production of Multilingual Terminological Lexicons.},
  PAGES = {251--261},
  URL = {https://publications.cnr.it/doc/64515},
  VOLUME = {15},
  PUBLISHER = {Polish Scientific Publishers PWN (Warszawa, Polonia)},
  ISSN = {1230-2384},
  JOURNAL = {Archives of Control Sciences},
}

@INPROCEEDINGS{BIAGIOLI_2005_INPROCEEDINGS_BFPMS_172458,
  AUTHOR = {Biagioli, C. and Francesconi, E. and Passerini, A. and Montemagni, S. and Soria, C.},
  TITLE = {Automatic semantics extraction in law documents},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/172458},
  CONFERENCE_NAME = {Tenth International Conference on Artificial Intelligence and Law (ICAIL 2005)},
  CONFERENCE_PLACE = {Bologna},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_CS_84595,
  AUTHOR = {Calzolari, N. and Soria, C.},
  TITLE = {A new paradigm for an Open Distributed Language Resource Infrastructure: the case of Computational Lexicons},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/84595},
  CONFERENCE_NAME = {Knowledge Collection from Volunteer Contributors. Papers from the 2005 AAAI Spring Symposium},
  CONFERENCE_PLACE = {Stanford},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{GAVRILIDOU_2005_INPROCEEDINGS_GLMPS_84580,
  AUTHOR = {Gavrilidou, M. and Labropoulou, P. and Monachini, M. and Piperidis, S.
and Soria, C.},
  TITLE = {Building Multilingual Language Resources},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/84580},
  ISBN = {954-91743-2-8},
  CONFERENCE_NAME = {Language and Speech Infrastructure for Information Access in the Balkan Countries},
  CONFERENCE_PLACE = {Borovets, Bulgaria},
  CONFERENCE_DATE = {25/2/2005},
}

@INPROCEEDINGS{MONACHINI_2005_INPROCEEDINGS_MS_84597,
  AUTHOR = {Monachini, M. and Soria, C.},
  TITLE = {Building Multilingual Terminological Lexicons for Less Widely Available Languages},
  YEAR = {2005},
  ABSTRACT = {Availability of Linguistic Resources for the development of Human Language Technology applications is nowadays recognized as a critical issue with both political and economic impact and implications on the sphere of cultural identity. This paper reports about the experience gained during the INTERA European project for the production of multilingual terminological lexicons for less widely available languages, i.e. those languages that suffer from poor representation over the net and from scarce computational resources, but yet are requested by the market. It discusses the procedure followed within the project, focuses on the problems faced which had an impact on the initial goals, presents the necessary modifications that resulted from these problems, evaluates the market needs as attested by various surveys, and describes the methodology that is proposed for the efficient production of Multilingual Terminological Lexicons.},
  PAGES = {129--133},
  URL = {https://publications.cnr.it/doc/84597},
  PUBLISHER = {IMPRESJA Wydawnictwa Elektroniczne S.
A (Poznan, POL)},
  ISBN = {83-7111-341-2},
  CONFERENCE_NAME = {2nd Language \& Technology Conference Human Language Technologies as a Challenge for Computer Science and Linguistics},
  CONFERENCE_PLACE = {Poznan},
  CONFERENCE_DATE = {21-23 Aprile 2005},
  BOOKTITLE = {2nd Language \& Technology Conference Human Language Technologies as a Challenge for Computer Science and Linguistics Poznan},
  EDITOR = {Vetulani, Z.},
}

@INPROCEEDINGS{SORIA_2005_INPROCEEDINGS_S_84584,
  AUTHOR = {Soria, C.},
  TITLE = {Constraints on the use of connectives in discourse},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/84584},
  CONFERENCE_NAME = {First International Symposium on the Exploration and Modelling of Meaning},
  CONFERENCE_PLACE = {Biarritz, France},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{SORIA_2005_INPROCEEDINGS_SM_84601,
  AUTHOR = {Soria, C. and Monachini, M.},
  TITLE = {Methods, Models and Standardization Issues for the Creation of Linguistic Resources: the Case of Under-Represented Languages},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/84601},
  CONFERENCE_NAME = {TALN \& RECITAL 2005: 12ème conférence annuelle sur le Traitement Automatique des Langues Naturelles},
  CONFERENCE_PLACE = {Dourdan-France},
  CONFERENCE_DATE = {6-10 Giugno 2005},
}

@TECHREPORT{FRANCOPOULO_2005_TECHREPORT_FGCMBPS_157379,
  AUTHOR = {Francopoulo, G. and George, M. and Calzolari, N. and Monachini, M. and Bel, N. and Pet, M. and Soria, C.},
  TITLE = {Language Resource Management – Lexical Markup Framework},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157379},
}

@TECHREPORT{LENCI_2005_TECHREPORT_LPS_157382,
  AUTHOR = {Lenci, A. and Pirrelli, V. and Soria, C.},
  TITLE = {Modellazione del motore di dialogo e delle strutture dati di supporto},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157382},
}

@TECHREPORT{MONACHINI_2005_TECHREPORT_MSCDW_157385,
  AUTHOR = {Monachini, M. and Soria, C. and Choukri, K. and Declerck, T.
and Wittenburg, P.},
  TITLE = {Final Evaluation Report},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157385},
}

@TECHREPORT{MONACHINI_2005_TECHREPORT_MSUCDM_157386,
  AUTHOR = {Monachini, M. and Soria, C. and Ulivieri, M. and Calzolari, N. and Declerck, T. and Mammini, M.},
  TITLE = {Evaluation of existing standards for NLP Lexica: Proposal for Candidate Data Categories},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157386},
}

@MISC{FRANCOPOULO_2005_MISC_FGCMBPS_151541,
  AUTHOR = {Francopoulo, G. and George, M. and Calzolari, N. and Monachini, M. and Bel, N. and Pet, M. and Soria, C.},
  TITLE = {Lexical Markup Framework},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151541},
}

@MISC{GAVRILIDOU_2005_MISC_GLMPS_151543,
  AUTHOR = {Gavrilidou, M. and Labropoulou, P. and Monachini, M. and Piperidis, S. and Soria, C.},
  TITLE = {INTERA Business model},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151543},
}

@MISC{MONACHINI_2005_MISC_MS_151528,
  AUTHOR = {Monachini, M. and Soria, C.},
  TITLE = {Terminologia Multilingue (inglese-greco-serbo-sloveno-bulgaro)},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151528},
}

@MISC{MONACHINI_2005_MISC_MSPSR_151547,
  AUTHOR = {Monachini, M. and Soria, C. and Picchi, E. and Sassolini, E. and Ruffolo, P.},
  TITLE = {Procedure e tecniche di acquisizione semi-automatica di terminologie da testi paralleli},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151547},
}

@MISC{MONACHINI_2005_MISC_MSUCDM_151542,
  AUTHOR = {Monachini, M. and Soria, C. and Ulivieri, M. and Calzolari, N. and Declerck, T. and Mammini, M.},
  TITLE = {Data Category Registry},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151542},
}

@MISC{SORIA_2005_MISC_SM_151538,
  AUTHOR = {Soria, C. and Monachini, M.},
  TITLE = {MILE-OWL},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151538},
}

@ARTICLE{FURFARI_2004_ARTICLE_FSPSB_173367,
  AUTHOR = {Furfari, F. and Soria, C. and Pirrelli, V. and Signore, O.
and Bianchi Bandinelli, R.},
  TITLE = {NICHE: Natural Interaction in Computerised Home Environments},
  YEAR = {2004},
  ABSTRACT = {Future technologies will provide users with increasing control over surrounding devices embedded in a common home environment. Somewhat paradoxically, this could result in an increase rather than a reduction in complexity if support for high-level interfacing is not introduced. This concern prompted the launching of a medium-term project aimed at promoting natural user-home interaction along the lines of the Ambient Intelligence vision.},
  KEYWORDS = {HCI, Home Automation, Smart Home},
  PAGES = {55--56},
  URL = {http://www.ercim.org/publication/Ercim_News/enw58/furfari.html},
  VOLUME = {58},
  PUBLISHER = {ERCIM (Le Chesnay)},
  ISSN = {0926-4981},
  JOURNAL = {ERCIM news},
}

@INCOLLECTION{BARTOLINI_2004_INCOLLECTION_BLMPS_30867,
  AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V. and Soria, C.},
  TITLE = {Automatic Classification and Analysis of Provisions in Italian Legal Texts: A Case Study},
  YEAR = {2004},
  ABSTRACT = {In this paper we address the problem of automatically enriching legal texts with semantic annotation, an essential pre–requisite to effective indexing and retrieval of legal documents. This is done through illustration of SALEM (Semantic Annotation for LEgal Management), a computational system developed for automated semantic annotation of (Italian) law texts. SALEM is an incremental system using Natural Language Processing techniques to perform two tasks: i) classify law paragraphs according to their regulatory content, and ii) extract relevant text fragments corresponding to specific semantic roles that are relevant for the different types of regulatory content.
The paper sketches the overall architecture of SALEM and reports results of a preliminary case study on a sample of Italian law texts.},
  KEYWORDS = {Annotazione semantica, Classificazione automatica},
  PAGES = {593--604},
  URL = {https://rdcu.be/dftjm},
  VOLUME = {3292},
  DOI = {10.1007/978-3-540-30470-8_72},
  PUBLISHER = {Springer (Berlin, DEU)},
  ISBN = {978-3-540-23664-1},
  BOOKTITLE = {On the Move to Meaningful Internet Systems 2004: OTM 2004 Workshops. OTM 2004},
  EDITOR = {Meersman, R. and Tari, Z. and Corsaro, A.},
}

@INPROCEEDINGS{BARTOLINI_2004_INPROCEEDINGS_BLMPS_84571,
  AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V. and Soria, C.},
  TITLE = {Semantic Mark-up of Italian Legal Texts Through NLP-based Techniques},
  YEAR = {2004},
  URL = {https://publications.cnr.it/doc/84571},
  ISBN = {2-9517408-1-6},
  CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Lisbon, Portugal},
  CONFERENCE_DATE = {2004},
}

@INPROCEEDINGS{BERTAGNA_2004_INPROCEEDINGS_BCMSU_84572,
  AUTHOR = {Bertagna, F. and Calzolari, N. and Monachini, M. and Soria, C. and Ulivieri, M.},
  TITLE = {Report on the interlingual annotation experience at ILC-CNR},
  YEAR = {2004},
  URL = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.92.5078&rep=rep1&type=pdf},
  CONFERENCE_NAME = {Seventh Interlingua Workshop on Determining Interlingua Utility for Machine Translation},
  CONFERENCE_PLACE = {Washington DC},
  CONFERENCE_DATE = {2 Ottobre 2004},
  BOOKTITLE = {Biennial Conference of the AMTA-Determining Interlingua Utility for Machine Translation},
  EDITOR = {Habash, N. and Dorr, B. and Hovy, E. and Reeder, F.},
}

@TECHREPORT{GAVRILIDOU_2004_TECHREPORT_GGDLMSPRS_157392,
  AUTHOR = {Gavrilidou, M.
and Giouli, V. and Desipri, E. and Labropoulou, P. and Monachini, M. and Soria, C. and Picchi, E. and Ruffolo, P. and Sassolini, E.},
  TITLE = {Report on the multilingual resources production},
  YEAR = {2004},
  URL = {https://publications.cnr.it/doc/157392},
}

@TECHREPORT{GAVRILIDOU_2004_TECHREPORT_GGDMS_157393,
  AUTHOR = {Gavrilidou, M. and Giouli, V. and Desipri, E. and Monachini, M. and Soria, C.},
  TITLE = {Report on the model of LRs production. INTERA},
  YEAR = {2004},
  URL = {https://publications.cnr.it/doc/157393},
}

@ARTICLE{SORIA_2003_ARTICLE_SP_64470,
  AUTHOR = {Soria, C. and Pirrelli, V.},
  TITLE = {A multi-level annotation meta-scheme for dialogue acts},
  YEAR = {2003},
  ABSTRACT = {This article describes a new principled framework for comparison, design and standardization of annotation schemes for dialogue acts. Previous attempts at comparing existing schemes in order to identify a common core of generally agreed-upon dialogue acts share the assumption that tags belonging to different schemes and describing the same general phenomena can always be related through hypo- or hyperonymy relationships. Consequently, general-purpose schemes have often been the result of a merger of different tag sets. In this article, we show the extent to which comparability of different annotation schemes is prevented by the very limited tag inter-translatability. We thus describe an alternative approach to the comparison of dialogue act taxonomies based on a compositional analysis of tags according to independent classificatory dimensions. The framework takes a recognition-based approach to dialogue tagging and defines four independent taxonomies of tags, one for each orthogonal dimension of linguistic and contextual analysis assumed to have a bearing on identification of dialogue acts. We also show how the same framework can be used to design a general-purpose annotation scheme which combines the features of generality and expressivity by exploiting a modular structure.
The advantages and limitations of this proposal over other previous attempts are discussed and concretely exemplified.},
  KEYWORDS = {dialogue acts, annotation scheme, pragmatics},
  PAGES = {925--952},
  URL = {https://publications.cnr.it/doc/64470},
  VOLUME = {18-19},
  PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)},
  ISSN = {0392-6907},
  JOURNAL = {Linguistica computazionale (Testo stamp.)},
}

@INPROCEEDINGS{SORIA_2003_INPROCEEDINGS_SBC_84564,
  AUTHOR = {Soria, C. and Bertagna, F. and Calzolari, N.},
  TITLE = {ItalWordNet in an annotation task: a chance for discussion},
  YEAR = {2003},
  URL = {https://publications.cnr.it/doc/84564},
  CONFERENCE_NAME = {First International WordNet Conference, Global WordNet Association},
  CONFERENCE_PLACE = {Mysore},
  CONFERENCE_DATE = {2003},
}

@TECHREPORT{GAVRILIDOU_2003_TECHREPORT_GDLCMS_157321,
  AUTHOR = {Gavrilidou, M. and Desipri, E. and Labropoulou, P. and Calzolari, N. and Monachini, M. and Soria, C.},
  TITLE = {Technical Specifications for the Selection and Encoding of Multilingual Resources},
  YEAR = {2003},
  URL = {https://publications.cnr.it/doc/157321},
}

@TECHREPORT{HEID_2003_TECHREPORT_HMPS_157341,
  AUTHOR = {Heid, U. and Maci, E. and Pirrelli, V. and Soria, C.},
  TITLE = {NITE Interim Evaluation Report},
  YEAR = {2003},
  URL = {https://publications.cnr.it/doc/157341},
}

@TECHREPORT{MONACHINI_2003_TECHREPORT_MS_157324,
  AUTHOR = {Monachini, M. and Soria, C.},
  TITLE = {Testing Scenario and Quality Assessment Strategy},
  YEAR = {2003},
  URL = {https://publications.cnr.it/doc/157324},
}

@TECHREPORT{SORIA_2003_TECHREPORT_S_157350,
  AUTHOR = {Soria, C.},
  TITLE = {Advice and recommendations about AsAnAngel's linguistic components},
  YEAR = {2003},
  URL = {https://publications.cnr.it/doc/157350},
}

@ARTICLE{CALZOLARI_2002_ARTICLE_CSBB_64500,
  AUTHOR = {Calzolari, N. and Soria, C. and Bertagna, F.
and Barsotti, F.},
  TITLE = {Evaluating Lexical Resources Using Senseval},
  YEAR = {2002},
  ABSTRACT = {The aim of our paper is twofold: to introduce some general reflections on the task of lexical semantic annotation and the adequacy of existing lexical-semantic reference resources, while giving an overall description of the Italian lexical sample task for the SENSEVAL-2 experiment. We suggest how the SENSEVAL exercise (and comparison between the two editions of the experiment) can be employed to evaluate the lexical reference resources used for annotation. We conclude with a few general remarks on the gap between the lexicon, a partially decontextualised object, and the corpus, where context plays a significant role.},
  KEYWORDS = {Risorse Lessicali, Disambiguazione, Semantica, Annotazione, Wordnet},
  PAGES = {375--390},
  URL = {https://publications.cnr.it/doc/64500},
  VOLUME = {8},
  DOI = {10.1017/S1351324902003017},
  PUBLISHER = {Cambridge University Press (Cambridge, Regno Unito)},
  ISSN = {1469-8110},
  JOURNAL = {Natural language engineering (Online)},
}