@ARTICLE{DANKOVA_2025_ARTICLE_DFKM_579261, AUTHOR = {Dankova, K. and Frontini, F. and Khan, A. F. and Monachini, M.}, TITLE = {La représentation et la diffusion des données terminologiques plurilingues: la collection REALITER – OTPL (CLARIN-IT)}, YEAR = {2025}, ABSTRACT = {In today’s increasingly interconnected world, plurilingual and pluricultural competences are essential for participation in economic, scientific, and cultural exchanges. In this context, the creation and dissemination of plurilingual terminological resources play an important role in ensuring clear and effective communication in scientific and professional fields. The Pan-Latin Terminology Network REALITER recognizes the benefits of plurilingual communication in specialized domains and therefore carries out activities aimed at promoting linguistic diversity in the area of Romance languages. Since its creation (1993), several plurilingual lexicons covering a wide range of sectors, such as the environment, digital technologies, education, and fashion, have been produced. Thanks to the collaboration between CLARIN-IT (the Italian national node of CLARIN ERIC, the European infrastructure for language resources and technologies) and OTPL (Osservatorio di Terminologie e Politiche Linguistiche, Università Cattolica del Sacro Cuore, Milan), these terminological data are indexed in the REALITER – OTPL collection and published on the ILC4CLARIN-SKOSMOS Service platform. After presenting the REALITER projects, with particular attention to terminological variation and cultural aspects, the paper aims to describe the methodological choices made for the representation and dissemination of these plurilingual lexicons in compliance with FAIR principles. 
This will highlight the crucial role of infrastructures such as CLARIN ERIC in supporting the representation, sharing, and preservation of this rich linguistic and cultural heritage}, KEYWORDS = {plurilingualism, lexicon, terminological variation, FAIR principles, infrastructure, plurilinguisme, lexique, variation terminologique, principes FAIR, infrastructure}, PAGES = {209-236}, URL = {https://id.erudit.org/iderudit/1124416ar}, VOLUME = {XXXVIII (2)}, DOI = {10.7202/1124416ar}, JOURNAL = {TTR}, } @INPROCEEDINGS{KHAN_2025_INPROCEEDINGS_KMQPFS_570784, AUTHOR = {Khan, A. F. and Mallia, M. and Quochi, V. and Pedonese, G. and Frontini, F. and Squadrito, E.}, TITLE = {A Pilot Project for Promoting Linguistic Linked Open Data}, YEAR = {2025}, ABSTRACT = {This paper presents a pilot initiative, part of the H2IOSC infrastructure, that strives to support and promote the creation, publication, and sharing of Linguistic Linked Open Data (LLOD) in Italy and beyond. We describe the different parts of the pilot project: those related to vocabulary hosting, RDF data publication, training development, and use case promotion. Key contributions include the publication and hosting of the REALITER series of lexicons, the PLLOD triple store platform, and LLOD-focused training initiatives. We also describe a series of use-cases taking place within the pilot}, KEYWORDS = {Training,Linguistic Linked Open data, H2IOSC, CLARIN}, PAGES = {1-6}, URL = {https://iris.cnr.it/handle/20.500.14243/570784}, DOI = {10.1109/ieee-ch65308.2025.11279386}, CONFERENCE_NAME = {2025 IEEE International Conference on Cyber Humanities (IEEE-CH)}, BOOKTITLE = {2025 IEEE International Conference on Cyber Humanities (IEEE-CH)}, } @ARTICLE{KHAN_2024_ARTICLE_KF_475881, AUTHOR = {Khan, A. F. 
and Frontini, F.}, TITLE = {Toward a Representation of Semantic Change in Linked Data}, YEAR = {2024}, ABSTRACT = {In this article, we introduce a new framework, the Intensional–Ontological Model (IOM), for representing meaning, and especially for representing semantic change, in linguistic linked data resources. This framework, which makes use of previous work in the literature on lexical semantics and ontologies, is intended to help clarify what we mean when we model semantic change and to assist in elaborating different ontology patterns for doing so. In this work, we assume a simple architecture, one which is at the basis of the well-known OntoLex-Lemon vocabulary and which consists of one or more lexicons linked to an ontology. Our model, which is based on this architecture and informed by previous work on word senses and ontologies, is intended to provide a clear interpretation for the modelling of both onomasiological and semiasological changes, in both static and dynamic versions. This article describes how the IOM framework represents word meaning as the relationship between a word and an ontological concepts in the ’static’ case, demonstrating that the IOM is compatible with OntoLex-Lemon (while at the same time providing a greater level of detail as to the meaning of the ’sense’ and ’reference’ relationships). It then goes on to detail how the IOM can help us understand how to model semantic shifts in linked data lexical resources with a focus on conceptual change and the addition of temporal information to semantic shift data}, KEYWORDS = {linked data, semantic shift, ontologies, lexical semantics}, URL = {https://iris.cnr.it/handle/20.500.14243/475881}, VOLUME = {9 (6)}, DOI = {10.3390/languages9060215}, ISSN = {2226-471X}, JOURNAL = {LANGUAGES}, } @INPROCEEDINGS{GROMANN_2024_INPROCEEDINGS_GGPABBCCFGGGKKLLPORRSSSSSSSTVZZ_475921, AUTHOR = {Gromann, D. and Goncalo Oliveira, H. and Pitarch, L. and Apostol, E. S. and Bernad, J. and Bytyçi, E. 
and Cantone, C. and Carvalho, S. and Frontini, F. and Garabik, R. and Gracia, J. and Granata, L. and Khan, F. and Knez, T. and Labropoulou, P. and Liebeskind, C. and Pia Di Buono, M. and Ostroški Anić, A. and Rackevičienė, S. and Rodrigues, R. and Sérasset, G. and Selmistraitis, L. and Sidibé, M. and Silvano, P. and Spahiu, B. and Sogutlu, E. and Stanković, R. and Truică, C. O. and Valunaite Oleskeviciene, G. and Zitnik, S. and Zdravkova, K.}, TITLE = {MultiLexBATS: Multilingual Dataset of Lexical Semantic Relations}, YEAR = {2024}, ABSTRACT = {Understanding the relation between the meanings of words is an important part of comprehending natural language. Prior work has either focused on analysing lexical semantic relations in word embeddings or probing pretrained language models (PLMs), with some exceptions. Given the rarity of highly multilingual benchmarks, it is unclear to what extent PLMs capture relational knowledge and are able to transfer it across languages. To start addressing this question, we propose MultiLexBATS, a multilingual parallel dataset of lexical semantic relations adapted from BATS in 15 languages including low-resource languages, such as Bambara, Lithuanian, and Albanian. As experiment on cross-lingual transfer of relational knowledge, we test the PLMs(') ability to (1) capture analogies across languages, and (2) predict translation targets. We find considerable differences across relation types and languages with a clear preference for hypernymy and antonymy as well as romance languages}, KEYWORDS = {Lexical Semantic Relations, Multilingual Benchmark, BATS}, PAGES = {11783-11793}, URL = {https://aclanthology.org/2024.lrec-main.1029}, PUBLISHER = {ELRA and ICCL}, BOOKTITLE = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)}, } @INPROCEEDINGS{KHAN_2024_INPROCEEDINGS_KSACLMORF_475941, AUTHOR = {Khan, F. and Salgado, A. and Anuradha, I. and Costa, R. 
and Liyanage, C. and McCrae, J. P. and Ojha, A. K. and Rani, P. and Frontini, F.}, TITLE = {CHAMUÇA: Towards a Linked Data Language Resource of Portuguese Borrowings in Asian Languages}, YEAR = {2024}, ABSTRACT = {This paper presents the development of CHAMUÇA, a novel lexical resource designed to document the influence of the Portuguese language on various Asian languages, with an initial focus on the languages of South Asia. Through the utilization of linked open data and the OntoLex vocabulary, CHAMUÇA offers structured insights into the linguistic characteristics, and cultural ramifications of Portuguese borrowings across multiple languages. The article outlines CHAMUÇA’s potential contributions to the linguistic linked data community, emphasising its role in addressing the scarcity of resources for lesser-resourced languages and serving as a test case for organising etymological data in a queryable format. CHAMUÇA emerges as an initiative towards the comprehensive catalogization and analysis of Portuguese borrowings, offering valuable insights into language contact dynamics, historical evolution, and cultural exchange in Asia, one that is based on linked data technology}, KEYWORDS = {portuguese, ontolex, language contact, lexicon}, URL = {https://aclanthology.org/2024.ldl-1.6}, PUBLISHER = {ELRA and ICCL (Torino, Italia)}, CONFERENCE_PLACE = {Torino, Italia}, BOOKTITLE = {Proceedings of the 9th Workshop on Linked Data in Linguistics @ LREC-COLING 2024}, } @MISC{PEDONESE_2024_MISC_PKMFQS_561741, AUTHOR = {Pedonese, G. and Khan, A. F. and Mallia, M. and Frontini, F. and Quochi, V. and Squadrito, E.}, TITLE = {Linguistic Linked Open Data for Humanists}, YEAR = {2024}, ABSTRACT = {Having achieved popularity as a way of publishing and accessing data in different fields of the sciences and for sharing large encyclopaedic datasets such as DBpedia (derived from Wikipedia), linked data is becoming more and more popular in different areas of the humanities. 
In this course we will present a comprehensive introduction to the creation, publication, and use of linked open data for anyone who wants to work with linguistic datasets – such as lexicons and corpora – and especially for those who come from a linguistic or humanist background. We will look at the basics of linked data and the Semantic Web and introduce the various different standards technologies that make up the Semantic Web stack before focusing on the particular case of linked data language resources. During the course we will study the most important tools, vocabularies, and resources available in the Semantic Web and provide hands-on training for the creation and querying of linguistic linked data. We will look at how Semantic Web technologies can contribute to the creation of FAIR language resources as well as how to publish your resource on the linked open data cloud. We will also show how the Semantic Web query language SPARQL can be a powerful tool for data exploration}, KEYWORDS = {Linked Open Data, Linguistics}, URL = {https://iris.cnr.it/handle/20.500.14243/561741}, DOI = {10.5281/zenodo.13897931}, } @INPROCEEDINGS{CHIARCOS_2022_INPROCEEDINGS_CGIKKT_444084, AUTHOR = {Chiarcos, C. and Gkirtzou, K. and Ionov, M. and Kabashi, B. and Khan, F. and Truic, C.}, TITLE = {Modelling Collocations in OntoLex-FrAC}, YEAR = {2022}, ABSTRACT = {Following presentations of frequency and attestations, and embeddings and distributional similarity, this paper introduces the third cornerstone of the emerging OntoLex module for Frequency, Attestation and Corpus-based Information, OntoLex-FrAC. 
We provide an RDF vocabulary for collocations, established as a consensus over contributions from five different institutions and numerous data sets, with the goal of eliciting feedback from reviewers, workshop audience and the scientific community in preparation of the final consolidation of the OntoLex-FrAC module, whose publication as a W3C community report is foreseen for the end of this year. The novel collocation component of OntoLex-FrAC is described in application to a lexicographic resource and corpus-based collocation scores available from the web, and finally, we demonstrate the capability and genericity of the model by showing how to retrieve and aggregate collocation information by means of SPARQL, and its export to a tabular format, so that it can be easily processed in downstream applications}, URL = {https://iris.cnr.it/handle/20.500.14243/444084}, ISBN = {979-10-95546-92-4}, } @INPROCEEDINGS{COSTA_2022_INPROCEEDINGS_CSRKCTAKRS_444087, AUTHOR = {Costa, R. and Salgado, A. and Ramos, M. and Khan, F. and Carvalho, S. and Tasovac, T. and Almeida, B. and Khemakhem, M. and Romary, L. and Silva, R.}, TITLE = {Integrating Terminological and Ontological Principles into a Lexicographic Resource}, YEAR = {2022}, ABSTRACT = {In this paper we will present the research that is taking place at the NOVA CLUNL where an international team is working on a financed project MORDigital. MORDigital's goal is to encode the selected editions of Diccionario da Lingua Portugueza by António de Morais Silva (MOR), first published in 1789}, URL = {https://iris.cnr.it/handle/20.500.14243/444087}, } @INPROCEEDINGS{KHAN_2022_INPROCEEDINGS_KGGDVMOSS_444085, AUTHOR = {Khan, F. and Gómez, F. J. M. and González, R. C. and Diakoff, H. and Vera, J. E. D. and McCrae, J. P. and O'Loughlin, C. and Short, W. M. 
and Stolk, S.}, TITLE = {Towards the Construction of a WordNet for Old English}, YEAR = {2022}, ABSTRACT = {In this paper we will discuss our preliminary work towards the construction of a WordNet for Old English, taking our inspiration from other similar WN construction projects for ancient languages such as Ancient Greek, Latin and Sanskrit. The Old English WordNet (OldEWN) will build upon this innovative work in a number of different ways which we articulate in the article, most importantly by treating figurative meaning as a 'first-class citizen' in the structuring of the semantic system. From a more practical perspective we will describe our plan to utilize a pre-existing lexicographic resource and the naisc system to automatically compile a provisional version of the WordNet which will then be checked and enriched by Old English experts}, URL = {https://iris.cnr.it/handle/20.500.14243/444085}, } @INPROCEEDINGS{QUOCHI_2022_INPROCEEDINGS_QBKMMPRTZ_412363, AUTHOR = {Quochi, V. and Bellandi, A. and Khan, F. and Mallia, M. and Murano, F. and Piccini, S. and Rigobianco, L. and Tommasi, A. and Zavattari, C.}, TITLE = {From Inscriptions to Lexica and Back: A Platform for Editing and Linking the Languages of Ancient Italy}, YEAR = {2022}, ABSTRACT = {Available language technology is hardly applicable to scarcely attested ancient languages, yet their digital semantic representation, though challenging, is an asset for the purpose of sharing and preserving existing cultural knowledge. In the context of a project on the languages and cultures of ancient Italy, we took up this challenge. This paper thus describes the development of a user friendly web platform, EpiLexO, for the creation and editing of an integrated system of language resources for ancient fragmentary languages centered on the lexicon, in compliance with current digital humanities and Linked Open Data principles. 
EpiLexO allows for the editing of lexica with all relevant cross-references: for their linking to their testimonies, as well as to bibliographic information and other (external) resources and common vocabularies. The focus of the current implementation is on the languages of ancient Italy, in particular Oscan, Faliscan, Celtic and Venetic; however, the technological solutions are designed to be general enough to be potentially applicable to different contexts and scenarios}, KEYWORDS = {Digital Epigraphy, Restsprachen, Lexicon Editing and Linking, tools for DH}, PAGES = {59-67}, URL = {https://aclanthology.org/2022.lt4hala-1.0/}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, ISBN = {979-10-95546-78-8}, CONFERENCE_NAME = {Second Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA 2022)}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA 2022)}, EDITOR = {Sprugnoli, R. and Passarotti, M.}, } @TECHREPORT{TASOVAC_2022_TECHREPORT_TTBBBCUFHHMKKKKMMMMMQARSSVWWZ_446092, AUTHOR = {Tasovac, T. and Tiberius, C. and Bamberg, C. and Bellandi, A. and Burch, T. and Costa, R. and Ďurčo, M. and Frontini, F. and Hennemann, J. and Heylen, K. and Jakubíček, M. and Khan, F. and Klee, A. and Kosem, I. and Kovář, V. and Matuška, O. and McCrae, J. and Monachini, M. and Mörth, K. and Munda, T. and Quochi, V. and Repar, A. and Roche, C. and Salgado, A. and Sievers, H. and Váradi, T. and Weyand, S. and Woldrich, A. and Zhanial, S.}, TITLE = {D5.3 Overview of Online Tutorials and Instruction Manuals}, YEAR = {2022}, ABSTRACT = {The ELEXIS Curriculum is an integrated set of training materials which contextualizes ELEXIS tools and services inside a broader, systematic pedagogic narrative. 
This means that the goal of the ELEXIS Curriculum is not simply to inform users about the functionalities of particular tools and services developed within the project, but to show how such tools and services are a) embedded in both lexicographic theory and practice; and b) representative of and contributing to the development of digital skills among lexicographers. The scope and rationale of the curriculum are described in more detail in the Deliverable D5.2 Guidelines for Producing ELEXIS Tutorials and Instruction Manuals. The goal of this deliverable, as stated in the project DOW, is to provide "a clear, structured overview of tutorials and instruction manuals developed within the project."}, KEYWORDS = {ELEXIS, lexicography, training materials}, PAGES = {31}, URL = {https://elex.is/wp-content/uploads/ELEXIS_D5_3_Overview-of-Online-Tutorials-and-Instruction-Manuals.pdf}, } @ARTICLE{KHAN_2021_ARTICLE_KEJCM_441096, AUTHOR = {Khan, F. and E Díazvera, J. and Javier Minaya Gómez, F. and Cruz González, R. and Monachini, M.}, TITLE = {Mapping Conceptual Variation through A Thesaurus of Old English and Evoke: Towards a Topical Thesaurus of Old English Emotional Expressions}, YEAR = {2021}, ABSTRACT = {The topic of figurative language in Old English (OE) has recently become the focus of substantial research. In this article, the authors will describe work on the semantic description of the lexicon of shame words in OE and in particular the taxonomical organisation of this lexicon on the basis of different kinds of semantic mappings (metonymic, metaphorical). Next, they will explore the use of the Evoke platform as a means of visualising and navigating this lexicon and show how it can be used to enrich A Thesaurus of Old English (TOE). 
The authors also describe ongoing work on the modelling and publication of this data as a linked data resource consisting of a lexicon and a taxonomy in SKOS of different kinds of metaphoric/metonymic sense shifts}, KEYWORDS = {semantic mappings, metaphor, metonymy, Old English, shame, emotions, onomasiology}, PAGES = {442-456}, URL = {https://doi.org/10.1163/18756719-12340238}, DOI = {10.1163/18756719-12340238}, ISSN = {1875-6719}, JOURNAL = {AMSTERDAMER BEITRÄGE ZUR ÄLTEREN GERMANISTIK}, } @INPROCEEDINGS{AHMADI_2020_INPROCEEDINGS_AMNKMPDWBPTOKLVSGTSMRALKLKOFCLASSUZSOKRSPG_404924, AUTHOR = {Ahmadi, S. and McCrae, J. P. and Nimb, S. and Khan, F. and Monachini, M. and Pedersen, B. S. and Declerck, T. and Wissik, T. and Bellandi, A. and Pisani, I. and Troelsgård, T. and Olsen, S. and Krek, S. and Lipp, V. and Váradi, T. and Simon, L. and Gyorffy, A. and Tiberius, C. and Schoonheim, T. and Ben Moshe, Y. and Rudich, M. and Abu Ahmad, R. and Lonke, D. and Kovalenko, K. and Langemets, M. and Kallas, J. and Dereza, O. and Fransen, T. and Cillessen, D. and Lindemann, D. and Alonso, M. and Salgado, A. and Sancho, J. L. and Ureña-Ruiz, R. and Porta Zamorano, J. and Simov, K. and Osenova, P. and Kancheva, Z. and Radev, I. and Stankovic, R. and Perdih, A. and Gabrovsek, D.}, TITLE = {A multilingual evaluation dataset for monolingual word sense alignment}, YEAR = {2020}, ABSTRACT = {Aligning senses across resources and languages is a challenging task with beneficial applications in the field of natural language processing and electronic lexicography. In this paper, we describe our efforts in manually aligning monolingual dictionaries. The alignment is carried out at sense-level for various resources in 15 languages. Moreover, senses are annotated with possible semantic relationships such as broadness, narrowness, relatedness, and equivalence. 
In comparison to previous datasets for this task, this dataset covers a wide range of languages and resources and focuses on the more challenging task of linking general-purpose language. We believe that our data will pave the way for further advances in alignment and evaluation of word senses by creating new solutions, particularly those notoriously requiring data such as neural networks. Our resources are publicly available at https://github.com/elexis-eu/MWSA}, KEYWORDS = {lexical semantic resources, sense alignment, lexicography, language resource}, URL = {https://iris.cnr.it/handle/20.500.14243/404924}, ISBN = {979-10-95546-34-4}, CONFERENCE_NAME = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)}, BOOKTITLE = {Proceedings of the 12th Language Resources and Evaluation Conference-LREC 2020}, } @INPROCEEDINGS{KHAN_2020_INPROCEEDINGS_K_407585, AUTHOR = {Khan, F.}, TITLE = {Representing Temporal Information in Lexical Linked Data Resources}, YEAR = {2020}, ABSTRACT = {The increasing recognition of the utility of Linked Data as a means of publishing lexical resources has helped to underline the need for RDF-based data models with the flexibility and expressivity to be able to represent the most salient kinds of information contained in such resources as structured data; this includes, notably, information relating to time and the temporal dimension. In this article we describe a perdurantist approach to modelling diachronic lexical information which builds upon work which we have previously presented and which is based on the ontolex-lemon vocabulary. 
We present two extended examples, one taken from the Oxford English Dictionary, the other from a work on etymology, to show how our approach can handle different kinds of temporal information often found in lexical resources}, URL = {https://iris.cnr.it/handle/20.500.14243/407585}, ISBN = {979-10-95546-36-8}, } @INPROCEEDINGS{KHAN_2020_INPROCEEDINGS_KRSBKT_404921, AUTHOR = {Khan, F. and Romary, L. and Salgado, A. and Bowers, J. and Khemakhem, M. and Tasovac, T.}, TITLE = {Modelling Etymology in LMF/TEI: The Grande Dicionário Houaiss da Língua Portuguesa Dictionary as a Use Case}, YEAR = {2020}, ABSTRACT = {In this article, we will introduce two of the new parts of the new multi-part version of the Lexical Markup Framework (LMF) ISO standard, namely Part 3 of the standard (ISO 24613-3), which deals with etymological and diachronic data, and Part 4 (ISO 24613-4), which consists of a TEI serialisation of all of the prior parts of the model. We will demonstrate the use of both standards by describing the LMF encoding of a small number of examples taken from a sample conversion of the reference Portuguese dictionary Grande Dicionário Houaiss da Língua Portuguesa, part of a broader experiment comprising the analysis of different, heterogeneously encoded, Portuguese lexical resources. We present the examples in the Unified Modelling Language (UML) and also in a couple of cases in TEI}, URL = {https://iris.cnr.it/handle/20.500.14243/404921}, ISBN = {979-10-95546-36-8}, } @INCOLLECTION{BELLANDI_2019_INCOLLECTION_BMK_407485, AUTHOR = {Bellandi, A. and Monachini, M. and Khan, F.}, TITLE = {LexO: Where Lexicography Meets the Semantic Web}, YEAR = {2019}, ABSTRACT = {LexO is a collaborative web editor used for the creation and management of (multilingual) lexical and terminological resources as linked data resources. 
The editor makes use of Semantic Web technologies (which enrich web data with semantic information in order to make them machine readable) and the linked data publishing paradigm in order to ensure that lexical resources can be more easily shared and reused by the scientific community}, KEYWORDS = {Semantic Web technologies, multilingual lexical resources, collaborative web editor}, PAGES = {43-47}, URL = {https://iris.cnr.it/handle/20.500.14243/407485}, BOOKTITLE = {Tour de CLARIN volume two}, EDITOR = {Fiser, D. and Lenardic, J.}, } @INPROCEEDINGS{BELLANDI_2019_INPROCEEDINGS_BKM_393377, AUTHOR = {Bellandi, A. and Khan, F. and Monachini, M.}, TITLE = {Enhancing Lexicography by Means of the Linked Data Paradigm: LexO for CLARIN}, YEAR = {2019}, ABSTRACT = {This paper presents a collaborative web editor for easily building and managing lexical and terminological resources based on the OntoLex-Lemon model. The tool allows information to be easily manually curated by humans. Our primary objective is to enable lexicographers, scholars and humanists, especially those who do not have technical skills and expertise in the Semantic Web and Linked Data technologies, to create lexical resources ex novo even if they are not familiar with the underlying technical details. This is fundamental for collecting reliable, fine-grained, and explicit information, thus allowing the adoption of new technological advances in the Semantic Web by the Digital Humanities}, URL = {https://iris.cnr.it/handle/20.500.14243/393377}, } @INPROCEEDINGS{MCCRAE_2019_INPROCEEDINGS_MKKDTMA_389214, AUTHOR = {McCrae, J. P. and Khan, F. and Kernerman, I. and Declerck, T. and Tiberius, C. and Monachini, M. 
and Ahmadi, S.}, TITLE = {The ELEXIS Interface for Interoperable Lexical Resources}, YEAR = {2019}, ABSTRACT = {ELEXIS is a project that aims to create a European network of lexical resources, and one of the key challenges for this is the development of an interoperable interface for different lexical resources so that further tools may improve the data. This paper describes this interface and in particular describes the five methods of entrance into the infrastructure, through retrodigitization, by conversion to TEI-Lex0, by the TEILex0 format, by the OntoLex format or through the REST interface described in this paper. The interface has the role of allowing dictionaries to be ingested into the ELEXIS system, so that they can be linked to each other, used by NLP tools and made available through tools to Sketch Engine and Lexonomy. Most importantly, these dictionaries will all be linked to each other through the Dictionary Matrix, a collection of linked dictionaries that will be created by the project. There are five principal ways that a dictionary maybe entered into the Matrix Dictionary: either through retrodigitization; by conversion to TEI Lex-0 by means of the forthcoming ELEXIS conversion tool; by directly providing TEI Lex-0 data; by providing data in a compatible format (including OntoLex); or by implementing the REST interface described in this paper}, URL = {https://iris.cnr.it/handle/20.500.14243/389214}, } @INPROCEEDINGS{BELLANDI_2019_INPROCEEDINGS_BK_389215, AUTHOR = {Bellandi, A. and Khan, F.}, TITLE = {Lexicography and the Semantic Web: A Demo with LexO}, YEAR = {2019}, ABSTRACT = {The purpose of this contribution is to present LexO8, the first version of a collaborative web editor for easily building and managing of lexical and terminological resources in the context of the Semantic Web. 
The adoption of Semantic Web technologies and the Linked Data paradigm has been driven by the need to ensure the construction of resources that are interoperable and can be shared and reused by the scientific community. LexO's primary objective is to enable terminologists and lexicographers to create a resource ex novo this is by means of the adoption of a lexical model that allows the association of detailed and structured lexical information (Bellandi et al., 2018); (Khan et al., 2016) to ontological concepts. In this respect, the lemon lexical model (McCrae et al., 2012), later renamed OntoLex-lemon (McCrae et al., 2017), is currently regarded as the de facto standard for enriching Semantic Web ontologies with lexical information. LexO can provide a support for creating, managing, publishing lexical and terminological resources as Linked Open Data, that is typically a complex task, especially for those who have not yet mastered Semantic Web-based standards and technologies, such as RDF and OWL. However, the long-term ambition of LexO would be to make a deeper contribution to e-lexicography}, URL = {https://iris.cnr.it/handle/20.500.14243/389215}, } @INPROCEEDINGS{BOHBOT_2019_INPROCEEDINGS_BFKKR_389213, AUTHOR = {Bohbot, H. and Frontini, F. and Khan, F. and Khemakhem, M. and Romary, L.}, TITLE = {Nénufar: Modelling a Diachronic Collection of Dictionary Editions as a Computational Lexical Resource}, YEAR = {2019}, ABSTRACT = {The Petit Larousse Illustré (PLI) is a monolingual French dictionary which has been published every year since the 1906 edition, and which is therefore a fundamental record of the evolution of the French language. As a consequence of the pre-1948 editions of the PLI entering the public domain in 2018 the Nénufar (Nouvelle édition numérique de fac-similés de référence) project was launched at the Praxiling laboratory in Montpellier with the aim of digitizing and making these editions available electronically. 
The project is still ongoing; various selected editions from each decade are going to be fully digitized (so far the 1906, 1924 and 1925 editions have been completed), and changes backtracked and dated to the specific year. Nénufar's primary aim is to make the editions available and searchable via an advanced search interface which will not only enable the selective querying of text by lemma and type of content (definitions, examples, ...), but crucially also detect and study changes by comparing different editions. In order to do so, a specific web interface has been put in place. Alongside the digitized text, the Nénufar website contains high quality scans for each page. In compliance with current open data best practices (Wilkinson et al., 2016), the project also aims to make the source data available separately from the querying interface both for research and for long-term preservation. A similar project which presents data and scans from subsequent editions of the same legacy dictionary has been carried out by the team behind the Swedish Academy's Wordlist (see Holmer, Malmgren, and Martens (2016) and http://spraakdata.gu.se/saolhist/). The primary encoding format is TEI-XML; however in our case the TEI encoding is closely inspired by the latest version of the TEI-Lex0 (Bański et al., 2017, Romary \& Tasovac, 2018) guidelines for encoding lexicographic resources, which are based upon TEI. The choice of a TEI based approach allows the Nénufar project to align itself to other pre-existing initiatives and tools. By aligning ourselves to TEI-Lex0 we will be able to make use of digitisation tools such as Grobid (Khemakhem et al., 2017) which have TEI-Lex0 as their native format and which have already been tested and used within the Nénufar project to speed up the digitization of new editions. 
In addition we will be able to make use of ongoing initiatives to convert TEI-Lex0 datasets to RDF using the W3C recommendation for publishing lexicons as Linked Data, namely OntoLex-Lemon (McCrae et al., 2017; Bosque-Gil et al., 2016) which will allow for the publication of the Nénufar dataset as an LOD graph. The LOD version of the Nénufar dataset, now currently being developed, will be queryable from the available SPARQL endpoint and contain all available editions as one single graph, allowing for expert users to perform complex queries that could detect systematic changes in the dataset. The LOD version is particularly adapted to be linked to other datasets; more recent editions, once added, could also be of interest for NLP applications}, URL = {https://iris.cnr.it/handle/20.500.14243/389213}, } @TECHREPORT{AHMADI_2019_TECHREPORT_AADKKKJMMRTTZ_351830, AUTHOR = {Ahmadi, S. and Arcan, M. and Declerck, T. and Kernerman, I. and Khan, F. and Krek, S. and McCrae, J. and Měchura, M. and Monachini, M. and Roche, C. and Tiberius, C. and Troelsgård, T. and Zaytseva, K.}, TITLE = {D2.1. Interface for Interoperable Lexical Resources}, YEAR = {2019}, ABSTRACT = {ELEXIS Deliverable D2.1. Interface for Interoperable Lexical Resources}, URL = {https://iris.cnr.it/handle/20.500.14243/351830}, } @TECHREPORT{TASOVAC_2019_TECHREPORT_TMK_352225, AUTHOR = {Tasovac, T. and Monachini, M. and Khan, F.}, TITLE = {5.1 ELEXIS SKILLSET REPORT}, YEAR = {2019}, ABSTRACT = {ELEXIS PROJECT DELIVERABLE ELEXIS SKILLSET REPORT}, URL = {https://iris.cnr.it/handle/20.500.14243/352225}, } @INCOLLECTION{KHAN_2018_INCOLLECTION_KEM_345622, AUTHOR = {Khan, F. and Díaz-Vera, J. E. 
and Monachini, M.}, TITLE = {Representing Meaning Change in Computational Lexical Resources: The Case of Shame and Embarrassment Terms in Old English}, YEAR = {2018}, ABSTRACT = {The inclusion of diachronic information detailing changes in the meanings of words over time can be extremely helpful in modelling broad coverage digital lexical resources but it is often crucial for lexical resources serving such fields as classical philology or historical linguistics where the diachronic aspects of a language are explicitly taken into consideration. In this article we present a linked data based model for representing meaning change in lexico-semantic resources called lemonDia and describe its use in modelling a lexicon of Old English terms for shame and embarrassment}, KEYWORDS = {linked data, diachrony, emotion}, PAGES = {59-79}, URL = {https://iris.cnr.it/handle/20.500.14243/345622}, ISBN = {978-1-5275-0803-3}, BOOKTITLE = {Formal Representation and the Digital Humanities}, } @INPROCEEDINGS{KHAN_2018_INPROCEEDINGS_KBFM_376218, AUTHOR = {Khan, F. and Bellandi, A. and Frontini, F. and Monachini, M.}, TITLE = {One Language to rule them all: modelling Morphological Patterns in a Large Scale Italian Lexicon with SWRL}, YEAR = {2018}, ABSTRACT = {We present an application of Semantic Web Technologies to computational lexicography. More precisely we describe the publication of the morphological layer of the Italian Parole Simple Clips lexicon (PSC-M) as linked open data. The novelty of our work is in the use of the Semantic Web Rule Language (SWRL) to encode morphological patterns, thereby allowing the automatic derivation of the inflectional variants of the entries in the lexicon. 
By doing so we make these patterns available in a form that is human readable and that therefore gives a comprehensive morphological description of a large number of Italian words}, KEYWORDS = {Morphology, Linked Open Data, Italian Lexicon, SWRL, SQWRL}, PAGES = {4385-4389}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/844.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{BELLANDI_2018_INPROCEEDINGS_BFKM_345621, AUTHOR = {Bellandi, A. and Frontini, F. and Khan, F. and Monachini, M.}, TITLE = {SWRL your lexicon: adding inflectional rules to a LOD dataset}, YEAR = {2018}, ABSTRACT = {Over the past few years the publication of lexical resources as Linked Data (LD) has taken on ever greater significance within the field of computational lexicography. So far the efforts of the community have been largely directed towards the definition of standards and the conversion of single resources (see McCrae et al 2012, Khan et al 2016), but with less of a focus on the technical possibilities afforded by this new mode of publishing lexical data. However, the fact is that the Semantic Web gives us access to a whole ecosystem of standards, languages, and technologies. 
In this paper we will look at one of these languages, the Semantic Web Rule Language (SWRL) and explore whether it might potentially play a useful role in the publication of lexical resources}, URL = {https://iris.cnr.it/handle/20.500.14243/345621}, } @MISC{BELLANDI_2018_MISC_BFKM_350511, AUTHOR = {Bellandi, A. and Frontini, F. and Khan, F. and Monachini, M.}, TITLE = {Parole-Simple-Clip/Morphological Layer in RDF}, YEAR = {2018}, ABSTRACT = {A version in RDF of the morphological layer of the wide coverage multi-level Italian lexicon Parole-Simple-Clips, containing the parts of speech Noun, Verb, Adjective. The dataset is encoded using the ontolex-lemon vocabulary. Information pertaining to inflectional morphology contained in the original resource is converted into Semantic Web Rule Language (SWRL) rules}, URL = {https://iris.cnr.it/handle/20.500.14243/350511}, } @INCOLLECTION{KHAN_2017_INCOLLECTION_KBFM_339934, AUTHOR = {Khan, F. and Bellandi, A. and Frontini, F. and Monachini, M.}, TITLE = {Using SWRL rules to model noun behaviour in Italian}, YEAR = {2017}, ABSTRACT = {In this article we describe our ongoing attempts to use the Semantic Web Rule Language (SWRL) to model the morphological layer of a wide-coverage Italian lexical resource, Parole-Simple-Clips (PSC); in this case that subset of PSC dealing with Italian noun morphology. After giving a brief introduction to SWRL and to Italian noun morphology we go on to describe the actual transformation itself. Finally we describe an experiment on our dataset using SWRL rules and queries written in the Semantic Query-Enhanced Web Rule Language (SQWRL)}, KEYWORDS = {Linked Open Data, Logic Programming, Italian Morphology}, PAGES = {134-142}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85021186095\&origin=inward}, DOI = {10.1007/978-3-319-59888-8_11}, PUBLISHER = {Springer (Berlin, DEU)}, CONFERENCE_PLACE = {Berlin}, BOOKTITLE = {LANGUAGE, DATA, AND KNOWLEDGE, LDK}, EDITOR = {Gracia, J. 
and Bond, F. and McCrae, J. and Buitelaar, P. and Chiarcos, C. and Hellmann, S.}, } @INPROCEEDINGS{KHAN_2017_INPROCEEDINGS_KBBM_339933, AUTHOR = {Khan, F. and Bellandi, A. and Boschetti, F. and Monachini, M.}, TITLE = {The Challenges of Converting Legacy Lexical Resources to Linked Open Data using Ontolex-Lemon: The Case of the Intermediate Liddell-Scott Lexicon}, YEAR = {2017}, ABSTRACT = {In this article we discuss the conversion of a legacy lexical resource, an abridged version of the ancient Greek-English lexicon, the Liddell-Scott-Jones lexicon, into RDF using the lemon model discussing some of the challenges we confronted during this conversion. We will also introduce the polyLemon vocabulary which we introduced to describe the structuring of the senses in a lexical entry in a dictionary}, URL = {https://iris.cnr.it/handle/20.500.14243/339933}, } @INPROCEEDINGS{KHAN_2017_INPROCEEDINGS_KBF_342012, AUTHOR = {Khan, F. and Bowers, J. and Frontini, F.}, TITLE = {Situating Word Senses in their Historical Context with Linked Data}, YEAR = {2017}, ABSTRACT = {In this article we present a Semantic Web-based model for creating lexical resources in which the diachronic and, more broadly, contextual dimensions of word meaning can be explicitly represented as part of a graph-based data structure. We start by discussing why Linked Data is the right publishing approach for such diachronic datasets. We then describe our model, lemonEty, which utilizes the ontology engineering technique of perdurants in order to model lexical entries as dynamic processes. Next we go onto explain how to represent etymologies using our model, and in particular how to associate temporal information with word senses, taking examples from two different lexicographic resources. In addition, we will show how our model deals with cognates and attestations}, URL = {https://iris.cnr.it/handle/20.500.14243/342012}, } @MISC{BELLANDI_2017_MISC_BKB_350512, AUTHOR = {Bellandi, A. and Khan, F. 
and Boschetti, F.}, TITLE = {Linked Data Version of the Intermediate Greek English Lexicon}, YEAR = {2017}, ABSTRACT = {A linked data version of the intermediate greek english lexicon by Liddell-Scott-Jones encoded using the ontolex-lemon vocabulary}, URL = {https://iris.cnr.it/handle/20.500.14243/350512}, } @ARTICLE{KHAN_2016_ARTICLE_KABF_322086, AUTHOR = {Khan, F. and Arrigoni, S. and Boschetti, F. and Frontini, F.}, TITLE = {Restructuring a Taxonomy of Literary Themes and Motifs for More Efficient Querying}, YEAR = {2016}, ABSTRACT = {In this paper we describe ongoing work in the restructuring of a tagset originally organised as a taxonomy and used to annotate literary themes and motifs in a corpus of classical works of poetry from a number of different traditions. We show how such a tagset can be rendered more efficient and useful through the appropriation of ideas and techniques from lexical semantics and ontology design. The newly redesigned tagset is described with examples showing how the new design is much more expressive than the old taxonomy; furthermore, an example query is described in order to demonstrate how more refined semantic searches can be carried using the new version of the taxonomy. The final result is, we hope, a resource that will be useful not only for the specific project for which it was developed but one that is well-designed and well-documented enough to be of use for other similar semantic annotation tasks}, URL = {https://iris.cnr.it/handle/20.500.14243/322086}, DOI = {10.14195/2182-8830}, ISSN = {2182-8830}, JOURNAL = {MATLIT}, } @ARTICLE{MUGELLI_2016_ARTICLE_MBDDKT_354715, AUTHOR = {Mugelli, G. and Boschetti, F. and Del Gratta, R. and Del Grosso, A. and Khan, F. and Taddei, A.}, TITLE = {A user-centred design to annotate ritual facts in ancient greek tragedies}, YEAR = {2016}, ABSTRACT = {Euporia is an annotation system developed with a user-centred approach for the study of ritual and religion in ancient Greek tragedy. 
Euporia adopts a domain specific language (DSL) and a lightweight web user interface in order to offer digital support to an anthropological study of ancient Greek tragedy that compares ritual as it is performed or described in Greek tragedy with ancient ritual as it can be reconstructed from literary, archaeological, and epigraphic sources. The case study discussed in this paper (Aesch. Ag 67-71) shows one of the main features of Euporia: the ability to annotate different readings and different interpretations of the text and their consequences in the reconstruction of ancient Greek ritual}, KEYWORDS = {Digital Philology, Digital Humanities, Digital Classicist, Computational philology, Computational Linguistics}, PAGES = {103-120}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85007489227\&origin=inward}, VOLUME = {59}, DOI = {10.1111/j.2041-5370.2016.12041.x}, ISSN = {0076-0730}, JOURNAL = {BULLETIN OF THE INSTITUTE OF CLASSICAL STUDIES OF THE UNIVERSITY OF LONDON}, } @INCOLLECTION{DELGRATTA_2016_INCOLLECTION_DBDKM_312011, AUTHOR = {Del Gratta, R. and Boschetti, F. and Del Grosso, A. and Khan, F. and Monachini, M.}, TITLE = {Cooperative philology on the way to web services: The case of the cophiwordnet platform}, YEAR = {2016}, ABSTRACT = {In this paper we present ongoing research carried out at the Institute for Computational Linguistics "A. Zampolli" (ILC) in Pisa. The institute has been active since many years in the field of Digital Humanities providing resources, tools and solutions to address issues of the to digital humanists. Starting from those previous initiatives, we show how to re-engineer them as Web Services in order to make connections between lexicons, semantic resources and a fine grained text management. 
Linked Open Data is chosen as the paradigm used to link the different resources as well as the modality of data presentation}, KEYWORDS = {Canonical text services, Cooperative philology, Linked open data, Web services}, PAGES = {173-187}, URL = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84961744725\&partnerID=q2rCbXpz}, DOI = {10.1007/978-3-319-31468-6_13}, PUBLISHER = {Springer International Publishing (Switzerland, CHE)}, ISBN = {978-3-319-31468-6}, CONFERENCE_PLACE = {Switzerland}, BOOKTITLE = {Worldwide Language Service Infrastructure: Second International Workshop, WLSI 2015, Kyoto, Japan, January 22-23, 2015. Revised Selected Papers}, EDITOR = {Murakami, Y. and Li, D.}, } @EDITORIAL{KHAN_2016_EDITORIAL_KVAFFPGU_324185, AUTHOR = {Khan, F. and Vintar, Š. and Araúz, P. L. and Faber, P. and Frontini, F. and Parvizi, A. and Grčić Simeunović, L. and Unger, C.}, TITLE = {Language and Ontology (LangOnto2) \& Terminology and Knowledge Structures (TermiKS)}, YEAR = {2016}, ABSTRACT = {This joint workshop brings together two different but closely related strands of research. On the one hand it looks at the overlap between ontologies and computational linguistics and on the other it explores the relationship between knowledge modelling and terminologies. In particular the workshop aims to create a forum for discussion in which the different relationships and commonalities between these two areas can be explored in detail, as well as presenting cutting edge research in each of the two individual areas. A significant amount of human knowledge can be found in texts. It is not surprising that languages such as OWL, which allow us to formally represent this knowledge, have become more and more popular both in linguistics and in automated language processing. For instance ontologies are now of core interest to many NLP fields including Machine Translation, Question Answering, Text Summarization, Information Retrieval, and Word Sense Disambiguation. 
At a more abstract level, however, ontologies can also help us to model and reason about phenomena in natural language semantics. In addition, ontologies and taxonomies can also be used in the organisation and formalisation of linguistically relevant categories such as those used in tagsets for corpus annotation. Notably also, the fact that formal ontologies are being increasingly accessed by users with limited to no background in formal logic has led to a growing interest in developing accessible front ends that allow for easy querying and summarisation of ontologies. It has also led to work in developing natural language interfaces for authoring ontologies and evaluating their design. Additionally in recent years there has been a renewed interest in the linguistic aspects of accessing, extracting, representing, modelling and transferring knowledge. Numerous tools for the automatic extraction of terms, term variants, knowledge-rich contexts, definitions, semantic relations and taxonomies from specialized corpora have been developed for a number of languages, and new theoretical approaches have emerged as potential frameworks for the study of specialized communication. However, the building of adequate knowledge models for practitioners (e. g. experts, researchers, translators, teachers etc.), on the one hand, and NLP applications (including cross-language, cross-domain, cross-device, multi-modal, multi-platform applications), on the other hand, still remains a challenge. The papers included in the workshop range across a wide variety of different areas and reflect the strong inter-disciplinary approach, which characterises both areas of research. 
In addition we are very happy to include two invited talks in the program presented by authorities in their respective fields: Pamela Faber from the field of terminology, and John McCrae, an expert on linguistic linked data and the interface between NLP and ontologies}, KEYWORDS = {lexicons, ontologies}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, } @INPROCEEDINGS{DELGRATTA_2016_INPROCEEDINGS_DFMPRBKSC_324176, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Khan, F. and Soria, C. and Calzolari, N.}, TITLE = {LREC as a Graph: People and Resources in a Network}, YEAR = {2016}, ABSTRACT = {This proposal describes a new way to visualise resources in the LREMap, a community-built repository of language resource descriptions and uses. The LREMap is represented as a force-directed graph, where resources, papers and authors are nodes. The analysis of the visual representation of the underlying graph is used to study how the community gathers around LRs and how LRs are used in research}, KEYWORDS = {Language Resources, Resources Documentation, Data Visualisation}, PAGES = {2529-2532}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{KHAN_2016_INPROCEEDINGS_KBM_331670, AUTHOR = {Khan, A. F. and Bellandi, A. 
and Monachini, M.}, TITLE = {Tools and Instruments for Building and Querying Diachronic Computational Lexica}, YEAR = {2016}, ABSTRACT = {This article describes work on enabling the addition of temporal information to senses of words in linguistic linked open data lexica based on the lemonDia model. Our contribution in this article is twofold. On the one hand, we demonstrate how lemonDia enables the querying of diachronic lexical datasets using OWL-oriented Semantic Web based technologies. On the other hand, we present a preliminary version of an interactive interface intended to help users in creating lexical datasets that model meaning change over time}, KEYWORDS = {OWL-oriented Semantic Web based technologies}, PAGES = {164-171}, URL = {https://www.clarin-d.net/images/lt4dh/pdf/LT4DH22.pdf}, ISBN = {978-4-87974-708-2}, CONFERENCE_NAME = {Language Technology Resources and Tools for Digital Humanities (LT4DH 2016)}, } @INPROCEEDINGS{KHAN_2016_INPROCEEDINGS_KDM_324221, AUTHOR = {Khan, F. and Díaz-Vera, Javier E. and Monachini, M.}, TITLE = {Representing Polysemy and Diachronic Lexico-Semantic Data on the Semantic Web}, YEAR = {2016}, ABSTRACT = {In this article we will outline two different vocabularies, both extensions of the lemon model, for representing diachronic lexico-semantic data on the Semantic Web. This is especially useful for representing the evolution of scientific terminologies where many terms are polysemous and/or imported from other languages. 
The first vocabulary, polyLemon, allows for the representation of data about polysemy; the second, lemonDIA, the representation of meaning shift over time}, KEYWORDS = {Language Resources, Resource Description Framework (RDF)}, PAGES = {37-45}, URL = {http://ceur-ws.org/Vol-1595/paper4.pdf}, VOLUME = {1595}, CONFERENCE_NAME = {Second International Workshop on Semantic Web for Scientific Heritage co-located with 13th Extended Semantic Web Conference (ESWC 2016)}, BOOKTITLE = {SWASH 2016 Semantic Web for Scientific Heritage Proceedings of the Second International Workshop on Semantic Web for Scientific Heritage co-located with 13th Extended Semantic Web Conference (ESWC 2016)}, EDITOR = {Draelants, I. and Zucker, C. F. and Monnin, A. and Zucker, A.}, } @INPROCEEDINGS{KHAN_2016_INPROCEEDINGS_KDM_324193, AUTHOR = {Khan, F. and Díaz-Vera, J. and Monachini, M.}, TITLE = {The Representation of an Old English Emotion Lexicon as Linked Open Data}, YEAR = {2016}, ABSTRACT = {We present the ongoing conversion of a lexicon of emotion terms in Old English (OE) into RDF using an extension of lemon called lemonDIA and which we briefly describe. We focus on the translation of the subset of the lexicon dealing with terms for shame and guilt and give a number of illustrative examples}, KEYWORDS = {Linguistic Linked Open Data, Old English, Lexicon}, PAGES = {73-76}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Paris}, BOOKTITLE = {LDL 2016 5th Workshop on Linked Data in Linguistics: Managing, Building and Using Linked Language Resources}, EDITOR = {McCrae, J. P. and Chiarcos, C. and Ponsoda, E. M. and Declerck, T. and Osenova, P. and Hellmann, S.}, } @INPROCEEDINGS{KHAN_2016_INPROCEEDINGS_KFBMM_322106, AUTHOR = {Khan, F. and Frontini, F. and Boschetti, F. 
and Monachini, M.}, TITLE = {Converting the Liddell Scott Greek-English Lexicon into Linked Open Data using lemon}, YEAR = {2016}, ABSTRACT = {The emergence and growing popularity of Linked Open Data (LOD) offers researchers a new range of possibilities when it comes to publishing datasets online (Hyvönen 2012, Oomen et al 2012); indeed not only does the success of LOD greatly facilitate the process of making scholarly data accessible to a wider community but it also permits the enrichment of individual datasets by linking them to the other datasets available on the so called Linked Open Data Cloud. The advantages of Linked Open Data for teachers, academics and students in the humanities are obvious and are indeed manifold. However there is currently a paucity of linked open datasets in fields such as philology and literary studies, and in particular of datasets that deal with classical languages such as ancient Greek, Sanskrit, and Latin. This seems strange given the rich abundance of surviving works, of both a religious and secular character, that exist in those languages. A salient consideration here relates to the fact that even when such works have been digitised and made available in a format such as TEI-XML, a format which renders the structure and content of such texts more amenable to computer processing, the conversion of these resources into the Resource Description Framework (RDF), the standardised data model that underpins the Semantic Web, is not always straightforward. 
In this article we describe ongoing work in the conversion of an important 19th century Ancient Greek resource, the Liddell-Scott-Jones Lexicon, into RDF, part of a wider program of work that has been recently initiated at CNR-ILC in converting historical lexicons in languages such as Greek, Latin and Arabic into Linked Open Data}, URL = {https://iris.cnr.it/handle/20.500.14243/322106}, ISBN = {978-83-942760-3-4}, } @INPROCEEDINGS{NAHLI_2016_INPROCEEDINGS_NFMKZK_324187, AUTHOR = {Nahli, O. and Frontini, F. and Monachini, M. and Khan, F. and Zarghili, A. and Khalfi, M.}, TITLE = {Al Qamus al Muhit, a Medieval Arabic Lexicon in LMF}, YEAR = {2016}, ABSTRACT = {This paper describes the conversion into LMF, a standard lexicographic digital format of 'al-qāmūs al-muḥīṭ, a Medieval Arabic lexicon. The lexicon is first described, then all the steps required for the conversion are illustrated. The work will produce a useful lexicographic resource for Arabic NLP, but is also interesting per se, to study the implications of adapting the LMF model to the Arabic language. Some reflections are offered as to the status of roots with respect to previously suggested representations. In particular, roots, in our opinion, are not to be treated as lexical entries, but modeled as lexical metadata for classifying and identifying lexical entries. In this manner, each root connects all entries that are derived from it}, KEYWORDS = {Arabic Lexicon, LMF, Al Qamus al Muhit}, PAGES = {943-950}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Paris}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. 
and Piperidis, S.}, } @INPROCEEDINGS{ARRIGONI_2016_INPROCEEDINGS_AKMB_328803, AUTHOR = {Arrigoni, S. and Khan, F. and Monachini, M. and Boschetti, F.}, TITLE = {Misurare Memorata Poetis: prime statistiche}, YEAR = {2016}, KEYWORDS = {intertestualità, temi e motivi}, PAGES = {151-155}, URL = {http://www.himeros.eu/aiucd2016/c47.pdf}, CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD. Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture}, BOOKTITLE = {AIUCD 2016-Book of Abstracts}, EDITOR = {Boschetti, F.}, } @ARTICLE{DELGRATTA_2015_ARTICLE_DFKM_222847, AUTHOR = {Del Gratta, R. and Frontini, F. and Khan, F. and Monachini, M.}, TITLE = {Converting the PAROLE SIMPLE CLIPS Lexicon into RDF with lemon}, YEAR = {2015}, ABSTRACT = {This paper describes the publication and linking of (parts of) PAROLE SIMPLE CLIPS (PSC), a large scale Italian lexicon, to the Semantic Web and the Linked Data cloud using the lemon model. The main challenge of the conversion is discussed, namely the reconciliation between the PSC semantic structure which contains richly encoded semantic information, following the qualia structure of the Generative Lexicon theory and the lemon view of lexical sense as a reified pairing of a lexical item and a concept in an ontology. The result is two datasets: one consists of a list of lemon lexical entries with their lexical properties, relations and senses; the other consists of a list of OWL individuals representing the referents for the lexical senses. 
These OWL individuals are linked to each other by a set of semantic relations and mapped onto the SIMPLE OWL ontology of higher level semantic types}, KEYWORDS = {lemon, linked data, generative lexicon, RDF, OWL, lexical resource}, PAGES = {387-392}, URL = {http://www.semantic-web-journal.net/content/converting-parole-simple-clips-lexicon-rdf-lemon-0}, VOLUME = {6}, DOI = {10.3233/SW-140168}, ISSN = {1570-0844}, JOURNAL = {SEMANTIC WEB (PRINT)}, } @INPROCEEDINGS{BOSCHETTI_2015_INPROCEEDINGS_BDFKM_305311, AUTHOR = {Boschetti, F. and Del Gratta, R. and Frontini, F. and Khan, F. and Monachini, M.}, TITLE = {(Re)thinking the BLARK for Ancient Greek}, YEAR = {2015}, ABSTRACT = {The paper discusses the Basic LAnguage Resource Kit (BLARK) for Ancient Greek, measuring the BLARK matrix against what is actually available for this language, and assessing its applicability to ancient languages in general. In addition, the BLARK and the FLaReNet recommendations are used to define priorities in the sector in close collaboration between philologists and the broader LRT community}, URL = {https://iris.cnr.it/handle/20.500.14243/305311}, ISBN = {978-83-932640-8-7}, } @INPROCEEDINGS{DELGRATTA_2015_INPROCEEDINGS_DFMPRBGKQSC_307390, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Goggi, S. and Khan, F. and Quochi, V. and Soria, C. and Calzolari, N.}, TITLE = {Visualising Italian Language Resources: a Snapshot}, YEAR = {2015}, ABSTRACT = {This paper aims to provide a first snapshot of Italian Language Resources (LRs) and their uses by the community, as documented by the papers presented at two different conferences, LREC2014 and CLiC-it 2014. The data of the former were drawn from the LOD version of the LRE Map, while those of the latter come from manually analyzing the proceedings. 
The results are presented in the form of visual graphs and confirm the initial hypothesis that Italian LRs require concrete actions to enhance their visibility}, KEYWORDS = {Italian Language Resources}, PAGES = {100-104}, URL = {https://books.openedition.org/aaccademia/1277?lang=it}, ISBN = {978-88-99200-62-6}, CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics CLiC-it 2015}, BOOKTITLE = {Proceedings of the Second Italian Conference on Computational Linguistics CLiC-it 2015}, EDITOR = {Bosco, C. and Tonelli, S. and Zanzotto, F. M.}, } @INPROCEEDINGS{KHAN_2015_INPROCEEDINGS_KF_295959, AUTHOR = {Khan, F. and Frontini, F.}, TITLE = {Using Ontologies to Model Polysemy in Lexical Resources}, YEAR = {2015}, ABSTRACT = {In this article we look at how the use of ontologies can assist in analysing polysemy in natural languages. We develop a model, the Lexical-Sense-Ontology model (LSO), to represent the interaction between a lexicon and ontology, based on lemon. We use the LSO model to show how default rules can be used to represent semi-productivity in polysemy as well as discussing the kinds of ontological information that are useful for studying polysemy}, KEYWORDS = {Polysemy, Ontology, Default Logic}, URL = {http://www.aclweb.org/anthology/W/W15/W15-0404.pdf}, CONFERENCE_NAME = {Workshop on Language and Ontologies}, BOOKTITLE = {Proceedings of the Workshop on Language and Ontologies}, } @INPROCEEDINGS{BOSCHETTI_2015_INPROCEEDINGS_BDFKM_305309, AUTHOR = {Boschetti, F. and Del Gratta, R. and Frontini, F. and Khan, A. F. and Monachini, M.}, TITLE = {Strumenti, Risorse e Linguistic Linked Open Data per le lingue antiche}, YEAR = {2015}, ABSTRACT = {Strumenti e metodi dell'Informatica Umanistica hanno portato e portano ad una ridefinizione di processi teorici, metodologici e tecnici, fino a una vera e propria ri-concettualizzazione dei saperi nell'ambito dei beni culturali. 
L'Istituto di Linguistica Computazionale è attivo con varie iniziative sul fronte delle Digital Humanities per la creazione di strumenti e risorse linguistiche per il mondo classico. La direzione intrapresa si inserisce nel paradigma che si va consolidando nel settore delle tecnologie del linguaggio e che prevede la fruizione di servizi linguistici attraverso infrastrutture di ricerca, secondo un modello già operativo per le lingue moderne. Tale paradigma è in connessione con l'emergere degli standard e dei formati del web semantico per le tecnologie del linguaggio e per la pubblicazione di dati linguistici}, URL = {https://iris.cnr.it/handle/20.500.14243/305309}, }

% Workshop paper (CCURL 2014). CONFERENCE_NAME is a non-standard field kept from the export; BOOKTITLE carries the proceedings title.
@INPROCEEDINGS{DELGRATTA_2014_INPROCEEDINGS_DFKMS_257904,
  AUTHOR = {Del Gratta, R. and Frontini, F. and Khan, F. and Mariani, J. and Soria, C.},
  TITLE = {The {LREMap} for Under-Resourced Languages},
  YEAR = {2014},
  ABSTRACT = {A complete picture of currently available language resources and technologies for the under-resourced languages of Europe is still lacking. Yet this would help policy makers, researchers and developers enormously in planning a roadmap for providing all languages with the necessary instruments to act as fully equipped languages in the digital era. In this paper we introduce the LRE Map and show its utility for documenting available language resources and technologies for under-resourced languages. The importance of the serialization of the LREMap into (L)LOD along with the possibility of its connection to a wider world is also introduced},
  KEYWORDS = {language resources, less-resourced languages, linguistic linked open data},
  PAGES = {78--83},
  URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html},
  CONFERENCE_NAME = {Workshop on Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)},
  BOOKTITLE = {Proceedings of the Workshop on Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)},
  EDITOR = {Pretorius, L. and Soria, C. and Baroni, P.},
}

% NOTE(review): BOOKTITLE is required for inproceedings entries but the record does not provide one; add it when the venue is confirmed.
@INPROCEEDINGS{KHAN_2014_INPROCEEDINGS_KBF_259370,
  AUTHOR = {Khan, F. and Boschetti, F. and Frontini, F.},
  TITLE = {Using lemon to Model Lexical Semantic Shift in Diachronic Lexical Resources},
  YEAR = {2014},
  ABSTRACT = {In this paper we propose a model, called lemonDIA, for representing lexical semantic change using the lemon framework and based on the ontological notion of the perdurant. Namely we extend the notion of sense in lemon by adding a temporal dimension and then define a class of perdurant entities that represents a shift in meaning of a word and which contains different related senses. We start by discussing the general problem of semantic shift and the utility of being able to easily access and represent such information in diachronic lexical resources. We then describe our model and illustrate it with examples},
  URL = {https://iris.cnr.it/handle/20.500.14243/259370},
}

% NOTE(review): BOOKTITLE is required for inproceedings entries but the record does not provide one; add it when the venue is confirmed.
@INPROCEEDINGS{MONEGLIA_2014_INPROCEEDINGS_MBFGKMP_222787,
  AUTHOR = {Moneglia, M. and Brown, S. and Frontini, F. and Gagliardi, G. and Khan, F. and Monachini, M. and Panunzi, A.},
  TITLE = {The {IMAGACT} Visual Ontology. An Extendable Multilingual Infrastructure for the Representation of Lexical Encoding of Action},
  YEAR = {2014},
  ABSTRACT = {Action verbs have many meanings, covering actions in different ontological types. Moreover, each language categorizes action in its own way. One verb can refer to many different actions and one action can be identified by more than one verb. The range of variations within and across languages is largely unknown, causing trouble for natural language processing tasks. IMAGACT is a corpus-based ontology of action concepts, derived from English and Italian spontaneous speech corpora, which makes use of the universal language of images to identify the different action types extended by verbs referring to action in English, Italian, Chinese and Spanish. This paper presents the infrastructure and the various linguistic information the user can derive from it. IMAGACT makes explicit the variation of meaning of action verbs within one language and allows comparisons of verb variations within and across languages. Because the action concepts are represented with videos, extension into new languages beyond those presently implemented in IMAGACT is done using competence-based judgments by mother-tongue informants without intense lexicographic work involving underdetermined semantic description},
  URL = {https://iris.cnr.it/handle/20.500.14243/222787},
  ISBN = {978-2-9517408-8-4},
}

% NOTE(review): typed as misc in the export although it carries PAGES and CONFERENCE_NAME; the type is left unchanged because the citation key encodes it.
@MISC{KHAN_2014_MISC_KFM_262584,
  AUTHOR = {Khan, F. and Frontini, F. and Monachini, M.},
  TITLE = {A Model for Representing Diachronic Semantic Information in Lexico-Semantic Resources on the Semantic Web},
  YEAR = {2014},
  ABSTRACT = {The Semantic Web offers a way of publishing structured data online that facilitates the interlinking of different datasets stored at different online locations; indeed one of the main aims of the Semantic Web movement is to actively encourage this enrichment of online datasets with information from other resources, in order to avoid the problem of so called 'data islands'. In contrast to conventional hyperlinks however the links between different resources on the Semantic Web can be given semantic types and classified hierarchically. Data published on the Semantic Web is referred to as Linked Data; if, in addition, this data is available with an open license then it can be referred to as Linked Open Data (Heath 2011)},
  KEYWORDS = {Cultural resources, Heritage resources},
  PAGES = {1--3},
  URL = {http://www.dh.uni-leipzig.de/wo/wp-content/uploads/2014/11/Fahad-Khan-Francesca-Frontini-and-Monica-Monachini-A-Model-for-Representing.pdf},
  CONFERENCE_NAME = {Greek and Latin in an age of Open Data. Open Philology Project},
}

% NOTE(review): BOOKTITLE is required for inproceedings entries but the record does not provide one; add it when the venue is confirmed.
@INPROCEEDINGS{KHAN_2013_INPROCEEDINGS_KFDMQ_259365,
  AUTHOR = {Khan, F. and Frontini, F. and Del Gratta, R. and Monachini, M. and Quochi, V.},
  TITLE = {Generative Lexicon Theory and Linguistic Linked Open Data},
  YEAR = {2013},
  ABSTRACT = {In this paper we look at how Generative Lexicon theory can assist in providing a more thorough definition of word senses as links between items in a RDF-based lexicon and concepts in an ontology. We focus on the definition of lexical sense in lemon and show its limitations before defining a new model based on lemon and which we term lemonGL. This new model is an initial attempt at providing a way of structuring lexico-ontological resources as linked data in such a way as to allow a rich representation of word meaning (following the GL theory) while at the same time (attempting to) remain faithful to the separation between the lexicon and the ontology as recommended by the lemon model},
  URL = {https://iris.cnr.it/handle/20.500.14243/259365},
  ISBN = {978-1-937284-98-5},
}

% The exported title also contained the proceedings reference; that information is held in BOOKTITLE, EDITOR and CONFERENCE_NAME below.
@INPROCEEDINGS{MONEGLIA_2013_INPROCEEDINGS_MPGMRDKF_226438,
  AUTHOR = {Moneglia, M. and Panunzi, A. and Gagliardi, G. and Monachini, M. and Russo, I. and De Felice, I. and Khan, F. and Frontini, F.},
  TITLE = {{IMAGACT} E-learning Platform for Basic Action Types},
  YEAR = {2013},
  ABSTRACT = {Action verbs express important information in a sentence and they are the most frequent elements in speech, but they are also one of the most difficult part of the lexicon to learn for L2 language learners, because languages segment these concepts in very different ways. The two sentences "Mary folds her shirt" and "Mary folds her arms" refer to two completely different types of action, as becomes evident when they are translated into another language (e. g., in Italian they would be translated as "Maria piega la camicia" and "Maria incrocia le braccia" respectively). IMAGACT e-learning platform aims to make these differences evident by creating a cross-linguistic ontology of action types, whose nodes consist of 3D scenes, each of which relates to one action type. In order to identify these types, contexts of use have been extracted from English and Italian spontaneous speech corpora for around 600 high frequency action verbs (for each language). All instances that refer to similar events (e. g., fold the shirt/ the blanket) are grouped under one single action type: each one of these types is then represented by a linguistic best example and a short video that represents simple actions (e. g. a man taking a glass from a table). The action types extracted for Italian and English are compared and merged into one cross-linguistic ontology of action. IMAGACT has provided an internet based annotation infrastructure to derive this information from corpora. The project is now completed for the Italian and English lexicon, data extraction for Chinese and Spanish is ongoing. Reference to prototypical imagery is crucial in order to bootstrap the learning process. By selecting the set of 3D scenes referred to by a verb in one language and viewing the type of activity represented therein learners can directly understand the range of applicability of each verb. Thanks to an easy interface, a user can access the English/Italian/Chinese lexicon by lemma or directly by 3D scenes. For example, searching for the verb "to turn", s/he will be presented with a number of scenes, showing the various action types associated to that verb. Clicking on a scene s/he will know how this type of action is referred to in the other languages},
  KEYWORDS = {Ontology},
  PAGES = {85--89},
  URL = {https://iris.cnr.it/handle/20.500.14243/226438},
  PUBLISHER = {libreriauniversitaria.it},
  ADDRESS = {Limena, Italy},
  ISBN = {978-88-6292-423-8},
  CONFERENCE_NAME = {International Conference "ICT for Language Learning", 6th edition},
  CONFERENCE_PLACE = {Limena},
  BOOKTITLE = {Conference Proceedings. ICT for Language Learning},
  EDITOR = {Pixel},
}

% The URL is stored raw (no TeX escapes) so that URL-aware styles produce a working link.
@INPROCEEDINGS{RUSSO_2013_INPROCEEDINGS_RDFKM_257360,
  AUTHOR = {Russo, I. and De Felice, I. and Frontini, F. and Khan, F. and Monachini, M.},
  TITLE = {(Fore)seeing actions in objects. Acquiring distinctive affordances from language},
  YEAR = {2013},
  ABSTRACT = {In this paper we investigate if conceptual information concerning objects' affordances as possibilities for actions anchored to an object can be at least partially acquired through language. Considering verb-noun pairs as the linguistic realizations of relations between actions performed by an agent and objects we collect this information from the ImagAct dataset, a linguistic resource obtained from manual annotation of basic action verbs, and from a web corpus (itTenTen). The notion of affordance verb as the most distinctive verb in ImagAct enables a comparison with distributional data that reveal how lemmas ranking based on a semantic association measure that mirror that of affordances as the most distinctive actions an object can be involved in},
  PAGES = {151--161},
  URL = {https://docs.google.com/viewer?a=v&pid=sites&srcid=ZGVmYXVsdGRvbWFpbnxubHBjczIwMTN8Z3g6MTI0ZGMzYWYwYmMxNjY1Mg},
  CONFERENCE_NAME = {NLPCS 2013-10th International Workshop on Natural Language Processing and Cognitive Science},
  BOOKTITLE = {Proceedings of NLPCS 2013-10th International Workshop on Natural Language Processing and Cognitive Science},
  EDITOR = {Sharp, B. and Zock, M.},
}

% Publisher name and publisher city are kept in separate fields (PUBLISHER / ADDRESS).
@INPROCEEDINGS{RUSSO_2013_INPROCEEDINGS_RFDKM_227078,
  AUTHOR = {Russo, I. and Frontini, F. and De Felice, I. and Khan, F. and Monachini, M.},
  TITLE = {Disambiguation of Basic Action Types through Nouns' Telic Qualia},
  YEAR = {2013},
  ABSTRACT = {Knowledge about semantic associations between words is effective to disambiguate word senses. The aim of this paper is to investigate the role and the relevance of telic information from SIMPLE in the disambiguation of basic action types of Italian HOLD verbs (prendere, 'to take', raccogliere, 'to pick up', pigliare 'to grab' etc.). We propose an experiment to compare the results obtained with telic information from SIMPLE with basic co-occurrence information extracted from corpora (most salient verbs modifying nouns) classified in terms of general semantic classes to avoid data sparseness},
  PAGES = {70--75},
  URL = {http://www.aclweb.org/anthology/W13-5410},
  PUBLISHER = {Association for Computational Linguistics},
  ADDRESS = {Stroudsburg, USA},
  ISBN = {978-1-937284-98-5},
  CONFERENCE_NAME = {6th International Conference on Generative Approaches to the Lexicon Generative Lexicon and Distributional Semantics},
  CONFERENCE_PLACE = {Stroudsburg},
  BOOKTITLE = {Proceedings of the 6th International Conference on Generative Approaches to the Lexicon. Generative Lexicon and Distributional Semantics},
  EDITOR = {Saurí, R. and Calzolari, N. and Huang, C. R. and Lenci, A. and Monachini, M. and Pustejovsky, J.},
}

% NOTE(review): BOOKTITLE mirrors CONFERENCE_NAME because the record has no separate proceedings title; confirm the official proceedings title.
@INPROCEEDINGS{MONACHINI_2012_INPROCEEDINGS_MFDRKGP_119663,
  AUTHOR = {Monachini, M. and Frontini, F. and De Felice, I. and Russo, I. and Khan, F. and Gagliardi, G. and Panunzi, A.},
  TITLE = {Verb interpretation for basic action types: annotation, ontology induction and creation of prototypical scenes},
  YEAR = {2012},
  ABSTRACT = {In the last 20 years dictionaries and lexicographic resources such as WordNet have started to be enriched with multimodal content. Short videos depicting basic actions support the user's need (especially in second language acquisition) to fully understand the range of applicability of verbs. The IMAGACT project has among its results a repository of action verbs ontologically organised around prototypical action scenes in the form of both video recordings and 3D animations. The creation of the IMAGACT ontology, which consists in deriving action types from corpus instances of action verbs, intra and cross linguistically validating them and producing the prototypical scenes thereof, is the preliminary step for the creation of a resource that users can browse by verb, learning how to match different action prototypes with the correct verbs in the target language. The mapping of IMAGACT types onto WordNet synsets allows for a mutual enrichment of both resources},
  KEYWORDS = {ontology of actions, lexical resource, 3D animations},
  PAGES = {69--80},
  URL = {https://iris.cnr.it/handle/20.500.14243/119663},
  CONFERENCE_NAME = {COLING 2012-3rd Workshop on Cognitive Aspects of the Lexicon (CogALex-III)},
  BOOKTITLE = {COLING 2012-3rd Workshop on Cognitive Aspects of the Lexicon (CogALex-III)},
}