@INPROCEEDINGS{SCIOLETTE_2023_INPROCEEDINGS_SMG_491771, AUTHOR = {Sciolette, F. and Marchi, S. and Giovannetti, E.}, TITLE = {Towards a New Computational Lexicon for Italian: building the morphological layer by harmonizing and merging existing resources}, YEAR = {2023}, ABSTRACT = {The present work illustrates the first steps towards the construction of a new computational lexicon for the Italian language. Following an analysis of existing lexical resources, it was decided to use LexicO as the reference base. In this first phase a resource of nearly 800,000 inflected forms was produced, accompanied by lemmas and morphological traits, obtained by integrating the available data in LexicO with those coming from two support sources: the tool MAGIC and a selection of Italian treebanks.}, KEYWORDS = {computational lexicon, lexical resources, morphology, morphological harmonization}, PAGES = {5}, URL = {https://ceur-ws.org/Vol-3596/short20.pdf}, VOLUME = {3596}, CONFERENCE_NAME = {9th Italian Conference on Computational Linguistics}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {30/11/2023-01/12/2023}, BOOKTITLE = {Proceedings of the 9th Italian Conference on Computational Linguistics}, EDITOR = {Boschetti, F. and Lebani, G. E. and Magnini, B. and Novielli, N.}, } @TECHREPORT{ALBANESI_2023_TECHREPORT_AGMPS_491776, AUTHOR = {Albanesi, D. and Giovannetti, E. and Marchi, S. and Papini, M. and Sciolette, F.}, TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-Rapporto tecnico 23}, YEAR = {2023}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto del Progetto Traduzione Talmud Babilonese nel periodo gennaio 2023 - giugno 2023. Le principali attività tecniche svolte sul sistema Traduco attualmente in produzione sono state la risoluzione di bug e l'aggiornamento di alcune funzionalità. Parallelamente, è proseguito il lavoro di ricerca e sviluppo su due fronti: i) la realizzazione della nuova versione di Traduco, ii) l'ampliamento della risorsa lessicale per l'italiano contemporaneo a supporto della funzionalità di ricerca full-text sul testo del Talmud tradotto in italiano.}, KEYWORDS = {Lessici elettronici, rappresentazione della conoscenza, Linguistica Computazionale, traduzione di testi religiosi, traduzione assistita dal calcolatore}, PAGES = {13}, URL = {https://publications.cnr.it/doc/491776}, } @MISC{GIOVANNETTI_2023_MISC_GABCGMPS_491773, AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Carniani, E. and Guidi, L. and Marchi, S. and Papini, M. and Sciolette, F.}, TITLE = {Maia}, YEAR = {2023}, ABSTRACT = {Maia is an open and collaborative web tool based on semantic web and linked open data technologies for text annotation, e-lexicography, and lexical linking.}, KEYWORDS = {maia, linked open data, e-lexicography, text annotation, lexical linking, collaborative tools}, URL = {https://github.com/klab-ilc-cnr/Maia}, } @INPROCEEDINGS{MARCHI_2022_INPROCEEDINGS_MCDG_463120, AUTHOR = {Marchi, S. and Colombo, M. and Dattilo, D. and Giovannetti, E.}, TITLE = {Un esperimento di visualizzazione grafica della terminologia del Talmud babilonese}, YEAR = {2022}, ABSTRACT = {L'impiego di tecnologie di information visualization nel settore delle digital humanities può aprire nuove frontiere di ricerca. 
Le informazioni veicolate attraverso modalità grafiche, infatti, possono apparire agli studiosi più immediatamente comprensibili e le interfacce grafiche realizzate fornire inediti paradigmi di studio e di manipolazione dei dati analizzati. Il caso d'uso sperimentale illustrato in questo contributo è stato concepito per fornire allo studioso una modalità visiva, immediata, per l'analisi comparativa del contenuto terminologico di un corpus testuale.}, KEYWORDS = {Visualizzazione grafica di risorse testuali, terminologia, linguistica computazionale, tf-idf, grafi}, PAGES = {239-241}, URL = {http://amsacta.unibo.it/6848/1/Proceedings_AIUCD2022.pdf}, DOI = {10.6092/unibo/amsacta/6848}, ISBN = {9788894253566}, CONFERENCE_NAME = {AIUCD 2022}, CONFERENCE_PLACE = {Lecce}, CONFERENCE_DATE = {1-3/06/2022}, BOOKTITLE = {AIUCD 2022-Culture digitali. Intersezioni: filosofia, arti, media. Proceedings della 11a conferenza nazionale}, } @TECHREPORT{ALBANESI_2022_TECHREPORT_ABCGMPS_470012, AUTHOR = {Albanesi, D. and Bellandi, A. and Colombo, M. and Giovannetti, E. and Marchi, S. and Papini, M. and Sciolette, F.}, TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-Rapporto tecnico 21}, YEAR = {2022}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto del Progetto Traduzione Talmud Babilonese nel periodo novembre 2021 - aprile 2022. Le principali attività tecniche svolte sul sistema Traduco sono state la risoluzione di bug e l'implementazione di nuove funzionalità richieste. Parallelamente, è stata realizzata una applicazione per la consultazione del corpus biblico e sono proseguite le attività volte alla visualizzazione di risorse lessicali tramite grafi e per l'analisi, l'allineamento di testi, l'aggiornamento e la conversione del lessico computazionale PSC da utilizzarsi per la consultazione avanzata, su base morfologica e semantica, del testo talmudico tradotto in italiano.}, KEYWORDS = {Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Lessici elettronici, rappresentazione della conoscenza, Linguistica Computazionale, traduzione di testi religiosi}, PAGES = {23}, URL = {https://publications.cnr.it/doc/470012}, } @TECHREPORT{ALBANESI_2022_TECHREPORT_ACGMPS_475381, AUTHOR = {Albanesi, D. and Colombo, M. and Giovannetti, E. and Marchi, S. and Papini, M. and Sciolette, F.}, TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-Rapporto tecnico 22}, YEAR = {2022}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto del Progetto Traduzione Talmud Babilonese nel periodo maggio 2022 - dicembre 2022. Le principali attività tecniche svolte sul sistema Traduco sono state la risoluzione di bug e l'implementazione di nuove funzionalità richieste. Parallelamente, è proseguito il lavoro di ricerca e sviluppo su tre fronti: i) la nuova versione di Traduco, ii) la visualizzazione grafica di risorse lessicali, e iii) la ricerca full-text sul testo del Talmud tradotto in italiano.}, KEYWORDS = {Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Lessici elettronici, rappresentazione della conoscenza, Linguistica Computazionale, traduzione di testi religiosi, ricerca full-text}, PAGES = {40}, URL = {https://publications.cnr.it/doc/475381}, } @ARTICLE{GIOVANNETTI_2021_ARTICLE_GABDDM_457778, AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Dattilo, D.
and Del Grosso, A. M. and Marchi, S.}, TITLE = {An ontology of masters of the Babylonian Talmud}, YEAR = {2021}, ABSTRACT = {The purpose of this research is to build an ontology of the masters appearing in the Babylonian Talmud (BT). The ontology built so far has been shared as a Linked Open Data and it will be linked to existing vocabularies. This work has been developed in the context of the Babylonian Talmud Translation Project, where more than eighty Talmudists are working together, since 2012, at the translation (comprehensive of explicative notes and glossaries) of the Talmud into Italian. The construction of the resource has involved the application of tools leveraging on computational linguistics approaches. The ontology, already describing more than 500 masters, constitutes the first portion of a more comprehensive Talmudic Knowledge Base where the text itself, the terminology, the entities, and the concepts constituting the BT will be formalized and linked to each other.}, KEYWORDS = {ontology, babylonian talmud, terminology, word alignment, linked open data, semantic web, knowledge representation}, PAGES = {725-737}, URL = {https://academic.oup.com/dsh/article-abstract/37/3/725/6410110}, VOLUME = {37}, DOI = {10.1093/llc/fqab043}, PUBLISHER = {Oxford University Press (Oxford, UK, Regno Unito)}, ISSN = {2055-7671}, JOURNAL = {Digital Scholarship in the Humanities}, } @INPROCEEDINGS{GIOVANNETTI_2021_INPROCEEDINGS_GABMPS_463795, AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Marchi, S. and Papini, M. and Sciolette, F.}, TITLE = {The role of a computational lexicon for query expansion in full-text search}, YEAR = {2021}, ABSTRACT = {This work describes the first experiments conducted with a computational lexicon of Italian in a context of query expansion for full-text search. An application, composed of a graphical user interface and backend services to access the lexicon and the database containing the corpus to be queried, was developed. The text was morphologically analysed to improve the precision of the search process. Some examples of queries are given to show the potential of a text search approach supported by a complex and stratified lexical resource.}, KEYWORDS = {full-text search, computational lexicon, query expansion}, PAGES = {162-168}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85121247840\&origin=inward}, VOLUME = {3033}, DOI = {10.4000/books.aaccademia.10417}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {9791280136947}, CONFERENCE_NAME = {CLiC-it 2021 Italian Conference on Computational Linguistics 2021}, CONFERENCE_PLACE = {Milan, Italy}, CONFERENCE_DATE = {January 26-28, 2022}, BOOKTITLE = {Proceedings of the Eight Italian Conference on Computational Linguistics (Clic-it 2021)}, EDITOR = {Fersini, E. and Passarotti, M. and Patti, V.}, } @ARTICLE{GIOVANNETTI_2020_ARTICLE_GBDDMPP_453583, AUTHOR = {Giovannetti, E. and Bellandi, A. and Dattilo, D. and Del Grosso, A. M. and Marchi, S. and Pecchioli, A. and Piccini, S.}, TITLE = {The Terminology of the Babylonian Talmud: Extraction, Representation and Use in the Context of Computational Linguistics}, YEAR = {2020}, ABSTRACT = {A formal digital structuring of the terminology of the Talmud is being carried out in the context of the Project for the Translation of the Babylonian Talmud in Italian. 
The terminological resource was encoded in the form of a multi-language Explanatory Combinatorial Dictionary (Hebrew-Aramaic-Italian) according to the principles of the Meaning-Text Theory. The construction of such a resource was supported by text processing and computational linguistics techniques aimed at automatically extracting terms from the Italian translation of the Talmud and aligning them with the corresponding Hebrew/Aramaic source terms. The paper describes the process that was set up for the construction of the terminological resource with the ultimate goal of illustrating the advantages of the adoption of a formal linguistic model. The terminological resource aims, indeed, to be a useful tool to deepen the characteristics of the languages of the Talmud, to help translators in their work and more generally scholars in their study of the Talmud itself.}, KEYWORDS = {Babylonian Talmud, Computational Linguistics, Explanatory and Combinatorial Lexicology}, PAGES = {61-74}, URL = {https://publications.cnr.it/doc/453583}, VOLUME = {XXV}, DOI = {10.1400/283235}, PUBLISHER = {Giuntina (Firenze, Italia)}, ISSN = {2282-4499}, JOURNAL = {Materia giudaica Print}, } @INPROCEEDINGS{DELGROSSO_2020_INPROCEEDINGS_DGM_427281, AUTHOR = {Del Grosso, A. M. and Giovannetti, E. and Marchi, S.}, TITLE = {Enriching a Multilingual Terminology Exploiting Parallel Texts: An Experiment on the Italian Translation of the Babylonian Talmud}, YEAR = {2020}, ABSTRACT = {Parallel texts can represent an extremely useful source of information in a number of text and linguistic processing tasks. In this work we show an experiment conducted on the Italian translation of the Babylonian Talmud, a text we have analyzed and processed to support in the construction of a multilingual Hebrew/Aramaic/Italian terminological resource. The approach we adopted comprised: i) the TEI encoding of the text, ii) the automatic extraction of the Italian terms, iii) the addition of Hebrew/Aramaic terms via word-by-word alignment, iv) the revision of the obtained results.}, KEYWORDS = {multilingual terminology, parallel text, text alignment, babylonian talmud}, PAGES = {119-124}, URL = {http://amsacta.unibo.it/6316/1/AIUCD_2020_volume_FINAL.pdf}, DOI = {10.6092/unibo/amsacta/6316}, ISBN = {978-88-942535-4-2}, CONFERENCE_NAME = {IX Convegno Annuale AIUCD}, CONFERENCE_PLACE = {Milano: Università Cattolica del Sacro Cuore}, CONFERENCE_DATE = {15-17/01/2020}, } @ARTICLE{DELGROSSO_2019_ARTICLE_DCCDGMSS_427276, AUTHOR = {Del Grosso, A. M. and Capizzi, E. and Cristofaro, S. and De Luca, M. R. and Giovannetti, E. and Marchi, S. and Seminara, G. and Spampinato, D.}, TITLE = {Bellini's Correspondence: a Digital Scholarly Edition for a Multimedia Museum}, YEAR = {2019}, ABSTRACT = {Within the "Museo Virtuale della Musica BellinInRete" project, a corpus of letters, written by the renowned composer Vincenzo Bellini (1801-1835) from Catania, will be encoded and made publicly available. This contribution aims at illustrating the part of the project regarding the implementation of the prototype for the metadata and text encoding, indexing and visualisation of Bellini's correspondence. The encoding scheme has been defined according to the latest guidelines of the Text Encoding Initiative and it has been instantiated on a sample of letters. Contextually, a first environment has been implemented by customizing two open source tools: Edition Visualization Technology and Omega Scholarly platform. 
The main objective of the digital edition is to engage general public with the cultural heritage held by the Belliniano Civic Museum of Catania. This wide access to Bellini's correspondence has been conceived preserving the scholarly transcriptions of the letters edited by Seminara within her most recent critical edition (Olschki, 2017). The digital edition of the corpus takes care of handling the correspondence metadata by means of the correspDesc TEI tagset. Finally, Bellini's letters will be accessible via the Web platform as well as integrated into a forthcoming interactive and multimedia tour hosted at the museum.}, KEYWORDS = {digital scholarly edition, correspondence, Digital and Computational Philology, Software Design, Vincenzo Bellini, Music, Multimedia Museum}, PAGES = {23-47}, URL = {https://umanisticadigitale.unibo.it/article/view/9162/9918}, VOLUME = {7}, DOI = {10.6092/issn.2532-8816/9162}, ISSN = {2532-8816}, JOURNAL = {Umanistica Digitale}, } @ARTICLE{PECCHIOLI_2018_ARTICLE_PABGM_397525, AUTHOR = {Pecchioli, A. and Albanesi, D. and Bellandi, A. and Giovannetti, E. and Marchi, S.}, TITLE = {Annotazione Linguistica Automatica dell'Ebraico Mishnaico: Esperimenti sul Talmud Babilonese}, YEAR = {2018}, ABSTRACT = {The automatic linguistic analysis of ancient Hebrew represents a new research opportunity in the field of Jewish studies. In fact, very little has been produced, both in terms of linguistic resources and, above all, of tools for the analysis of ancient Hebrew. This article illustrates a work born within the Italian Translation of the Babylonian Talmud Project aimed at the construction of an automatic linguistic annotator of Mishnaic Hebrew.}, KEYWORDS = {Babylonian Talmud, Natural Language Processing, Mishnaic Hebrew}, PAGES = {281-291}, URL = {http://aisg.cise.unipi.it/Materia-giudaica-2018/018-Pecchioli%20pp%20281-292B.pdf}, VOLUME = {XXIII}, PUBLISHER = {Giuntina (Firenze, Italia)}, ISSN = {2282-4499}, JOURNAL = {Materia giudaica Print}, } @INPROCEEDINGS{DELGROSSO_2018_INPROCEEDINGS_DBGMN_390296, AUTHOR = {Del Grosso, A. M. and Bellandi, A. and Giovannetti, E. and Marchi, S. and Nahli, O.}, TITLE = {Scanning is Just the Beginning: Exploiting Text and Language Technologies to Enhance the Value of Historical Manuscripts}, YEAR = {2018}, ABSTRACT = {In this paper we present a digital process for the explicitation of the textual, linguistic and semantic content of historical manuscripts. The proposed workflow is composed of a sequence of incremental steps, each of which is described both on a methodological and practical perspective. The steps are: 1) visualization and structuring of metadata, 2) transcription, 3) structural encoding, 4) annotation, 5) lexical and conceptual structuring.}, KEYWORDS = {Computational Lexica, Digital Scholarly Editing, Digital Humanities, al-Qamus al-Muhit}, PAGES = {214-219}, URL = {https://publications.cnr.it/doc/390296}, DOI = {10.1109/CIST.2018.8596373}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-5386-4385-3}, CONFERENCE_NAME = {CIST 2018 WH-MNLP}, CONFERENCE_PLACE = {MARRAKECH, MOROCCO}, CONFERENCE_DATE = {21-27/10/2018}, BOOKTITLE = {Colloquium in Information Science and Technology, CIST}, EDITOR = {Al Achhab, M. and El Mohajir, M. and Jellouli, I. and El Mohajir, B. E.}, } @INPROCEEDINGS{DELGROSSO_2018_INPROCEEDINGS_DCDGMSS_384781, AUTHOR = {Del Grosso, A. M. and Cristofaro, S. and De Luca, M. R. and Giovannetti, E. and Marchi, S. and Seminara, G. 
and Spampinato, D.}, TITLE = {Le lettere di Bellini: dalla Carta al Web}, YEAR = {2018}, ABSTRACT = {Nel contesto del progetto "Museo virtuale della Musica BellinInRete" sarà reso fruibile, attraverso un processo di acquisizione, codifica e pubblicazione digitale, un corpus di lettere di Vincenzo Bellini, compositore catanese del XIX secolo. L'edizione digitale delle lettere belliniane sarà consultabile in rete e, inoltre, sarà integrata in un percorso museale interattivo in allestimento presso il Museo Civico Belliniano di Catania.}, KEYWORDS = {Digital Edition, Digital Scholarly Platform}, PAGES = {60-64}, URL = {http://www.aiucd2018.uniba.it/content/AIUCD2018-BoA.pdf}, DOI = {10.6092/unibo/amsacta/5997}, ISBN = {9788894253528}, CONFERENCE_NAME = {AIUCD 2018 Conference}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {31/01/2018-02/02/2018}, BOOKTITLE = {AIUCD 2018-Book of abstracts}, EDITOR = {Spampinato, D.}, } @MISC{DELGROSSO_2018_MISC_DM_484667, AUTHOR = {Del Grosso, A. M. and Marchi, S.}, TITLE = {Edizione digitale del Rotulo di San Teobaldo}, YEAR = {2018}, ABSTRACT = {Applicazione web per la consultazione dell'edizione digitale del Rotulo di San Teobaldo proprietà della Diocesi di Alba.}, KEYWORDS = {digital philology, web application, evt, enhanced visualization}, URL = {https://www.visitmudi.it/rotulo-di-san-teobaldo/}, } @MISC{DELGROSSO_2018_MISC_DM_484669, AUTHOR = {Del Grosso, A. M. and Marchi, S.}, TITLE = {Bellininrete Web Application}, YEAR = {2018}, ABSTRACT = {Applicazione web sviluppata in seno al progetto Bellininrete per lo studio e la consultazione della corrispondenza del maestro catanese Vincenzo Bellini.}, KEYWORDS = {Digital Edition, Digital Scholarly Platform, web application}, URL = {http://bellinicorrespondence.cnr.it/evt}, } @MISC{DELGROSSO_2018_MISC_DMA_390394, AUTHOR = {Del Grosso, A. M. and Marchi, S. and Albanesi, D.}, TITLE = {Omega Project: Omega: Piattaforma Multi-modulare per lo studio scientifico del testo}, YEAR = {2018}, ABSTRACT = {Piattaforma per lo studio del testo con prospettiva scientifico-filologica.}, KEYWORDS = {digital humanities, computational philology, software engineering}, URL = {https://github.com/literarycomputinglab/OmegaProject}, } @INPROCEEDINGS{DELGROSSO_2017_INPROCEEDINGS_DGM_377409, AUTHOR = {Del Grosso, A. M. and Giovannetti, E. and Marchi, S.}, TITLE = {Il modello a microkernel di Omega nello sviluppo di strumenti per lo studio dei testi: dagli ADT alle API}, YEAR = {2017}, KEYWORDS = {microkernel, studio del testo, Omega, ADT, API}, PAGES = {199-205}, URL = {https://publications.cnr.it/doc/377409}, ISBN = {978-88-942535-1-1}, CONFERENCE_NAME = {AIUCD 2017 Conference}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {24-28/01/2017}, } @INPROCEEDINGS{DELGROSSO_2017_INPROCEEDINGS_DGM_377413, AUTHOR = {Del Grosso, A. M. and Giovannetti, E. and Marchi, S.}, TITLE = {Thinking like the "Modern Operating Systems": The Omega architecture and the Clavius on the Web project}, YEAR = {2017}, ABSTRACT = {The current digital turn in studying and analyzing historical documents results in both having machine actionable cultural data and providing software able to process them. However, these data and services often lack in integration strategies among them in order to be reused in other contexts different from the original ones. As pointed out by Franz Fischer in a worthy of note article: "There is no out-of-the-box software available for creating truly critical and truly digital editions at the same time" [1]. 
Likewise, Monica Berti stated that it is now important to "build a model for representing quotations and text reuses of lost works in a digital environment" [2]. In this vision Bridget Almas is in charge of developing an integrated platform for collaboratively transcribing, editing, and translating historical documents and texts. She claimed that through this platform, called Perseids, students and scholars are able to create open source digital scholarly editions [3]. A number of interesting projects are currently under development to realize general models, digital services, and online tools that can be adopted as part of a long-term infrastructure for managing digital editions. Among Perseids and others, we cite as reference systems (a) the Textual Community project led by P. Robinson and B. Bordalejo, (b) the AustESE project led by the Australian eResearch group, (c) the Tagore Online Variorum "Bichitra" project led by Sukanta Chaudhuri, (d) Homer Multitext led by Neel Smith and Christopher Blackwell, (e) Sharing Ancient Wisdoms funded by the HERA network.}, KEYWORDS = {Omega, object-oriented design, digital scholarly editing, clavius on the web}, URL = {https://publications.cnr.it/doc/377413}, CONFERENCE_NAME = {Global Philology Open Conference}, CONFERENCE_PLACE = {Leipzig}, CONFERENCE_DATE = {20-23/02/2017}, } @INPROCEEDINGS{PICCINI_2017_INPROCEEDINGS_PMG_378393, AUTHOR = {Piccini, S. and Marchi, S. and Giovannetti, E.}, TITLE = {Étudier le structuralisme par le structuralisme: expériences de sémantique distributionnelle dans la construction d'un lexique électronique de la terminologie saussurienne}, YEAR = {2017}, ABSTRACT = {En 2010-2011, le premier lexique électronique dédié à la terminologie linguistique saussurienne a été créé [1] dans le cadre d'un projet de recherche intitulé « Pour une édition numérique des manuscrits de Ferdinand de Saussure », projet coordonné par le Professeur Gambarara. La première étape de construction de la ressource lexicale a consisté en l'identification manuelle dans les textes des termes clés du vocabulaire saussurien et de leurs propriétés sémantiques. L'informatique n'est intervenue que dans la phase de formalisation des données extraites. C'est pourquoi nous nous proposons d'illustrer ici la possibilité de recourir à des techniques automatiques et, en particulier, à des algorithmes de sémantique distributionnelle [2] pour identifier les relations que les termes entretiennent entre eux dans le texte. La méthodologie sous-jacente est basée sur l'hypothèse distributionnelle selon laquelle plus deux mots sont sémantiquement proches, plus ils ont tendance à se produire dans des contextes similaires. Le lexique d'un texte est considéré comme un espace métrique où chaque mot peut être représenté comme un vecteur à n dimensions, chacune d'elles enregistrant le nombre de fois que ce mot apparaît dans un contexte donné. La proximité spatiale entre deux vecteurs indique la similarité sémantique entre deux mots. Elle est calculée par le cosinus de l'angle compris entre les deux vecteurs : plus la valeur du cosinus est grande, plus les termes sont, en principe, sémantiquement similaires. Les techniques computationnelles ont été appliquées aux mêmes textes à partir desquels le lexique électronique a été construit : le Cours de linguistique générale [3], les Écrits de linguistique générale [4] et le Recueil des publications scientifiques [5]. Bien qu'au stade préliminaire, l'expérience a permis d'obtenir des résultats intéressants.
À titre d'exemple, nous présentons ci-dessous (Tableau) les valeurs de similitude obtenues par l'algorithme entre le terme signe et d'autres mots dans les textes. Si l'on compare les résultats avec l'entrée du lexique signe, on peut remarquer que l'algorithme est en mesure de détecter un grand nombre de liens explicités dans la ressource et de suggérer, en outre, des relations possibles avec d'autres termes comme valeur, rapport, idée. L'application de ces techniques computationnelles au corpus saussurien peut donc constituer une aide précieuse non seulement pour les lexicographes mais également pour les experts du domaine en faisant émerger des connections qui n'apparaissent pas immédiatement de manière explicite et en suggérant ainsi des parcours alternatifs d'analyse de la pensée de l'auteur.}, KEYWORDS = {structuralisme, sémantique distributionnelle, terminologie saussurienne, lexique électronique}, URL = {https://publications.cnr.it/doc/378393}, CONFERENCE_NAME = {Atelier "Les manuscrits de Saussure, parmi d'autres. Problèmes, stratégies et solutions d'édition pour les archives numériques"}, CONFERENCE_PLACE = {Geneve}, CONFERENCE_DATE = {09-14/01/2017}, } @MISC{DELGROSSO_2017_MISC_DM_390360, AUTHOR = {Del Grosso, A. M. and Marchi, S.}, TITLE = {Clavius on The Web search framework}, YEAR = {2017}, ABSTRACT = {Search engine and restful API developed within the Clavius On the Web project.}, KEYWORDS = {digital humanities, computational philology, software engineering, search engine}, URL = {https://github.com/literarycomputinglab/ClaviusSearch}, } @INPROCEEDINGS{BENOTTO_2016_INPROCEEDINGS_BGM_364952, AUTHOR = {Benotto, G. and Giovannetti, E. and Marchi, S.}, TITLE = {Investigating the Application of Distributional Semantics to Stylometry}, YEAR = {2016}, ABSTRACT = {The inclusion of semantic features in the stylometric analysis of literary texts appears to be poorly investigated. In this work, we experiment with the application of Distributional Semantics to a corpus of Italian literature to test if words distribution can convey stylistic cues. To verify our hypothesis, we have set up an Authorship Attribution experiment. Indeed, the results we have obtained suggest that the style of an author can reveal itself through words distribution too.}, KEYWORDS = {NLP for Digital Humanities, Stilometry, Distributional Semantic}, PAGES = {61-65}, URL = {https://drive.google.com/open?id=0B0sEp2O7Oo7feVJLdHI3YXBxdTg}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {9788899982089}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-6/12/2016}, BOOKTITLE = {Proceedings of Third Italian Conference on Computational Linguistics (CLiC-it 2016)}, } @INPROCEEDINGS{DELGROSSO_2016_INPROCEEDINGS_DBMG_360640, AUTHOR = {Del Grosso, A. M. and Boschetti, F. and Marchi, S. and Giovannetti, E.}, TITLE = {Vantaggi dell'Astrazione attraverso l'Approccio Orientato agli Oggetti per il Digital Scholarly Editing}, YEAR = {2016}, KEYWORDS = {Object Oriented Design, ADT, Digital Textual Scholarship}, URL = {http://www.himeros.eu/aiucd2016/c33.pdf}, DOI = {10.6092/unibo/amsacta/5559}, ISBN = {978-88-942535-0-4}, CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD. 
Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture}, CONFERENCE_PLACE = {Ca' Dolfin, Venezia, Italia}, CONFERENCE_DATE = {7-9/09/2016}, } @INCOLLECTION{MARCHI_2015_INCOLLECTION_M_344710, AUTHOR = {Marchi, S.}, TITLE = {GREEK INTO ARABIC, A RESEARCH INFRASTRUCTURE BASED ON COMPUTATIONAL MODULES TO ANNOTATE AND QUERY HISTORICAL AND PHILOSOPHICAL DIGITAL TEXTS Part ii. System components and features}, YEAR = {2015}, ABSTRACT = {Computer technology nowadays allows users to build simple and effective tools designed to meet the needs of researchers and institutions in various fields of research. Since its creation, the World Wide Web prompted the existence of an environment that breaks down the boundaries of time (i.e. synchronous activity) and space (i.e. location of activities), a prerequisite for the design of tools enabling the collaboration among users. Over the past years text processing systems have become part and parcel of the daily language of scholars working in the field of Humanities, despite some objections raised against this type of technology because of their apparent lack of simplicity of usage, appropriateness, and flexibility. Usage requires special attention with respect to the interface between the information system and the user, while appropriateness and flexibility have not been sufficiently taken into account, not to mention that these two desiderata almost seem to be in contrast to each other. Therefore, it is not easy to plan and implement a text processing system which is suitable for specific types of research and at the same time flexible enough to operate in various fields of research.}, KEYWORDS = {textual scholarship, Collaborative Application, web application}, PAGES = {43-56}, URL = {http://www.olschki.it/libro/9788822263933}, VOLUME = {60}, PUBLISHER = {Leo S. Olschki (Firenze, ITA)}, ISBN = {9788822263933}, BOOKTITLE = {Digital texts, translations, lexicons in a multi-modular web application: methods and samples}, EDITOR = {Bozzi, A.}, } @INPROCEEDINGS{NAHLI_2015_INPROCEEDINGS_NM_342436, AUTHOR = {Nahli, O. and Marchi, S.}, TITLE = {Improved Written Arabic Word Parsing through Orthographic, Syntactic and Semantic constraints}, YEAR = {2015}, ABSTRACT = {The script-based and morphological characteristics of the Arabic language increase considerably the number of alternative analyses output by any morphological parser that does not use orthographic, syntactic and semantic constraints. In order to reduce time-wasting and error-prone proliferation of multiple outputs to be filtered in a post-processing phase, we have tried to optimize word processing by providing the morphological parser with multiple levels of information. We have operated at three such levels: orthography, morpho-syntax and semantics.}, KEYWORDS = {Arabic Language, Arabic NLP, Orthography, Morpho-syntax, Semantics}, PAGES = {210-214}, URL = {http://www.aaccademia.it/elenco-libri?aaref=CLIC_2015}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {9788899200626}, CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics CLiC-it 2015}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {3-4 Dicembre 2015}, } @MISC{DANCONA_2015_MISC_DBNFCBDM_390659, AUTHOR = {D'Ancona, C. and Bozzi, A. and Nahli, O. and Farina, M. and Coda, E. and Boschetti, F. and Del Grosso, A. M.
and Marchi, S.}, TITLE = {Banca dati testuale Greek into Arabic}, YEAR = {2015}, ABSTRACT = {Banca dati testuale con la codifica XML della pericopatura dei testi Greco-Arabo di alcuni trattati delle Enneadi di Plotino.}, KEYWORDS = {Digital Humanities, Computational Philology, Greek into Arabic, http://g2a.ilc.cnr.it}, URL = {http://g2a.ilc.cnr.it/}, } @MISC{MARCHI_2015_MISC_MD_390657, AUTHOR = {Marchi, S. and Del Grosso, A. M.}, TITLE = {Greek into Arabic philological Web platform}, YEAR = {2015}, ABSTRACT = {Piattaforma filologico-computazionale sviluppata nell'ambito del progetto ERC 2009 Advanced Grant n. 249431. Titolo: Greek into Arabic. Philosophical concepts and linguistic bridges.}, KEYWORDS = {computational philology, digital humanities, ERC, Greek into Arabic}, URL = {http://g2a.ilc.cnr.it/}, } @INPROCEEDINGS{BELLUSCI_2014_INPROCEEDINGS_BBBCGM_311735, AUTHOR = {Bellusci, A. and Bellandi, A. and Benotto, G. and Cappelli, A. and Giovannetti, E. and Marchi, S.}, TITLE = {Towards a Decision Support System for Text Interpretation}, YEAR = {2014}, ABSTRACT = {This article illustrates the first steps towards the implementation of a Decision Support System aimed to recreate a research environment for scholars and provide them with computational tools to assist in the processing and interpretation of texts. While outlining the general characteristics of the system, the paper presents a minimal set of user requirements and provides a possible use case on Dante's Inferno.}, KEYWORDS = {DDS, XML, text interpretation, literary computing}, PAGES = {58-62}, URL = {http://clic.humnet.unipi.it/proceedings/vol1/CLICIT2014112.pdf}, VOLUME = {I}, DOI = {10.12871/CLICIT2014118}, ISBN = {9788867414727}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics (CLiC-it 2014)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 dicembre 2014}, BOOKTITLE = {Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{DELGROSSO_2014_INPROCEEDINGS_DMMP_288069, AUTHOR = {Del Grosso, A. M. and Marchi, S. and Murano, F. and Pesini, L.}, TITLE = {A collaborative tool for philological research: experiments on Ferdinand de Saussure's manuscripts}, YEAR = {2014}, ABSTRACT = {The present paper describes a philological-computational tool developed by the Istituto di Linguistica Computazionale (ilc - cnr) of Pisa, aimed at creating a digital edition of Ferdinand de Saussure's unpublished manuscripts. Since the use of a digital edition and of the most modern computer technology allows a more in-depth research, the ilc is developing a set of digital tools in order to take advantage of both the documents and the related information added by the scientific community. The integration exploits the Java enterprise platform by organizing the different features in modules.
Thus, the tool meets the following requirements: (i) converting legacy digital resources into valid XML documents (TEI compliant); (ii) parallel visualization among imported texts and related images; (iii) search and indexing; (iv) handling of variant readings; and (v) collaborative annotation.}, KEYWORDS = {Computational and collaborative philology}, PAGES = {163-175}, URL = {https://publications.cnr.it/doc/288069}, PUBLISHER = {CLEUP (Padova, ITA)}, ISBN = {978-88-6787-260-2}, CONFERENCE_NAME = {Aiucd}, CONFERENCE_PLACE = {Padova}, CONFERENCE_DATE = {11-12 december 2013}, BOOKTITLE = {Collaborative Research Practices and Shared Infrastructures for Humanities Computing}, EDITOR = {Agosti, M. and Tomasi, F.}, } @INPROCEEDINGS{BOZZI_2014_INPROCEEDINGS_BM_316572, AUTHOR = {Bozzi, A. and Marchi, S.}, TITLE = {"Greek into Arabic Web App" as a Research Infrastructure for the History of Philosophy and Science}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/316572}, CONFERENCE_NAME = {Plotinus, East and West-The Enneads in Arabic and Latin}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {November 3-6, 2014}, } @ARTICLE{MARCHI_2013_ARTICLE_M_288182, AUTHOR = {Marchi, S.}, TITLE = {G2A: a Web application to study, annotate and scholarly edit ancient texts and their aligned translations. Part II. Towards a user manual}, YEAR = {2013}, ABSTRACT = {The present contribution describes the current operating procedures of the G2A application. The system is still under development at the Institute of Computational Linguistics (ILC/CNR, Area della ricerca di Pisa); therefore, all the functions described in this provisional user manual should be considered as an exemplification of the general model described by Andrea Bozzi in Part I.}, KEYWORDS = {Literary computing, XML, Web Application, Java, exist-db}, PAGES = {173-183}, URL = {https://learningroads.cfs.unipi.it/wp-content/uploads/2015/10/MARCHISGA3-2013.pdf}, VOLUME = {3}, PUBLISHER = {CNR, Istituto di Linguistica Computazionale (Pisa, Italia)}, ISSN = {2239-012X}, JOURNAL = {Studia graeco-arabica}, } @INCOLLECTION{DELGROSSO_2013_INCOLLECTION_DM_288051, AUTHOR = {Del Grosso, A. M. and Marchi, S.}, TITLE = {Una Applicazione Web per la Filologia Computazionale. Un esperimento su alcuni scritti autografi di Ferdinand de Saussure}, YEAR = {2013}, ABSTRACT = {Nel progetto PRIN "Per un'edizione digitale dei manoscritti di Ferdinand de Saussure", il cui scopo era approntare una serie di strumenti utili all'edizione digitale degli autografi saussuriani, il gruppo di ricerca e sviluppo di filologia computazionale guidato da Andrea Bozzi e presente presso l'Istituto di Linguistica Computazionale "Antonio Zampolli" (ILC-CNR, Pisa) ha individuato i requisiti ed implementato le funzionalità rispondenti alle esigenze dichiarate degli studiosi di questi materiali.}, KEYWORDS = {filologia computazionale, piattaforma web}, PAGES = {131-157}, URL = {https://publications.cnr.it/doc/288051}, PUBLISHER = {Edizioni dell'Orso (Alessandria, ITA)}, ISBN = {978-88-6274-478-2}, BOOKTITLE = {Guida per un'edizione digitale dei manoscritti di Ferdinand de Saussure}, EDITOR = {Gambarara, D. and Marchese, M. P.}, } @INCOLLECTION{DELLORLETTA_2013_INCOLLECTION_DMMVAF_266373, AUTHOR = {Dell'Orletta, F. and Marchi, S. and Montemagni, S. and Venturi, G. and Agnoloni, T.
and Francesconi, E.}, TITLE = {Domain Adaptation for Dependency Parsing at EVALITA 2011}, YEAR = {2013}, ABSTRACT = {The domain adaptation task was aimed at investigating techniques for adapting state-of-the-art dependency parsing systems to new domains. Both the language dealt with, i.e. Italian, and the target domain, namely the legal domain, represent two main novelties of the task organised at Evalita 2011 with respect to previous domain adaptation initiatives. In this paper, we define the task and describe how the datasets were created from different resources. In addition, we characterize the different approaches of the participating systems, report the test results, and provide a first analysis of these results.}, KEYWORDS = {Dependency Parsing, Domain Adaptation, Self-training, Active Learning, Legal-NLP}, PAGES = {58-69}, URL = {https://publications.cnr.it/doc/266373}, VOLUME = {7689}, PUBLISHER = {Springer (Berlin Heidelberg, DEU)}, ISBN = {978-3-642-35827-2}, BOOKTITLE = {Evaluation of NLP and Speech Tools for Italian}, EDITOR = {Magnini, B. and Cutugno, F. and Falcone, M. and Pianta, E.}, } @INPROCEEDINGS{DELGROSSO_2013_INPROCEEDINGS_DMPM_288072, AUTHOR = {Del Grosso, A. M. and Murano, F. and Pesini, L. and Marchi, S.}, TITLE = {A Web tool for philological research. An experiment on some Saussurean writings}, YEAR = {2013}, ABSTRACT = {The work describes a philological-computational tool developed by the Istituto di Linguistica Computazionale, CNR, Pisa to create a digital edition of Ferdinand de Saussure's unpublished manuscripts. Since the use of a digital edition and of the most modern computer technology allow a more in-depth research, the ILC is developing a set of digital tools to facilitate the research and to take advantage of both the documents and the related information by the scientific community.}, KEYWORDS = {digital philology, digital humanities, software engineering}, URL = {https://publications.cnr.it/doc/288072}, CONFERENCE_NAME = {AIUCD2013}, CONFERENCE_PLACE = {Padua, Italy}, CONFERENCE_DATE = {11-12 December 2013}, } @ARTICLE{MARINELLI_2012_ARTICLE_MBMCCS_217369, AUTHOR = {Marinelli, R. and Bindi, R. and Marchi, S. and Castellani, E. and Carli, G. and Santarcangelo, E. L.}, TITLE = {Hypnotizability-related differences in written language}, YEAR = {2012}, ABSTRACT = {The study was aimed at analyzing the written production of subjects with high (Highs) and low (Lows) hypnotizability scores. The participants were asked to write short texts following highly imaginative titles in standard conditions. The texts were processed through computerized and manual methods. The results showed that the Highs' texts are more sophisticated owing to a higher number of abstract nouns, more intense and imaginative owing to a larger number of similes, metaphors and onomatopoeias, less detailed owing to a higher nouns-to-adjectives ratio.
The differences in the use of abstract nouns and highly imageable expressions are discussed in relation to the pre-eminent left hemisphere activity of highs during wakefulness and to a possibly different involvement of the precuneus which is involved in hypnotic phenomena.}, KEYWORDS = {Written language, text analysis, hypnosis, hypnotizability}, PAGES = {54-66}, URL = {https://publications.cnr.it/doc/217369}, VOLUME = {1}, DOI = {10.1080/00207144.2011.622196}, PUBLISHER = {Taylor \& Francis (Philadelphia, Stati Uniti d'America)}, ISSN = {0020-7144}, JOURNAL = {International journal of clinical and experimental hypnosis}, } @INCOLLECTION{MARINELLI_2012_INCOLLECTION_MBMSCCC_136479, AUTHOR = {Marinelli, R. and Bindi, R. and Marchi, S. and Santarcangelo, E. L. and Cavallaro, F. and Castellani, E. and Carli, G.}, TITLE = {Suscettibilità ipnotica e linguaggio}, YEAR = {2012}, ABSTRACT = {-}, KEYWORDS = {ipnosi, linguaggio, nlp}, URL = {https://publications.cnr.it/doc/136479}, PUBLISHER = {Bulzoni (Roma, ITA)}, } @INPROCEEDINGS{DELLORLETTA_2012_INPROCEEDINGS_DMMPV_219489, AUTHOR = {Dell'Orletta, F. and Marchi, S. and Montemagni, S. and Plank, B. and Venturi, G.}, TITLE = {The SPLeT-2012 Shared Task on Dependency Parsing of Legal Texts}, YEAR = {2012}, ABSTRACT = {The 4th Workshop on "Semantic Processing of Legal Texts" (SPLeT-2012) presents the first multilingual shared task on Dependency Parsing of Legal Texts. In this paper, we define the general task and its internal organization into sub-tasks, describe the datasets and the domain-specific linguistic peculiarities characterizing them. We finally report the results achieved by the participating systems, describe the underlying approaches and provide a first analysis of the final test results.}, KEYWORDS = {Dependency Parsing, Domain Adaptation, Legal Text Processing}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/workshops/27.LREC%202012%20Workshop%20Proceedings%20SPLeT.pdf}, CONFERENCE_NAME = {Fourth Workshop on Semantic Processing of Legal Texts (SPLeT 2012)-First Shared Task on Dependency Parsing of Legal Texts (SPLeT 2012)}, CONFERENCE_PLACE = {Istanbul}, CONFERENCE_DATE = {27 Maggio 2012}, } @INPROCEEDINGS{DELLORLETTA_2012_INPROCEEDINGS_DMMVAF_219483, AUTHOR = {Dell'Orletta, F. and Marchi, S. and Montemagni, S. and Venturi, G. and Agnoloni, T. and Francesconi, E.}, TITLE = {Domain Adaptation for Dependency Parsing at Evalita 2011}, YEAR = {2012}, ABSTRACT = {The domain adaptation task was aimed at investigating techniques for adapting state-of-the-art dependency parsing systems to new domains. Both the language dealt with, i.e. Italian, and the target domain, namely the legal domain, represent two main novelties of the task organised at Evalita 2011. In this paper, we define the task and describe how the datasets were created from different resources. In addition, we characterize the different approaches of the participating systems, report the test results, and provide a first analysis of these results.}, KEYWORDS = {Dependency Parsing, Domain Adaptation, Legal Text Processing}, PAGES = {1-7}, URL = {http://www.evalita.it/sites/evalita.fbk.eu/files/working_notes2011/Domain_Adaptation/}, CONFERENCE_NAME = {Evaluation of NLP and Speech Tools for Italian (EVALITA 2011): Domain Adaptation track}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {24-25 Gennaio 2012}, } @TECHREPORT{BOZZI_2012_TECHREPORT_BGBNMPRD_390781, AUTHOR = {Bozzi, A. and Giovannetti, E. and Boschetti, F. and Nahli, O. and Marchi, S. and Piccini, S. and Ruimy, N. 
and Del Grosso, A. M.}, TITLE = {Greek into Arabic: contents, technologies and (humanistic and scientific) applications of a new software}, YEAR = {2012}, ABSTRACT = {This contribution aims to describe the methodological approach to Digital Philology by means of the G\&A Web Application. It also shows running examples for the: 1) Visualization and ordering of parallel texts subdivided in pericopes; 2) Linguistic annotations; 3) Scholarly comments; and 4)Search functions}, KEYWORDS = {digital philology, computational philology, software engineering, Greek into Arabic, Computational linguistics}, URL = {https://publications.cnr.it/doc/390781}, } @TECHREPORT{DELGROSSO_2012_TECHREPORT_DM_391001, AUTHOR = {Del Grosso, A. M. and Marchi, S.}, TITLE = {Il trattamento digitale dei manoscritti di F. de Saussure}, YEAR = {2012}, ABSTRACT = {Il contributo presenta i risultati del progetto PRIN2008 Per un'edizione digitale dei manoscritti di Ferdinand de Saussure", finanziato dal Ministero dell'Istruzione, dell'Università e della Ricerca italiano per il biennio 2009-2011.}, KEYWORDS = {prin, saussure, digital humanities, computational philology, digital philology}, URL = {https://publications.cnr.it/doc/391001}, } @MISC{DELGROSSO_2012_MISC_DMMP_390653, AUTHOR = {Del Grosso, A. M. and Marchi, S. and Murano, F. and Pesini, L.}, TITLE = {Banca dati testuale Codifica Théorie des sonantes}, YEAR = {2012}, ABSTRACT = {Banca dati testuale XML della Théorie des sonantes edito dalla Marchese nel 2002.}, KEYWORDS = {PRIN, Saussure, Digital philology, Computational philology, Digital Humanities}, URL = {http://licodemo.ilc.cnr.it:8080/Saussure_Wapp/controlPanelView.xhtml}, } @ARTICLE{THOMPSON_2011_ARTICLE_TMMCDLMMPQRSVRA_205232, AUTHOR = {Thompson, P. and McNaught, J. and Montemagni, S. and Calzolari, N. and Del Gratta, R. and Lee, V. and Marchi, S. and Monachini, M. and Pezik, P. and Quochi, V. and Rupp, C. and Sasaki, Y. and Venturi, G. and Rebholz Schuhmann, D. and Ananiadou, S.}, TITLE = {The BioLexicon: a large-scale terminological resource for biomedical text mining}, YEAR = {2011}, ABSTRACT = {Background Due to the rapidly expanding body of biomedical literature, biologists require increasingly sophisticated and efficient systems to help them to search for relevant information. Such systems should account for the multiple written variants used to represent biomedical concepts, and allow the user to search for specific pieces of knowledge (or events) involving these concepts, e.g., protein-protein interactions. Such functionality requires access to detailed information about words used in the biomedical literature. Existing databases and ontologies often have a specific focus and are oriented towards human use. Consequently, biological knowledge is dispersed amongst many resources, which often do not attempt to account for the large and frequently changing set of variants that appear in the literature. Additionally, such resources typically do not provide information about how terms relate to each other in texts to describe events. Results This article provides an overview of the design, construction and evaluation of a large-scale lexical and conceptual resource for the biomedical domain, the BioLexicon. The resource can be exploited by text mining tools at several levels, e.g., part-of-speech tagging, recognition of biomedical entities, and the extraction of events in which they are involved. As such, the BioLexicon must account for real usage of words in biomedical texts. 
In particular, the BioLexicon gathers together different types of terms from several existing data resources into a single, unified repository, and augments them with new term variants automatically extracted from biomedical literature. Extraction of events is facilitated through the inclusion of biologically pertinent verbs (around which events are typically organized) together with information about typical patterns of grammatical and semantic behaviour, which are acquired from domain-specific texts. In order to foster interoperability, the BioLexicon is modelled using the Lexical Markup Framework, an ISO standard. Conclusions The BioLexicon contains over 2.2 M lexical entries and over 1.8 M terminological variants, as well as over 3.3 M semantic relations, including over 2 M synonymy relations. Its exploitation can benefit both application developers and users. We demonstrate some such benefits by describing integration of the resource into a number of different tools, and evaluating improvements in performance that this can bring.}, KEYWORDS = {Text Mining, Information Extraction, Computational Lexicon}, PAGES = {1-29}, URL = {http://www.biomedcentral.com/1471-2105/12/397}, VOLUME = {12}, DOI = {10.1186/1471-2105-12-397}, PUBLISHER = {BioMed Central ([London], Regno Unito)}, ISSN = {1471-2105}, JOURNAL = {BMC bioinformatics}, } @INPROCEEDINGS{GIOVANNETTI_2011_INPROCEEDINGS_GM_282633, AUTHOR = {Giovannetti, E. and Marchi, S.}, TITLE = {Cross-Language Boosting in Pattern-based Semantic Relation Extraction from Text}, YEAR = {2011}, ABSTRACT = {In this work we propose a novel technique called "Cross-Language Boosting" (C-LB), aimed at increasing the accuracy of pattern-based semantic relation extraction systems: given a pair of terms expressed in a "Target Language" (e.g. in Italian), we can translate the terms in a "Support Language" (e.g. in English) and apply the translated term pair to reliable lexico-syntactic patterns expressed in that language to increase the accuracy of the system. Experiments have been conducted by comparing the results obtained by the SemRelEx system, a hybrid unsupervised system for semantic relation extraction from texts, with and without the support of the C-LB technique, applied to a set of candidate semantically related term pairs automatically extracted from a corpus in the History of Art domain.}, KEYWORDS = {Computational Linguistics, Cross Language, semantic relation extraction systems, Ontology Learning from Text}, PAGES = {29-36}, URL = {https://web.archive.org/web/20121101020859/http://www.proceedings2011.cla-conf.info/}, ISBN = {9788360810477}, CONFERENCE_NAME = {Computational Linguistics Application Conference-CLA 2011}, CONFERENCE_PLACE = {Jachranka, Poland}, CONFERENCE_DATE = {17-19 ottobre 2011}, BOOKTITLE = {Proceedings of the Computational Linguistics-Applications Conference}, EDITOR = {Jassem, K. and Fuglewicz, P. and Piasecki, M. and Przepiorkowski, A.}, } @TECHREPORT{MARZI_2010_TECHREPORT_MM_157480, AUTHOR = {Marzi, C. and Marchi, S.}, TITLE = {Procedura Web per la generazione automatica dei bandi di concorso per Assegno di Ricerca in formato pdf}, YEAR = {2010}, ABSTRACT = {Lo sviluppo di una procedura web per la generazione automatica e gestione dei testi dei bandi di concorso per il conferimento di Assegni di Ricerca risponde alla necessità di uniformare i testi dei bandi di concorso al Disciplinare dell'Ente in continuo aggiornamento. 
Ogni modifica apportata al regolamento viene immediatamente recepita e convertita in modifica al modello automatico di bando. La procedura "Bandi" consente, inoltre, ad ogni Gruppo di ricerca, Laboratorio, o Commessa, o anche singolo Ricercatore, di avviare la richiesta per un Assegno di Ricerca generando autonomamente una bozza di testo in formato pdf, da sottoporre agli utenti Validatori per approvazione, correzione e/o integrazione, e la conseguente generazione e stampa del testo definitivo in formato pdf.}, KEYWORDS = {Tool, Procedura web creazione bandi}, URL = {http://bandi.ilc.cnr.it/form/login.php}, } @TECHREPORT{PIRRELLI_2010_TECHREPORT_PLMDGM_367784, AUTHOR = {Pirrelli, V. and Lenci, A. and Montemagni, S. and Dell'Orletta, F. and Giovannetti, E. and Marchi, S.}, TITLE = {ConnectToLife (modulo semantico)-Rapporto tecnico finale}, YEAR = {2010}, ABSTRACT = {Il presente documento costituisce il rapporto tecnico finale del progetto Connect-To-Life (modulo semantico) relativo alle attività svolte dall'unità ILC-CNR.}, KEYWORDS = {annotazione linguistica, estrazione di termini, clustering semantico, trattamento automatico della lingua, costruzione di ontologie}, PAGES = {16}, URL = {https://publications.cnr.it/doc/367784}, } @MISC{MARZI_2010_MISC_MM_176395, AUTHOR = {Marzi, C. and Marchi, S.}, TITLE = {Procedura Web per la generazione automatica dei bandi di concorso per Assegno di Ricerca}, YEAR = {2010}, ABSTRACT = {Lo sviluppo di una procedura web per la generazione automatica e gestione dei testi dei bandi di concorso per il conferimento di Assegni di Ricerca risponde alla necessità di uniformare i testi dei bandi di concorso al Disciplinare dell'Ente in continuo aggiornamento. Ogni modifica apportata al regolamento viene immediatamente recepita e convertita in modifica al modello automatico di bando. La procedura "Bandi" consente, inoltre, ad ogni Gruppo di ricerca, Laboratorio, o Commessa, o anche singolo Ricercatore, di avviare la richiesta per un Assegno di Ricerca generando autonomamente una bozza di testo in formato pdf, da sottoporre agli utenti Validatori per approvazione, correzione e/o integrazione, e la conseguente generazione e stampa del testo definitivo in formato pdf.}, KEYWORDS = {Tool, Procedura web creazione bandi}, URL = {http://bandi.ilc.cnr.it/form/login.php}, } @INCOLLECTION{DELLORLETTA_2009_INCOLLECTION_DLMMP_184585, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-2-Knowledge: una piattaforma linguistico-computazionale per l'estrazione di conoscenza da testi}, YEAR = {2009}, ABSTRACT = {The paper describes the automatic extraction of domain knowledge from Italian document collections and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.}, KEYWORDS = {Term extraction, Ontology Learning}, PAGES = {285-300}, URL = {https://publications.cnr.it/doc/184585}, PUBLISHER = {Bulzoni (Roma, ITA)}, ISBN = {978-88-7870-469-5}, EDITOR = {Ferrari, G. and Benatti, R. and Mosca, M.}, } @INPROCEEDINGS{VENTURI_2009_INPROCEEDINGS_VMMSTMA_84736, AUTHOR = {Venturi, G. and Montemagni, S. and Marchi, S. and Sasaki, Y. and Thompson, P. 
and McNaught, J. and Ananiadou, S.}, TITLE = {Bootstrapping a Verb Lexicon for Biomedical Information Extraction}, YEAR = {2009}, ABSTRACT = {The extraction of information from texts requires resources that contain both syntactic and semantic properties of lexical units. As the use of language in specialized domains, such as biology, can be very different to the general domain, there is a need for domain-specific resources to ensure that the information extracted is as accurate as possible. We are building a large-scale lexical resource for the biology domain, providing information about predicate-argument structure that has been bootstrapped from a biomedical corpus on the subject of E. Coli. The lexicon is currently focussed on verbs, and includes both automatically-extracted syntactic subcategorization frames, as well as semantic event frames that are based on annotation by domain experts. In addition, the lexicon contains manually-added explicit links between semantic and syntactic slots in corresponding frames. To our knowledge, this lexicon currently represents a unique resource within the biomedical domain.}, KEYWORDS = {domain-specific lexical resources, Biological Language Processing, syntax-semantic linking}, PAGES = {137-148}, URL = {https://publications.cnr.it/doc/84736}, DOI = {10.1007/978-3-642-00382-0_11}, PUBLISHER = {Springer-Verlag (Berlin Heidelberg, DEU)}, ISBN = {9783642003813}, CONFERENCE_NAME = {10th International Conference on Intelligent Text Processing and Computational Linguistics}, CONFERENCE_PLACE = {Mexico City, Mexico}, CONFERENCE_DATE = {1-7/03/2009}, } @INPROCEEDINGS{SPINOSA_2009_INPROCEEDINGS_SGCMVM_130118, AUTHOR = {Spinosa, P. and Giardiello, G. and Cherubini, M. and Marchi, S. and Venturi, G. and Montemagni, S.}, TITLE = {NLP–based Metadata Extraction for Legal Text Consolidation}, YEAR = {2009}, KEYWORDS = {Natural Language Processing, textual amendments, XML representation, metadata extraction, consolidation of legal text}, URL = {https://publications.cnr.it/doc/130118}, CONFERENCE_NAME = {Twelfth International Conference on Artificial Intelligence and Law (ICAIL 2009)}, CONFERENCE_PLACE = {Barcelona}, CONFERENCE_DATE = {June 8-12, 2009}, } @INPROCEEDINGS{VENTURI_2009_INPROCEEDINGS_VMMSTMA_112956, AUTHOR = {Venturi, G. and Montemagni, S. and Marchi, S. and Sasaki, Y. and Thompson, P. and McNaught, J. and Ananiadou, S.}, TITLE = {Bootstrapping a Verb Lexicon for Biomedical Information Extraction}, YEAR = {2009}, ABSTRACT = {The extraction of information from texts requires resources that contain both syntactic and semantic properties of lexical units. As the use of language in specialized domains, such as biology, can be very different to the general domain, there is a need for domain-specific resources to ensure that the information extracted is as accurate as possible. We are building a large-scale lexical resource for the biology domain, providing information about predicate-argument structure that has been bootstrapped from a biomedical corpus on the subject of E. Coli. The lexicon is currently focussed on verbs, and includes both automatically-extracted syntactic subcategorization frames, as well as semantic event frames that are based on annotation by domain experts. In addition, the lexicon contains manually-added explicit links between semantic and syntactic slots in corresponding frames.
To our knowledge, this lexicon currently represents a unique resource within the biomedical domain.}, KEYWORDS = {domain-specific lexical resources, lexical acquisition, syntax-semantics linking, Information Extraction, Biological Language Processing}, PAGES = {137-148}, URL = {https://publications.cnr.it/doc/112956}, VOLUME = {5449}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, ISBN = {978-3-642-00381-3}, CONFERENCE_NAME = {International Conference on Intelligent Text Processing and Computational Linguistics (CICLing 2009)}, CONFERENCE_PLACE = {Mexico City, Mexico}, CONFERENCE_DATE = {March 1-7, 2009}, BOOKTITLE = {Proceedings of the 10th International Conference on Intelligent Text Processing and Computational Linguistics (CICLing 2009)}, EDITOR = {Gelbukh, A.}, } @ARTICLE{DELLORLETTA_2008_ARTICLE_DLMMPV_64541, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio}, YEAR = {2008}, ABSTRACT = {The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.}, KEYWORDS = {Natural Language Processing, Machine Learning, Knowledge extraction from texts, Ontology learning, Legal ontologies}, PAGES = {197-218}, URL = {https://publications.cnr.it/doc/64541}, VOLUME = {26}, PUBLISHER = {Aida (Roma, Italia)}, ISSN = {1594-2201}, JOURNAL = {Aida Informazioni (Online)}, } @INPROCEEDINGS{DELLORLETTA_2008_INPROCEEDINGS_DLMMPV_84707, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio}, YEAR = {2008}, ABSTRACT = {The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.}, KEYWORDS = {Natural Language Processing, Machine Learning, Knowledge extraction from texts, Ontology learning, Legal ontologies}, PAGES = {197-218}, URL = {http://www.assiterm91.it/wp-content/uploads/2010/11/Convegno-2008.pdf}, VOLUME = {Anno 26, numero 1-2}, PUBLISHER = {Aida (Roma, Italia)}, ISSN = {1594-2201}, CONFERENCE_NAME = {Atti del Convegno Nazionale Ass. I. Term}, CONFERENCE_PLACE = {Arcavacata di Rende (CS)}, CONFERENCE_DATE = {5-7/06/2008}, BOOKTITLE = {Terminologia analisi testuale e documentazione nella città digitale}, } @INPROCEEDINGS{DELLORLETTA_2008_INPROCEEDINGS_DLMMPV_84698, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Marchi, S. and Pirrelli, V. 
and Venturi, G.}, TITLE = {Acquiring Legal Ontologies from Domain-specific Texts}, YEAR = {2008}, ABSTRACT = {The paper reports on methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts in the environmental domain. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. Evaluated results, however preliminary, are very encouraging, showing the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.}, KEYWORDS = {Ontology learning, Document management, knowledge extraction from texts, Natural Language Processing}, PAGES = {98-101}, URL = {https://publications.cnr.it/doc/84698}, CONFERENCE_NAME = {LangTech 2008}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {28-29/02/2008}, } @INPROCEEDINGS{GIOVANNETTI_2008_INPROCEEDINGS_GMM_84706, AUTHOR = {Giovannetti, E. and Marchi, S. and Montemagni, S.}, TITLE = {Combining statistical techniques and lexico-syntactic patterns for semantic relations extraction from text}, YEAR = {2008}, ABSTRACT = {We describe here a methodology to combine two different techniques for Semantic Relation Extraction from texts. On the one hand, generic lexicosyntactic patterns are applied to the linguistically analyzed corpus to detect a first set of pairs of co-occurring words, possibly involved in "syntagmatic" relations. On the other hand, a statistical unsupervised association system is used to obtain a second set of pairs of "distributionally similar" terms, that appear to occur in similar contexts, thus possibly involved in "paradigmatic" relations. The approach aims at learning ontological information by filtering the candidate relations obtained through generic lexico-syntactic patterns and by labelling the anonymous relations obtained through the statistical system. The resulting set of relations can be used to enrich existing ontologies and for semantic annotation of documents or web pages.}, KEYWORDS = {Ontology Learning from Text, Semantic Relation Extraction, Lexico-syntactic Patterns, Distributional Similarity}, URL = {http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_54.pdf}, CONFERENCE_NAME = {SWAP 2008-Semantic Web Applications and Perspectives}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {15-17 December 2008}, EDITOR = {Gangemi, A. and Keizer, J. and Presutti, V. and Stoermer, H.}, } @INPROCEEDINGS{GIOVANNETTI_2008_INPROCEEDINGS_GMMB_84726, AUTHOR = {Giovannetti, E. and Marchi, S. and Montemagni, S. and Bartolini, R.}, TITLE = {Ontology Learning and Semantic Annotation: a Necessary Symbiosis}, YEAR = {2008}, ABSTRACT = {Semantic annotation of text requires the dynamic merging of linguistically structured information and a "world model", usually represented as a domain-specific ontology. On the other hand, the process of engineering a domain-ontology through semi-automatic ontology learning system requires the availability of a considerable amount of semantically annotated documents. 
Facing this bootstrapping paradox requires an incremental process of annotation-acquisition-annotation, whereby domain-specific knowledge is acquired from linguistically-annotated texts and then projected back onto texts for extra linguistic information to be annotated and further knowledge layers to be extracted. The presented methodology is a first step in the direction of a full "virtuous" circle where the semantic annotation platform and the evolving ontology interact in symbiosis. As a case study we have chosen the semantic annotation of product catalogues. We propose a hybrid approach, combining pattern matching techniques to exploit the regular structure of product descriptions in catalogues, and Natural Language Processing techniques which are resorted to analyze natural language descriptions. The semantic annotation involves the access to the ontology, semi-automatically bootstrapped with an ontology learning tool from annotated collections of catalogues.}, KEYWORDS = {Information Extraction, Information Retrieval, Ontologies, Tools, Systems}, PAGES = {2079-2085}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {2008}, BOOKTITLE = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odjik, J. and Piperidis, S. and Tapias, D.}, } @INPROCEEDINGS{MARINELLI_2008_INPROCEEDINGS_MBMSCCC_112941, AUTHOR = {Marinelli, R. and Bindi, R. and Marchi, S. and Santarcangelo, E. L. and Cavallaro, F. I. and Castellani, E. and Carli, G.}, TITLE = {Suscettibilità ipnotica e linguaggio}, YEAR = {2008}, ABSTRACT = {--}, KEYWORDS = {Psycholinguistics}, PAGES = {10}, URL = {https://publications.cnr.it/doc/112941}, PUBLISHER = {Bulzoni (Roma, ITA)}, ISBN = {978-88-7870-652-1}, CONFERENCE_NAME = {XLII Congresso Internazionale di Studi della Società di Linguistica Italiana}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {25-27/09/2008}, } @INPROCEEDINGS{CARLI_2008_INPROCEEDINGS_CMBMCCS_112938, AUTHOR = {Carli, G. and Marinelli, R. and Bindi, R. and Marchi, S. and Cavallaro, F. I. and Castellani, E. and Santarcangelo, E. L.}, TITLE = {Language modulation by hypnotizability}, YEAR = {2008}, KEYWORDS = {Psychotherapy, Neurorehabilitation}, URL = {https://publications.cnr.it/doc/112938}, CONFERENCE_NAME = {59° Congresso Nazionale della Società Italiana di Fisiologia}, CONFERENCE_PLACE = {Villasimius (CA)}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{DELLORLETTA_2007_INPROCEEDINGS_DLMMP_84687, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-2-Knowledge: una piattaforma linguistico-computazionale per l'estrazione di conoscenza da testi}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84687}, CONFERENCE_NAME = {XL Congresso Internazionale di Studi della Società di Linguistica Italiana (SLI 2006)}, CONFERENCE_PLACE = {Roma}, } @INPROCEEDINGS{GIOVANNETTI_2007_INPROCEEDINGS_GMMB_84690, AUTHOR = {Giovannetti, E. and Marchi, S. and Montemagni, S. 
and Bartolini, R.}, TITLE = {Ontology-based Semantic Annotation of Product Catalogues}, YEAR = {2007}, ABSTRACT = {This paper describes a methodology for the semantic annotation of product catalogues. We propose a hybrid approach, combining pattern matching techniques to exploit the regular structure of product descriptions in catalogues, and Natural Language Processing techniques which are resorted to analyze natural language descriptions. It also includes the access to an application ontology, semi-automatically bootstrapped from collections of catalogues with an ontology learning tool, which is used to drive the semantic annotation process.}, KEYWORDS = {Semantic Annotation of texts, Ontology Learning, Information Extraction for e-commerce}, PAGES = {235-239}, URL = {https://publications.cnr.it/doc/84690}, CONFERENCE_NAME = {Recent Advances in Natural Language Processing (RANLP-2007)}, CONFERENCE_PLACE = {Borovets}, CONFERENCE_DATE = {27-29 settembre 2007}, BOOKTITLE = {Proceedings of the International Conference "Recent Advances in Natural Language Processing"}, } @TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157412, AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.}, TITLE = {Segmentazione di un Testo Italiano in Token}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157412}, } @TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157413, AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.}, TITLE = {Language Recognition Tool, Specifiche di Implementazione}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157413}, } @TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157414, AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.}, TITLE = {Analisi Morfosintattica per l'Italiano}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157414}, } @TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157415, AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.}, TITLE = {Specifiche di Chunking per l'Italiano}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157415}, } @TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157416, AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.}, TITLE = {Specifiche di Named Entity Recognition per l'Italiano}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157416}, } @TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157417, AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.}, TITLE = {Segmentazione di un Testo Inglese in Token}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157417}, } @TECHREPORT{MONTEMAGNI_2007_TECHREPORT_MMVBBRPT_157440, AUTHOR = {Montemagni, S. and Marchi, S. and Venturi, G. and Bartolini, R. and Bertagna, F. and Ruffolo, P. and Peters, W. and Tiscornia, D.}, TITLE = {Report on Ontology learning tool and testing}, YEAR = {2007}, ABSTRACT = {This deliverable documents the work done within the DALOS EU project for what concerns the definition and implementation of methodologies and techniques to bootstrap terminological and ontological knowledge from domain corpora. 
Starting from a corpus of legacy legislative texts in different languages, linguistic technologies combined with statistical techniques have been used to extract significant terms as well as to structure them in conceptual structures for the different languages dealt with within the project, namely Italian, English, Spanish and Dutch.}, KEYWORDS = {Ontology Learning, Term Extraction, Natural Language Processing, Conceptual Indexing}, URL = {https://publications.cnr.it/doc/157440}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BCGLMPRS_84608, AUTHOR = {Bartolini, R. and Caracciolo, C. and Giovannetti, E. and Lenci, A. and Marchi, S. and Pirrelli, V. and Renso, C. and Spinsanti, L.}, TITLE = {Creation and Use of Lexicons and Ontologies for NL Interfaces to Databases}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84608}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation (LREC)}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BCGLMPRS_91313, AUTHOR = {Bartolini, R. and Caracciolo, C. and Giovannetti, E. and Lenci, A. and Marchi, S. and Pirrelli, V. and Renso, C. and Spinsanti, L.}, TITLE = {Creation and use of lexicons and ontologies for natural language interface to databases}, YEAR = {2006}, ABSTRACT = {In this paper we present an original approach to natural language query interpretation which has been implemented within the FuLL (Fuzzy Logic and Language) Italian project of BC S.r.l. In particular, we discuss here the creation of linguistic and ontological resources, together with the exploitation of existing ones, for natural language-driven database access and retrieval. Both the database and the queries we experiment with are Italian, but the methodology we broach naturally extends to other languages.}, KEYWORDS = {Natural language processing, ontologies, gis, databases}, PAGES = {6}, URL = {https://publications.cnr.it/doc/91313}, CONFERENCE_NAME = {LREC Conference}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {24-26/05/2006}, BOOKTITLE = {LREC 2006}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BGMMABSB_84664, AUTHOR = {Bartolini, R. and Giovannetti, E. and Marchi, S. and Montemagni, S. and Andreatta, C. and Brunelli, R. and Stecher, R. and Bouquet, P.}, TITLE = {Multimedia Information Extraction in Ontology-based Semantic Annotation of Product Catalogues}, YEAR = {2006}, ABSTRACT = {The demand for efficient methods for extracting knowledge from multimedia content has led to a growing research community investigating the convergence of multimedia and knowledge technologies. In this paper we describe a methodology for extracting multimedia information from product catalogues empowered by the synergetic use and extension of a domain ontology. The methodology was implemented in the Trade Fair Advanced Semantic Annotation Pipeline of the VIKE-framework.}, KEYWORDS = {Semantic Web Technologies, ontology creation, ontology extraction, ontology evolution, semantic annotation of multimedia content}, URL = {https://publications.cnr.it/doc/84664}, CONFERENCE_NAME = {SWAP 2006}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {18-20 December 2006}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BGMMABSNBB_84663, AUTHOR = {Bartolini, R. and Giovannetti, E. and Marchi, S. and Montemagni, S. and Andreatta, C. and Brunelli, R. and Stecher, R. and Niederée, C. and Bouquet, P. 
and Bortoli, S.}, TITLE = {Ontology Learning in Multimedia Information Extraction from Product Catalogues}, YEAR = {2006}, ABSTRACT = {We propose a methodology for extracting multimedia information from product catalogues empowered by the synergetic use and extension of a domain ontology. The use of domain ontologies in this context additionally opens up innovative ways of catalogue use. The method is characterized by incrementally feeding and exploiting the ontology during an information extraction process, implemented by the semantic annotation of the analysed document, and by providing support for detecting existing similar ontologies to enable reuse of (parts of) them.}, KEYWORDS = {knowledge-driven multimedia analysis, ontology learning, semi-automatic content annotation tools}, URL = {https://publications.cnr.it/doc/84663}, CONFERENCE_NAME = {BOEMIE 2006}, CONFERENCE_PLACE = {Podebrady, Czech Republic}, CONFERENCE_DATE = {6 ottobre 2006}, } @MISC{BARTOLINI_2006_MISC_BDLMMP_151563, AUTHOR = {Bartolini, R. and Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-to-Knowledge (T2K) Versione 2}, YEAR = {2006}, ABSTRACT = {Versione 2. Text-to-Knowledge (T2K) è una piattaforma software di supporto avanzato alla gestione documentale per la creazione dinamica di repertori terminologici e ontologie di dominio a partire da testi e per l'indicizzazione concettuale di documenti. Il sistema T2K si propone di offrire una batteria integrata di strumenti avanzati di analisi linguistica del testo, analisi statistica e apprendimento automatico del linguaggio, destinati a offrire una rappresentazione accurata del contenuto di una base documentale non strutturata, per scopi di indicizzazione avanzata e navigazione intelligente. I risultati di questo processo di acquisizione sono annotati in forma di metadati XML, offrendo in tal modo la prospettiva di una sempre crescente e diretta interoperabilità con sistemi automatici per la produzione di contenuti digitali selezionati e strutturati dinamicamente su misura, per diversi profili di utenza. Versioni prototipali di T2K sono già operative su alcuni portali della pubblica amministrazione e sono state applicate per l'indicizzazione di contenuti didattici multimediali. È in corso l'integrazione della tecnologia T2K nel sistema di gestione informatica di documentazione scientifica del CNR.}, KEYWORDS = {text to knowledge, nlp, estrazione terminologica, ontology learning, indicizzazione terminologica}, URL = {https://publications.cnr.it/doc/151563}, } @TECHREPORT{BARTOLINI_2005_TECHREPORT_BCLMP_157365, AUTHOR = {Bartolini, R. and Caracciolo, C. and Lenci, A. and Marchi, S. and Pirrelli, V.}, TITLE = {Motore semantico. Documento di progettazione e sviluppo}, YEAR = {2005}, ABSTRACT = {Il presente documento descrive architettura, funzionalità e algoritmo di un componente software dedicato, designato come "Motore Semantico", che ha lo scopo di produrre rappresentazioni logico-concettuali, ontologicamente interpretate, di interrogazioni in linguaggio naturale su una base di dati di tipo anche GIS.}, KEYWORDS = {NLP}, PAGES = {1-42}, URL = {https://publications.cnr.it/doc/157365}, } @TECHREPORT{BARTOLINI_2005_TECHREPORT_BGMM_157366, AUTHOR = {Bartolini, R. and Giorgetti, D. and Marchi, S. and Montemagni, S.}, TITLE = {ILC-CNR Contribution to Deliverable 4. 
1}, YEAR = {2005}, ABSTRACT = {The goal of the semantic annotation is the annotation of entities and relations starting from input documents conformant with the harmonisation output schema as defined within WP3. This harmonisation schema will focus on the structural and logical organisation of the documents, while WP4 will concentrate on the annotation of textual entities and image elements. The results of semantic annotation are intended to populate the domain ontology.}, KEYWORDS = {NLP}, URL = {https://publications.cnr.it/doc/157366}, } @TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMMP_157367, AUTHOR = {Bartolini, R. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Personalizzazione degli Italian NLP tools}, YEAR = {2005}, ABSTRACT = {Il presente documento intende offrire criteri e risultati della fase di personalizzazione dei moduli per l'analisi automatica del testo (Italian NLP tools o "AnITA") all'interno dell'architettura prevista nell'ambito del progetto FuLL.}, KEYWORDS = {NLP}, PAGES = {13}, URL = {https://publications.cnr.it/doc/157367}, } @TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMMP_157370, AUTHOR = {Bartolini, R. and Lenci, L. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-2-Knowledge: Acquisizione semi-automatica di ontologie per l'indicizzazione semantica di documenti}, YEAR = {2005}, ABSTRACT = {Text-2-Knowledge, Acquisizione semi-automatica di ontologie per l'indicizzazione semantica di documenti}, KEYWORDS = {nlp, terminology extraction}, URL = {https://publications.cnr.it/doc/157370}, } @TECHREPORT{GIORGETTI_2005_TECHREPORT_GMM_157380, AUTHOR = {Giorgetti, D. and Marchi, S. and Montemagni, S.}, TITLE = {ILC-CNR Contribution to Deliverable 5. 1}, YEAR = {2005}, ABSTRACT = {This document describes the high level infrastructure designed as part of the project VIKEF for creating a Virtual Information and Knowledge Environment (VIKE), namely an environment made up of explicit representation of the information and knowledge implicitly contained in one or more collections of Information-Content-Knowledge (ICK) resources, and of a collection of services operating on this explicit representation of information and knowledge; it is a virtual environment, as the representation and the services for accessing information and knowledge is almost completely independent from the physical properties of the original data.}, KEYWORDS = {NLP}, URL = {https://publications.cnr.it/doc/157380}, } @TECHREPORT{LENCI_2005_TECHREPORT_LMP_157381, AUTHOR = {Lenci, A. and Marchi, S. and Pirrelli, V.}, TITLE = {Motore del dialogo. Documento di progettazione e sviluppo}, YEAR = {2005}, ABSTRACT = {Il presente documento intende offrire i criteri generali e le funzionalità di base relativi alla progettazione del motore di dialogo nell'ambito del progetto FuLL}, KEYWORDS = {NLP}, URL = {https://publications.cnr.it/doc/157381}, } @TECHREPORT{MARCHI_2005_TECHREPORT_MM_157384, AUTHOR = {Marchi, S. and Montemagni, S.}, TITLE = {ILC-CNR Contribution to Deliverable 3. 1}, YEAR = {2005}, ABSTRACT = {This document presents the first set of knowledge and content acquisition components. Starting from the Annotation Schema definition, it will then describe the Harmonization support and the Annotation components, as well as the various resources needed all along the current chain.}, KEYWORDS = {NLP}, URL = {https://publications.cnr.it/doc/157384}, } @MISC{BARTOLINI_2005_MISC_BDGMLMP_151548, AUTHOR = {Bartolini, R. and Dell'Orletta, F. and Giorgetti, D. and Marchi, S. 
and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-to-Knowledge (T2K)}, YEAR = {2005}, ABSTRACT = {Piattaforma di estrazione e indicizzazione terminologica.}, KEYWORDS = {NLP, estrazione terminologica}, URL = {https://publications.cnr.it/doc/151548}, } @MISC{BARTOLINI_2005_MISC_BMLMP_151550, AUTHOR = {Bartolini, R. and Marchi, S. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {NLPtools}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151550}, } @INPROCEEDINGS{HEPPLE_2004_INPROCEEDINGS_HIAMMG_84609, AUTHOR = {Hepple, M. and Ireson, N. and Allegrini, P. and Marchi, S. and Montemagni, S. and Gómez Hidalgo, J. M.}, TITLE = {NLP-enhanced Content filtering within the POESIA Project}, YEAR = {2004}, ABSTRACT = {This paper introduces the POESIA internet filtering system, which is open-source, and which combines standard filtering methods, such as positive/negative URL lists, with more advanced techniques, such as image processing and NLP-enhanced text filtering. The description here focusses on components providing textual content filtering for three European languages (English, Italian and Spanish), employing NLP methods to enhance performance. We address also the acquisition of language data needed to develop these filters, and the evaluation of the system and its components.}, KEYWORDS = {Image processing, Natural language processing systems, Open systems}, PAGES = {1967-1970}, URL = {https://www.aclweb.org/anthology/L04-1507/}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {26-28 May 2004}, BOOKTITLE = {Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC 2004)}, EDITOR = {Lino, M. T. and Xavier, M. F. and Ferreira, F. and Costa, R. and Silva, R.}, } @TECHREPORT{ALLEGRINI_2003_TECHREPORT_ACMMHIGCDP_157348, AUTHOR = {Allegrini, P. and Calzolari, N. and Marchi, S. and Montemagni, S. and Hepple, M. and Ireson, N. and Gomez Hidalgo, J. M. and Carrero Garcia, F. and De Buenaga Rodriguez, M. and Puera Sanz, E.}, TITLE = {POESIA Lexical Resources and Tools for Each Language}, YEAR = {2003}, ABSTRACT = {The aim of this report is to review the various resources that the different language processing sites expect to use in the development of their language-specific text filtering components. Some of the required resources are ones that were developed before Poesia, possibly by one of the Poesia partners, or possibly elsewhere but being now in the public domain. Such resources may require adaptation to the Poesia task. Other resources required for Poesia will be developed as part of the project. In some cases, this development has already been done or is in progress, whilst in others, it is yet to be undertaken. In what follows, the status of each of the resources described will be made clear in terms of these alternatives.}, KEYWORDS = {Lexical Resources, nlp}, PAGES = {30}, URL = {https://publications.cnr.it/doc/157348}, } @TECHREPORT{STARYNKEVITCH_2002_TECHREPORT_SDTZHIGACMMG_430635, AUTHOR = {Starynkevitch, B. and Daoudi, M. and Tombelle, C. and Zheng, H. and Hepple, M. and Ireson, N. and Gomez Hildago, J. and Allegrini, P. and Calzolari, N. and Marchi, S. and Montemagni, S. 
and Guerra, S.}, TITLE = {POESIA Software Architecture Definition Document}, YEAR = {2002}, ABSTRACT = {Software Architecture Definition Document}, KEYWORDS = {NLP, Software Engineering}, PAGES = {68-80}, URL = {https://publications.cnr.it/doc/430635}, }