@ARTICLE{BIFFI_2023_ARTICLE_BGMS_490948,
  AUTHOR = {Biffi, M. and Guadagnini, E. and Montemagni, S. and Sassolini, E.},
  TITLE = {Il lemmario del «GDLI»: dati quantitativi e prime osservazioni},
  YEAR = {2023},
  ABSTRACT = {Dopo la realizzazione della versione elettronica del solo testo del "Grande dizionario della lingua italiana" (GDLI), si è avviato un progetto di graduale informatizzazione della sua struttura. Questo articolo ne presenta il primo risultato, vale a dire l'estrazione automatica del lemmario che è così per la prima volta quantificabile e individuabile. Una prima parte del testo è dedicata all'illustrazione della strutturazione dei contenuti del dizionario e la loro rappresentazione secondo standard internazionalmente riconosciuti (XML-TEI); la seconda presenta una prima elaborazione dei dati del lemmario estratto; la terza propone una prima analisi comparativa con i lemmari di altri dizionari della lingua italiana.},
  KEYWORDS = {Lessicografia, Lessicografia digitale, Lessicografia storica},
  PAGES = {331--351},
  URL = {https://accademiadellacrusca.it/it/riviste/articoli/slei-xl-2023/8679},
  VOLUME = {40},
  PUBLISHER = {Le Lettere (Firenze, Italia)},
  ISSN = {0392-5218},
  JOURNAL = {Studi di lessicografia italiana},
}

@ARTICLE{BIFFI_2022_ARTICLE_BDFGMS_477716,
  AUTHOR = {Biffi, M. and De Blasi, F. and Favaro, M. and Guadagnini, E. and Montemagni, S. and Sassolini, E.},
  TITLE = {Parole in rete / reti di parole. Possibili impieghi didattici dei grandi vocabolari storici digitalizzati},
  YEAR = {2022},
  ABSTRACT = {After a brief presentation of the great historical dictionaries of Italian, which are free to use online thanks to the digitalisation work carried out by the Accademia della Crusca, the contribution offers a number of examples of how these tools can be used for educational purposes.
Finally, further didactic uses are described, which will be made possible thanks to the advanced digital tools that the Accademia della Crusca and the Istituto di Linguistica Computazionale "Antonio Zampolli" del Consiglio Nazionale delle Ricerche (ILC) are currently working on.},
  KEYWORDS = {Lessicografia italiana, Didattica dell'italiano, Lessicografia digitale},
  PAGES = {143--188},
  URL = {https://italianoascuola.unibo.it/article/view/14866},
  VOLUME = {4},
  DOI = {10.6092/issn.2704-8128/14866},
  PUBLISHER = {ABIS-AlmaDL (Bologna, Italia)},
  ISSN = {2704-8128},
  JOURNAL = {Italiano a scuola},
}

@INPROCEEDINGS{SASSOLINI_2021_INPROCEEDINGS_SBDGM_455303,
  AUTHOR = {Sassolini, E. and Biffi, M. and De Blasi, F. and Guadagnini, E. and Montemagni, S.},
  TITLE = {La digitalizzazione del GDLI: un approccio linguistico per la corretta acquisizione del testo?},
  YEAR = {2021},
  ABSTRACT = {In questo articolo sono discussi metodi e strategie in via di elaborazione per la correzione (propedeutica alla successiva strutturazione) dei contenuti del Grande dizionario della lingua italiana (GDLI) fondato da Salvatore Battaglia, estratti da un formato digitale non standard. La presenza, in questo formato, di errori distribuiti di vario tipo ha condizionato la scelta dell'approccio all'estrazione e messo in luce tutte le difficoltà dell'operazione. Le sperimentazioni fatte sino a oggi portano a privilegiare una strategia di correzione multilivello, che procede scomponendo in sezioni distinte l'individuazione e la correzione degli errori, in modo da rendere gestibili interventi complessi di correzione semi-automatica, altrimenti improponibili, e consentire un loro raffinamento progressivo. Parallelamente alla definizione di regole di riconoscimento di struttura e formato, stiamo analizzando metodi e procedure in grado di migliorare la qualità dell'input e specializzare i moduli di estrazione per i singoli campi della voce a partire dal "lemma".
Le finalità del lavoro sono duplici: l'estrazione e strutturazione dei contenuti e la produzione di un formato standard di rappresentazione dei dati. Si tratta di un percorso difficile perché il formato dei dati rende l'uso di strumenti reperibili in letteratura non applicabile. Solamente al termine del lavoro potremo capire se esistono le condizioni per trasformare l'approccio adottato in un protocollo di intervento replicabile.},
  KEYWORDS = {dizionari digitali, risorse linguistiche, estrazione dell'informazione, correzione del testo post OCR},
  PAGES = {159--166},
  URL = {https://aiucd2021.labcd.unipi.it/wp-content/uploads/2021/05/AIUCD2021_BOA-versione3A.pdf},
  DOI = {10.6092/unibo/amsacta/6712},
  ISBN = {9788894253559},
  CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {19-22/01/2021},
}

@INPROCEEDINGS{SASSOLINI_2020_INPROCEEDINGS_SB_455300,
  AUTHOR = {Sassolini, E. and Biffi, M.},
  TITLE = {Strategie e metodi per il recupero di dizionari storici},
  YEAR = {2020},
  ABSTRACT = {L'articolo descrive un approccio sperimentale all'estrazione, da formato digitale non standard, della completa struttura delle entrate lessicali del Grande Dizionario storico della Lingua Italiana (GDLI) di S. Battaglia. Sono riportati i risultati preliminari di una collaborazione tra l'Accademia della Crusca e Istituto di Linguistica Computazionale "A. Zampolli" del CNR, che mira a convertire i contenuti testuali in dati digitali strutturati per offrirli alla consultazione e allo studio degli utenti e/o per la successiva integrazione con altre risorse linguistiche, sia dizionari che corpora.
Il processo di estrazione si articola da un lato nella definizione di procedure di estrazione dei dati, dall'altro nell'adozione di strategie finalizzate al supporto alla correzione degli errori.},
  KEYWORDS = {Archivi digitali, recupero e conservazione, estrazione dell'informazione},
  PAGES = {235--239},
  URL = {https://publications.cnr.it/doc/455300},
  DOI = {10.6092/unibo/amsacta/6316},
  ISBN = {978-88-942535-4-2},
  CONFERENCE_NAME = {IX Convegno annuale AIUCD: LA SVOLTA INEVITABILE: SFIDE E PROSPETTIVE PER L'INFORMATICA UMANISTICA},
  CONFERENCE_PLACE = {Università Cattolica del Sacro Cuore, Milano},
  CONFERENCE_DATE = {15-17/01/2020},
}

@INPROCEEDINGS{SASSOLINI_2017_INPROCEEDINGS_SCC_382393,
  AUTHOR = {Sassolini, E. and Cucurullo, S. and Cinini, A.},
  TITLE = {I corpora digitali: dall'obsolescenza tecnologica, alla salvaguardia e alla condivisione},
  YEAR = {2017},
  ABSTRACT = {Studio e implementazione di un protocollo di recupero, conservazione e valorizzazione di testi e corpora digitali interessati da problemi di obsolescenza tecnologica. Le strategie di salvaguardia adottate si spingono oltre il salvataggio dei testi e la conservazione in un formato di rappresentazione in linea con gli standard internazionali (XML TEI), si pongono come obiettivo la valorizzazione di questo patrimonio attraverso nuove modalità di fruizione dei contenuti. Lo scopo è affiancare le funzionalità classiche di analisi testuale, che da sempre caratterizzano le nostre attività di ricerca, a nuove modalità grafiche e visuali di fruizione dei dati e, in alcuni casi, migrare verso dispositivi mobili e tecnologie App. In questo articolo, oltre al protocollo di recupero, presentiamo due sperimentazioni di valorizzazione di contenuti testuali. Nel primo caso proponiamo tecniche di visual analytics applicate ad un corpus testuale semi strutturato riguardante corrispondenza redatta in lingua italiana del 1600.
Nel secondo caso abbiamo realizzato un'applicazione per sistema Android finalizzata all'interrogazione di dati testuali relativi ad un progetto di censimento di architetture moderne della regione Liguria.},
  KEYWORDS = {Testi digitali, Analisi testuale, Preservazione dei dati, Diffusione dei risultati},
  PAGES = {31--35},
  URL = {https://www.garr.it/it/documenti/3529-conferenza-2016-selected-papers-sassolini-et-al/file},
  DOI = {10.26314/GARR-Conf16-proceeedings-06},
  PUBLISHER = {Consortium GARR (Roma, ITA)},
  ISBN = {978-88-905077-6-2},
  CONFERENCE_NAME = {Conferenza GARR 2016-The CreActive Network},
  CONFERENCE_PLACE = {Firenze},
  CONFERENCE_DATE = {30/11/2016-02/12/2016},
}

@INPROCEEDINGS{SASSOLINI_2017_INPROCEEDINGS_SC_382418,
  AUTHOR = {Sassolini, E. and Cinini, A.},
  TITLE = {Approcci grafici all'analisi di corpora testuali},
  YEAR = {2017},
  ABSTRACT = {sperimentazioni finalizzate a combinare tecniche di "distant reading" e funzionalità classiche di Information Retrieval (IR) su dati testuali. Incrementare con sintesi grafiche e visuali l'offerta di strumenti di studio e di analisi dei dati testuali rappresenta una nuova frontiera del nostro ambito di ricerca consueto.},
  KEYWORDS = {analisi testuale, distant reading, visual analytics},
  PAGES = {83--86},
  URL = {http://aiucd2017.aiucd.it/wp-content/uploads/2017/01/book-of-abstract-AIUCD-2017.pdf},
  CONFERENCE_NAME = {AIUCD 2017 Conference},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {24-28/01/2017},
}

@TECHREPORT{CININI_2017_TECHREPORT_CCS_382931,
  AUTHOR = {Cinini, A. and Cucurullo, S.
and Sassolini, E.},
  TITLE = {Rapporto Tecnico: Standardizzazione del corpus testuale del PRIN Crusca},
  YEAR = {2017},
  ABSTRACT = {Attività previste nella convenzione operativa tra ILC-CNR e Accademia della Crusca che riguardano la progettazione e lo sviluppo di una piattaforma Web modulare per l'archiviazione, la gestione e l'interrogazione di corpora testuali in lingua italiana, con funzionalità derivate dal DBT (Data Base Testuale) nelle sue diverse implementazioni. Il lavoro preliminare svolto riguarda anche la normalizzazione dei testi e la conversione nello standard di rappresentazione XML TEI.},
  KEYWORDS = {Codifica dei testi, Analisi testuale, formato XML TEI},
  PAGES = {1--21},
  URL = {https://publications.cnr.it/doc/382931},
}

@TECHREPORT{SASSOLINI_2017_TECHREPORT_SC_383394,
  AUTHOR = {Sassolini, E. and Cinini, A.},
  TITLE = {DIGESTO: NUOVE FUNZIONALITÀ E SITO WEB},
  YEAR = {2017},
  ABSTRACT = {Realizzazione di un nuovo sito web per la consultazione dei testi bilingui, con sviluppo di nuove funzionalità di ricerca, non più solo per parola ma anche per Titolo, Frammento o Paragrafo. Realizzazione di una versione PDF scaricabile di parti ragionate dell'intero corpus},
  KEYWORDS = {testi paralleli, sito web, analisi testuale, visual analytics},
  PAGES = {1--12},
  URL = {https://publications.cnr.it/doc/383394},
}

@INPROCEEDINGS{WIELING_2016_INPROCEEDINGS_WSCM_359168,
  AUTHOR = {Wieling, M. and Sassolini, E. and Cucurullo, S. and Montemagni, S.},
  TITLE = {ALT Explored: Integrating an Online Dialectometric Tool and an Online Dialect Atlas},
  YEAR = {2016},
  ABSTRACT = {In this paper, we illustrate the integration of an online dialectometric tool, Gabmap, together with an online dialect atlas, the Atlante Lessicale Toscano (ALT-Web). By using a newly created url-based interface to Gabmap, ALT-Web is able to take advantage of the sophisticated dialect visualization and exploration options incorporated in Gabmap.
For example, distribution maps showing the distribution in the Tuscan dialect area of a specific dialectal form (selected via the ALT-Web website) are easily obtainable. Furthermore, the complete ALT-Web dataset as well as subsets of the data (selected via the ALT-Web website) can be automatically uploaded and explored in Gabmap. By combining these two online applications, macro- and micro-analyses of dialectal data (respectively offered by Gabmap and ALT-Web) are effectively and dynamically combined.},
  KEYWORDS = {Lexicon, Lexical Database, Tools, Systems, Applications},
  PAGES = {3265--3272},
  URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html},
  ISBN = {978-2-9517408-9-1},
  CONFERENCE_NAME = {LREC 2016},
  CONFERENCE_PLACE = {Portorož, Slovenia},
  CONFERENCE_DATE = {23/10/2016},
}

@COMMENT{NOTE(review): the LREC 2016 entry above records CONFERENCE_DATE 23/10/2016; the conference is believed to have been held in May 2016, so the month may be wrong. Confirm against the proceedings before changing.}

@INPROCEEDINGS{SASSOLINI_2016_INPROCEEDINGS_SCC_382394,
  AUTHOR = {Sassolini, E. and Cucurullo, S. and Cinini, A.},
  TITLE = {I corpora digitali: dall'obsolescenza tecnologica, alla salvaguardia e alla condivisione},
  YEAR = {2016},
  ABSTRACT = {Il progetto di recupero, nato pochi anni fa come iniziativa fortemente voluta da ILC, prosegue oggi con la collaborazione di molte istituzioni pubbliche e private, impegnate sullo stesso fronte. Approccio al recupero a tappe: inizialmente lavorando su testi che erano stati prodotti per essere indicizzati con le prime procedure di analisi testuale presenti all'ILC sin dalla fine degli anni '70 del secolo scorso.
Definizione di criteri da adottare per la scelta dei testi, basati sullo studio di casi significativi e sull'importanza dei materiali, spesso legati alla realizzazione di autorevoli progetti nazionali e internazionali.},
  KEYWORDS = {recupero testi, conversione in formato XML, valorizzazione dei risultati},
  PAGES = {1--3},
  URL = {https://www.eventi.garr.it/it/conf16/home/materiali-conferenza-2016/paper},
  CONFERENCE_NAME = {Conferenza GARR 2016-The CreActive Network},
  CONFERENCE_PLACE = {Firenze},
  CONFERENCE_DATE = {30/11/2016-02/12/2016},
}

@INPROCEEDINGS{SASSOLINI_2014_INPROCEEDINGS_SSCCS_319040,
  AUTHOR = {Sassolini, E. and Sassi, M. and Cucurullo, S. and Cinini, A. and Sbrulli, S.},
  TITLE = {Industrial Philology: Problems and techniques of data and archives preservation for future generations},
  YEAR = {2014},
  ABSTRACT = {The main objective of digital archiving of texts is their re-use and preservation. The concept that guides these initiatives is linked to structural and organizational needs which heavily influence the definition of the format specifications that describe the organisation of the archives at various levels and consists of a more or less complex document. A format specification provides the details needed to build a file from a text, establishes the admitted encodings and software applications that can decode the file and make its content accessible. These structural specifications can have an extremely variable size and they depend on the complexity of the format. Although some format specifications are, for the most part, independent of the specific software (for example, ASCII and Unicode codes), many of them are related to the historical period in which the texts were acquired and also by dated software technologies. The file format specification should evolve hand in hand with the related software, and the fate of one is in fact often linked to that of the other.
It is therefore appropriate to face the issue of obsolescence of software together with the obsolescence of file formats and of storage medium.},
  KEYWORDS = {text management, text analysis},
  PAGES = {168--172},
  URL = {https://publications.cnr.it/doc/319040},
  PUBLISHER = {TransAtlantic (Amsterdam, Paesi Bassi)},
  ISSN = {1386-2316},
  ISBN = {978-90-77484-22-7},
  CONFERENCE_NAME = {GL15: Fifteenth International Conference on Grey Literature},
  CONFERENCE_PLACE = {Bratislava},
  CONFERENCE_DATE = {2, 3 december 2013},
  BOOKTITLE = {The GL-conference series. Conference proceedings},
}

@INPROCEEDINGS{SPADONI_2012_INPROCEEDINGS_STLRTSO_219516,
  AUTHOR = {Spadoni, F. and Tartarelli, A. and Loparco, L. and Rossi, R. and Tariffi, F. and Sassolini, E. and Ongaro, P.},
  TITLE = {SMARTCITY: Customized and Dynamic Multimedia Content Production for Tourism Applications},
  YEAR = {2012},
  ABSTRACT = {This paper presents the SMARTCITY project experience: customized and dynamic multimedia content production for professional tourism applications.},
  KEYWORDS = {Corpus Annotation Cultural Heritage Access To The Culture Information},
  PAGES = {132--137},
  URL = {http://digital.casalini.it/9788866551300},
  CONFERENCE_NAME = {Electronic Imaging \& the Visual Arts EVA 2012 Florence (EVA 2012 Florence)},
  CONFERENCE_PLACE = {Firenze, Italia},
  CONFERENCE_DATE = {9-10-11 maggio 2012},
}

@INPROCEEDINGS{SPADONI_2012_INPROCEEDINGS_STLRTSO_220325,
  AUTHOR = {Spadoni, F. and Tartarelli, A. and Loparco, L. and Rossi, R. and Tariffi, F. and Sassolini, E. and Ongaro, P.},
  TITLE = {SMARTCITY: CUSTOMIZED AND DYNAMIC MULTIMEDIA CONTENT PRODUCTION FOR TOURISM APPLICATIONS},
  YEAR = {2012},
  ABSTRACT = {This paper presents the final results of the SMARTCITY project, co-funded by the Tuscany Region under the POR CREO 1.d program.
The project proposes an innovative methodology as well as advanced technologies enabling professional services for cultural tourism applications in urban areas as well as larger archaeological sites.},
  KEYWORDS = {Tourism application Dynamic Multimedia Content Production Semantic Annotation},
  URL = {https://publications.cnr.it/doc/220325},
  CONFERENCE_NAME = {Smartcity: Customized and dynamic multimedia content production for tourism applications (EVA 2012 Florence)},
  CONFERENCE_PLACE = {Firenze},
  CONFERENCE_DATE = {9-10-11 maggio 2012},
}

@COMMENT{NOTE(review): the entry below records its first author as Eugenio, P.; other entries in this file list Picchi, E. alongside Sassolini, so the family and given names may be inverted. Confirm before changing; the citation key is left untouched.}

@INPROCEEDINGS{EUGENIO_2011_INPROCEEDINGS_ES_205537,
  AUTHOR = {Eugenio, P. and Sassolini, E.},
  TITLE = {The "Micro Semantics" for intelligent browsing},
  YEAR = {2011},
  ABSTRACT = {Study and development of methodologies to improve systems of "information retrieval". Our approach is based on the integration of techniques, originally created to disciplines such as philology, lexicography, literature, with linguistic and statistical tools for the extraction and analysis of information in the text. Also we experimented a special methodology, for the creation of specific semantic metadata for text materials. In this paper, we describe "SmartCity", a project in which we applied these strategies. The project aims at designing and developing multimedia content (audio-guide for the new generation of interactive media and off-line and on-line) for the use of custom-cultural tourist routes, both physical (in the context of museums and cities) and virtual.},
  KEYWORDS = {Semantic Analysis, Information Retrieval, Text Mining},
  PAGES = {117--123},
  URL = {https://publications.cnr.it/doc/205537},
  VOLUME = {4},
  ISBN = {978-88-905639-8-0},
  CONFERENCE_NAME = {5th International Congress on "Science and Technology for the Safeguard of Cultural Heritage in the Mediterranean Basin"},
  CONFERENCE_PLACE = {Istanbul, Turkey},
  CONFERENCE_DATE = {22-25 November 2011},
}

@INPROCEEDINGS{SPADONI_2011_INPROCEEDINGS_STS_205482,
  AUTHOR = {Spadoni, F. and Tariffi, F.
and Sassolini, E.},
  TITLE = {SMARTCITY: Innovative Technologies for customized and dynamic multimedia content production for Tourism applications},
  YEAR = {2011},
  ABSTRACT = {This paper presents the first results of the SMARTCITY project, co-funded by the Tuscany Region under the POR CREO 1.d program. The project proposes an innovative methodology as well as advanced technologies enabling professional services for cultural tourism applications in urban areas as well as larger archaeological sites.},
  KEYWORDS = {smartcity project, Tourism Applications, Dynamic Multimedia Content Production},
  PAGES = {130--135},
  URL = {https://publications.cnr.it/doc/205482},
  PUBLISHER = {Pitagora Editrice Bologna (Bologna, ITA)},
  ISBN = {88-371-1837-6},
  CONFERENCE_NAME = {EVA 2011 Florence Electronic Imaging and the Visual Arts},
  CONFERENCE_PLACE = {Firenze},
  CONFERENCE_DATE = {4-5-6 maggio 2011},
  EDITOR = {Cappellini, V.},
}

@TECHREPORT{SASSOLINI_2011_TECHREPORT_SC_206250,
  AUTHOR = {Sassolini, E. and Cinini, A.},
  TITLE = {Metodologie di "thorough indexing" descrittivo, semantico e topologico delle risorse contenutistiche},
  YEAR = {2011},
  ABSTRACT = {D4 (deliverable) is the final result of task T2.1. of Smartcity project. It analyzes the principles and methodologies of "Thorough indexing" of the text materials in order to define possible new rules for the application of a systematic approach to tagging of knowledge base},
  KEYWORDS = {Term extraction, Analisi Semantica},
  URL = {https://publications.cnr.it/doc/206250},
}

@TECHREPORT{SASSOLINI_2011_TECHREPORT_SC_206417,
  AUTHOR = {Sassolini, E.
and Cinini, A.},
  TITLE = {SmartCity-II relazione scientifica: report tecnico},
  YEAR = {2011},
  ABSTRACT = {This technical report presents the use of NLP techniques (text mining, text analysis) to develop specific tools that allow to create linguistic resources related to the cultural heritage domain, particularly "Empoli e dintorni".},
  KEYWORDS = {Corpus (creation, annotation, etc.), Cultural Heritage},
  URL = {https://publications.cnr.it/doc/206417},
}

@TECHREPORT{SASSOLINI_2011_TECHREPORT_SS_206460,
  AUTHOR = {Sassolini, E. and Sbrulli, S.},
  TITLE = {Primo report tecnico: tecnologie per il trattamento delle informazioni multimediali. ILC-CNR e STRAT-CRIT},
  YEAR = {2011},
  ABSTRACT = {Technologies for semantic annotation, automatic classification, clustering and browsing in textual databases, for realization of a multimedia on-line press review.},
  KEYWORDS = {Information Extraction, Information Retrieval},
  URL = {https://publications.cnr.it/doc/206460},
}

@INPROCEEDINGS{PICCHI_2010_INPROCEEDINGS_PS_84806,
  AUTHOR = {Picchi, E. and Sassolini, E.},
  TITLE = {"Text power": Tools for the cultural heritage},
  YEAR = {2010},
  ABSTRACT = {This article presents NLP techniques (text mining, text analysis) to create tools for the evaluation, analysis and classification of text materials available on the web. In particular we developed tools for the automatic extraction of mistic relevant information related to the cultural heritage domain and tools for linguistic resources creation. On this knowledge basis, we also developed a system for text browsing.},
  KEYWORDS = {information extraction, named entity recognition, text browsing},
  PAGES = {435--439},
  URL = {http://www.cairocongress.com/},
  VOLUME = {1},
  ISBN = {978-88-96680-31-5},
  CONFERENCE_NAME = {4-th Intl. Congr. Science and Technology for the Safeguard of Cultural Heritage in the Mediterranean Basin},
  CONFERENCE_PLACE = {Il Cairo},
  CONFERENCE_DATE = {6-7-8 dicembre 2009},
  BOOKTITLE = {Proceedings in: CHC 2010-4-th Intl. Congr.
Science and Technology for the Safeguard of Cultural Heritage in the Mediterranean Basin (Il Cairo, 6-7-8/12/2009)},
  EDITOR = {Ferrari, A.},
}

@INPROCEEDINGS{SASSOLINI_2010_INPROCEEDINGS_SC_84768,
  AUTHOR = {Sassolini, E. and Cinini, A.},
  TITLE = {Cultural Heritage: Knowledge Extraction from Web Documents},
  YEAR = {2010},
  ABSTRACT = {This article presents the use of NLP techniques (text mining, text analysis) to develop specific tools that allow to create linguistic resources related to the cultural heritage domain. The aim of our approach is to create tools for the building of an online "knowledge network", automatically extracted from text materials concerning this domain. A particular methodology was experimented by dividing the automatic acquisition of texts, and consequently, the creation of reference corpus in two phases. In the first phase, on-line documents have been extracted from lists of links provided by human experts. All documents extracted from the web by means of automatic spider have been stored in a repository of text materials. On the basis of these documents, automatic parsers create the reference corpus for the cultural heritage domain. Relevant information and semantic concepts are then extracted from this corpus. In a second phase, all these semantically relevant elements (such as proper names, names of institutions, names of places, and other relevant terms) have been used as basis for a new search strategy of text materials from heterogeneous sources.
In this case also specialized crawlers (TP-crawler) have been used to work on a bulk of text materials available on line.},
  KEYWORDS = {Information Extraction, Information Retrieval, Text mining, Named Entity recognition},
  PAGES = {3363--3368},
  URL = {https://publications.cnr.it/doc/84768},
  ISBN = {978-2-9517408-6-0},
  CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Valletta, Malta},
  CONFERENCE_DATE = {17-23/05/2010},
}

@INPROCEEDINGS{PICCHI_2010_INPROCEEDINGS_PS_112960,
  AUTHOR = {Picchi, E. and Sassolini, E.},
  TITLE = {La tecnologia TextPower per la navigazione intelligente},
  YEAR = {2010},
  ABSTRACT = {Compito dell'ILC è di creare una rete di conoscenza linguistica, terminologica e semantica, estratta dai documenti, fatta di concetti che sintetizzano il valore semantico del documento. Questa rete di conoscenza individuata automaticamente costituisce la base conoscitiva necessaria alla classificazione e alla navigazione "intelligente" e rappresenta la ricchezza dello strumento e del servizio che l'Osservatorio può offrire.},
  KEYWORDS = {Text power, navigazione intelligente},
  PAGES = {419--425},
  URL = {http://oraal.ittig.cnr.it/oraal/},
  VOLUME = {1},
  ISBN = {88-14-17365-6},
  CONFERENCE_NAME = {Convegno di inaugurazione dell'Osservatorio sulle Regole dell'Agricoltura e dell'Alimentazione},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {22-23 GENNAIO 2010},
  BOOKTITLE = {PER UNO STUDIO INTERDISCIPLINARE SU AGRICOLTURA E ALIMENTAZIONE Atti del Convegno di inaugurazione dell'Osservatorio},
  EDITOR = {Sirsi, M. G. E.},
}

@ARTICLE{QUOCHI_2009_ARTICLE_QDSBMC_30876,
  AUTHOR = {Quochi, V. and Del Gratta, R. and Sassolini, E. and Bartolini, R. and Monachini, M. and Calzolari, N.},
  TITLE = {A Standard Lexical-Terminological Resource for the Bio Domain},
  YEAR = {2009},
  ABSTRACT = {The present paper describes a large-scale lexical resource for the biology domain designed both for human and for machine use.
This lexicon aims at semantic interoperability and extendability, through the adoption of ISO-LMF standard for lexical representation and through a granular and distributed encoding of relevant information. The first part of this contribution focuses on three aspects of the model that are of particular interest to the biology community: the treatment of term variants, the representation on bio events and the alignment with a domain ontology. The second part of the paper describes the physical implementation of the model: a relational database equipped with a set of automatic uploading procedures. Peculiarity of the BioLexicon is that it combines features of both terminologies and lexicons. A set verbs relevant for the domain is also represented with full details on their syntactic and semantic argument structure.},
  KEYWORDS = {Lexical representation model, Lexical Database, Computational Lexicography, Special Domains, Standards},
  PAGES = {325--335},
  URL = {https://publications.cnr.it/doc/30876},
  VOLUME = {5603},
  DOI = {10.1007/978-3-642-04235-5_28},
  PUBLISHER = {Springer (Berlin, Germania)},
  ISSN = {0302-9743},
  JOURNAL = {Lecture notes in computer science},
}

@INPROCEEDINGS{SASSOLINI_2009_INPROCEEDINGS_SP_84749,
  AUTHOR = {Sassolini, E. and Picchi, E.},
  TITLE = {Text Power: tools for the Cultural Heritage},
  YEAR = {2009},
  KEYWORDS = {Text power, Text mining, Cultural Heritage},
  URL = {https://publications.cnr.it/doc/84749},
  CONFERENCE_NAME = {4},
  CONFERENCE_PLACE = {Cairo-Egypt},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{SASSOLINI_2009_INPROCEEDINGS_SPEG_84742,
  AUTHOR = {Sassolini, E. and Picchi, E. and Ensini, M.
and Guerriero, L.},
  TITLE = {Il progetto SUBITO e l'analisi semantica come strumento utile all'innovazione biomedica},
  YEAR = {2009},
  KEYWORDS = {Biomedicina, Analisi Semantica, Progetto"Subito"},
  URL = {https://publications.cnr.it/doc/84742},
  CONFERENCE_NAME = {10° Congresso Nazionale @ITIM 2009/, Monserrato, Cagliari, Italy},
  CONFERENCE_PLACE = {Monserrato(CA)},
  CONFERENCE_DATE = {2009},
}

@TECHREPORT{PICCHI_2009_TECHREPORT_PSCE_157473,
  AUTHOR = {Picchi, E. and Sassolini, E. and Cucurullo, S. and Ensini, M.},
  TITLE = {WP3. 1 Tecniche di analisi semantica per l’estrazione di ontologie bio-medicali},
  YEAR = {2009},
  KEYWORDS = {Ontologie, Semantica computazionale, Terminologia medica},
  URL = {https://publications.cnr.it/doc/157473},
}

@INPROCEEDINGS{PICCHI_2008_INPROCEEDINGS_PSCB_84719,
  AUTHOR = {Picchi, E. and Sassolini, E. and Cucurullo, S. and Bertagna, F.},
  TITLE = {Mining the News with Semantic Press},
  YEAR = {2008},
  KEYWORDS = {Text mining, Press review},
  URL = {https://publications.cnr.it/doc/84719},
  CONFERENCE_NAME = {LangTech 2008},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {2008},
}

@INPROCEEDINGS{PICCHI_2008_INPROCEEDINGS_PSCBB_84727,
  AUTHOR = {Picchi, E. and Sassolini, E. and Cucurullo, S. and Bertagna, F. and Baroni, P.},
  TITLE = {Semantic Press},
  YEAR = {2008},
  ABSTRACT = {In this paper Semantic Press, a tool for the automatic press review, is introduced. It is based on Text Mining technologies and is tailored to meet the needs of the eGovernment and eParticipation communities. First, a general description of the application demands emerging from the eParticipation and eGovernment sectors is offered.
Then, an introduction to the framework of the automatic analysis and classification of newspaper content is provided, together with a description of the technologies underlying it.},
  KEYWORDS = {Text Mining, Tools, Systems, Applications},
  PAGES = {2752--2756},
  URL = {http://www.lrec-conf.org/proceedings/lrec2008/},
  PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)},
  ISBN = {2-9517408-4-0},
  CONFERENCE_NAME = {LREC 2008-Sixth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Marrakech},
  CONFERENCE_DATE = {26/05/2008-01/06/2008},
  BOOKTITLE = {Proceedings of the Sixth International Conference on Language Resources and Evaluation},
  EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odjik, J. and Piperidis, S. and Tapias, D.},
}

@MISC{PICCHI_2008_MISC_PCS_151568,
  AUTHOR = {Picchi, E. and Cucurullo, S. and Sassolini, E.},
  TITLE = {Semantic Press},
  YEAR = {2008},
  KEYWORDS = {Rassegna stampa, Estrazione di informazione},
  URL = {https://publications.cnr.it/doc/151568},
}

@MISC{SASSOLINI_2008_MISC_SPBP_151567,
  AUTHOR = {Sassolini, E. and Picchi, E. and Bellone, G. and Porquier, E.},
  TITLE = {Progetto per lo studio e la realizzazione di un sistema di erogazione on line in modalità multicanale, dei servizi sanitari prioritari per cittadini e imprese},
  YEAR = {2008},
  KEYWORDS = {Studi statistici, Patologia},
  URL = {https://publications.cnr.it/doc/151567},
}

@MISC{SASSOLINI_2008_MISC_SPH_151566,
  AUTHOR = {Sassolini, E. and Picchi, E. and Haines, M.},
  TITLE = {Gli anni della Cupola},
  YEAR = {2008},
  KEYWORDS = {Corpus trilingue di documenti antichi},
  URL = {https://publications.cnr.it/doc/151566},
}

@INPROCEEDINGS{QUOCHI_2007_INPROCEEDINGS_QDSMC_84735,
  AUTHOR = {Quochi, V. and Del Gratta, R. and Sassolini, E. and Monachini, M.
and Calzolari, N.},
  TITLE = {Toward a Standard Lexical Resource in the Bio Domain},
  YEAR = {2007},
  ABSTRACT = {The present paper describes a large-scale lexical resource for the biology domain designed both for human and for machine use. This lexicon aims at semantic interoperability and extendability, through the adoption of ISO-LMF standard for lexical representation and through a granular and distributed encoding of relevant information. The first part of this contribution focuses on three aspects of the model that are of particular interest to the biology community: the treatment of term variants, the representation on bio events and the alignment with a domain ontology. The second part of the paper describes the physical implementation of the model: a relational database equipped with a set of automatic uploading procedures. Peculiarity of the BioLexicon is that it combines features of both terminologies and lexicons. A set verbs relevant for the domain is also represented with full details on their syntactic and semantic argument structure.},
  KEYWORDS = {Lexical representation model, Lexical Database, Computational Lexicography, Special Domains, Standards},
  PAGES = {295--299},
  URL = {https://publications.cnr.it/doc/84735},
  PUBLISHER = {Fundacja Uniwersytetu im A. Mickiewicza (Poznan, POL)},
  ISBN = {978-83-7177-413-3},
  CONFERENCE_NAME = {LTC07-3rd Language and Technology Conference: Human Language Technology. Challenges of the Information Society},
  CONFERENCE_PLACE = {Poznan, Poland},
  CONFERENCE_DATE = {5-7 Ottobre 2007},
}

@TECHREPORT{DELGRATTA_2007_TECHREPORT_DMQSC_157425,
  AUTHOR = {Del Gratta, R. and Monachini, M. and Quochi, V. and Sassolini, E. and Calzolari, N.},
  TITLE = {Bio-Lexicon DataBase: Architecture, Concepts and Loading Software},
  YEAR = {2007},
  URL = {https://publications.cnr.it/doc/157425},
}

@TECHREPORT{PICCHI_2007_TECHREPORT_PSC_157426,
  AUTHOR = {Picchi, E. and Sassolini, E.
and Cucurullo, S.}, TITLE = {Implementazione di procedure per la Named Entity Recognition}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157426}, } @TECHREPORT{PICCHI_2007_TECHREPORT_PSC_157427, AUTHOR = {Picchi, E. and Sassolini, E. and Cucurullo, S.}, TITLE = {Implementazione di procedure di Clustering}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157427}, } @TECHREPORT{PICCHI_2007_TECHREPORT_PSC_157428, AUTHOR = {Picchi, E. and Sassolini, E. and Cucurullo, S.}, TITLE = {Implementazione di procedure di Spidering per sistemi di Alerting}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157428}, } @MISC{PICCHI_2007_MISC_PMCSP_157436, AUTHOR = {Picchi, E. and Montemagni, S. and Cucurullo, S. and Sassolini, E. and Paoli, M.}, TITLE = {ALT-Web. Sito dell’Atlante Lessicale Toscano (ALT) in rete}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157436}, } @MISC{PICCHI_2007_MISC_PSC_157429, AUTHOR = {Picchi, E. and Sassolini, E. and Cucurullo, S.}, TITLE = {Legislazione Toscana raccolta e illustrata da Lorenzo Cantini}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157429}, } @INPROCEEDINGS{CALZOLARI_2006_INPROCEEDINGS_CSSCPBEMSC_84625, AUTHOR = {Calzolari, F. and Sassolini, E. and Sassi, M. and Cucurullo, S. and Picchi, E. and Bertagna, F. and Enea, A. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {Next Generation Language Resources using Grid}, YEAR = {2006}, ABSTRACT = {This paper presents a case study concerning the challenges and requirements posed by next generation language resources, realized as an overall model of open, distributed and collaborative language infrastructure. If a sort of "new paradigm" for language resource sharing is required, we think that the emerging and still evolving technology connected to Grid computing is a very interesting and suitable one for a concrete realization of this vision. 
Given the current limitations of Grid computing, it is very important to test the new environment on basic language analysis tools, in order to get the feeling of what are the potentialities and possible limitations connected to its use in NLP. For this reason, we have done some experiments on a module of the Linguistic Miner, i.e. the extraction of linguistic patterns from restricted domain corpora. The Grid environment has produced the expected results (reduction of the processing time, huge storage capacity, data redundancy) without any additional cost for the final user.}, KEYWORDS = {grid, acquisition, topic classification}, PAGES = {1858-1861}, URL = {https://publications.cnr.it/doc/84625}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {24-26 Maggio 2006}, } @INPROCEEDINGS{CUCURULLO_2006_INPROCEEDINGS_CMPPS_84629, AUTHOR = {Cucurullo, S. and Montemagni, S. and Paoli, M. and Picchi, E. and Sassolini, E.}, TITLE = {Dialectal resources on-line: the ALT-Web experience}, YEAR = {2006}, ABSTRACT = {The paper presents an on-line dialectal resource, ALT-Web, which gives access to the linguistic data of the Atlante Lessicale Toscano, a specially designed linguistic atlas in which lexical data have both a diatopic and diastratic characterisation. 
The paper focuses on: the dialectal data representation model; the access modalities to the ALT dialectal corpus; ontology-based search.}, KEYWORDS = {Computational dialectology, Dialectal databases, Construction of lexical resources}, PAGES = {1846-1851}, URL = {http://www.lrec-conf.org/lrec2006/}, VOLUME = {Proceedings}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {24-25-26 Maggio 2006}, BOOKTITLE = {Dialectal resources on-line: the ALT-Web experience}, } @INPROCEEDINGS{CUCURULLO_2006_INPROCEEDINGS_CMPPS_84661, AUTHOR = {Cucurullo, S. and Montemagni, S. and Paoli, M. and Picchi, E. and Sassolini, E.}, TITLE = {Atlante Dialettale in rete: ALT-Web}, YEAR = {2006}, ABSTRACT = {The paper presents an on-line dialectal resource, ALT-Web, which gives access to the linguistic data of the Lexical Atlas of Tuscany or Atlante Lessicale Toscano, a specially designed linguistic atlas in which lexical data have both a diatopic and diastratic characterisation. The paper illustrates ALT-Web with particular emphasis on: 1) the dialectal data representation model; 2) the access modalities to the ALT dialectal corpus designed to produce an output tailored to the specific needs of the different classes of users (both professionals and common citizens); 3) ontology-based search. These represent three main features which differentiate ALT-Web both from the previous digitalised ALT version and, most interestingly, from other on-line dialectal resources. 
At the time of writing, this is the first resource of this type in Italy, and one of the few at the international level.}, KEYWORDS = {dialectal resources, information retrieval}, PAGES = {661-672}, URL = {http://www.euralex.org/publications/}, VOLUME = {2}, PUBLISHER = {Edizioni dell'ORSO (Alessandria, ITA)}, ISBN = {8876949186}, CONFERENCE_NAME = {12° EURALEX International Congress}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {6-9 Settembre 2006}, BOOKTITLE = {Proceedings in 12° EURALEX International Congress, Congresso internazionale di lessicografia}, EDITOR = {Corino, E. and Marello, C. and Onesti, C.}, } @TECHREPORT{QUOCHI_2006_TECHREPORT_QMCDS_157403, AUTHOR = {Quochi, V. and Monachini, M. and Calzolari, N. and Del Gratta, R. and Sassolini, E.}, TITLE = {Bio-Lexicon Model and Preliminary ISO Conformant Data Categories}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157403}, } @MISC{PICCHI_2006_MISC_PMSCP_151557, AUTHOR = {Picchi, E. and Montemagni, S. and Sassolini, E. and Cucurullo, S. and Paoli, M.}, TITLE = {ALTWEB}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/151557}, } @MISC{PICCHI_2006_MISC_PSCSC_151561, AUTHOR = {Picchi, E. and Sassi, M. and Ceccotti, M. L. and Sassolini, E. and Cucurullo, S.}, TITLE = {Linguistic Miner}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/151561}, } @TECHREPORT{CUCURULLO_2005_TECHREPORT_CMPPS_157373, AUTHOR = {Cucurullo, S. and Montemagni, S. and Paoli, M. and Picchi, E. and Sassolini, E.}, TITLE = {Atlante Lessicale Toscano in rete (ALT-Web). Relazione finale}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157373}, } @TECHREPORT{SASSOLINI_2005_TECHREPORT_SE_373510, AUTHOR = {Sassolini, E. 
and Evangelista, A.}, TITLE = {P8/L488-01: report tecnico di progetto}, YEAR = {2005}, ABSTRACT = {Diffusione della cultura e valorizzazione del patrimonio letterario della lingua italiana e della lingua araba attraverso una diffusione telematica di banche dati letterarie.}, KEYWORDS = {trattamento della lingua araba, applicazione web}, PAGES = {12}, URL = {https://publications.cnr.it/doc/373510}, } @MISC{MONACHINI_2005_MISC_MSPSR_151547, AUTHOR = {Monachini, M. and Soria, C. and Picchi, E. and Sassolini, E. and Ruffolo, P.}, TITLE = {Procedure e tecniche di acquisizione semi-automatica di terminologie da testi paralleli}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151547}, } @MISC{PICCHI_2005_MISC_PMSCP_151532, AUTHOR = {Picchi, E. and Montemagni, S. and Sassolini, E. and Cucurullo, S. and Paoli, M.}, TITLE = {ALTWEB}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151532}, } @MISC{PICCHI_2005_MISC_PSCSC_151533, AUTHOR = {Picchi, E. and Sassi, M. and Ceccotti, M. L. and Sassolini, E. and Cucurullo, S.}, TITLE = {Linguistic Miner}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151533}, } @INPROCEEDINGS{PICCHI_2004_INPROCEEDINGS_PCCSS_84615, AUTHOR = {Picchi, E. and Ceccotti, M. L. and Cucurullo, S. and Sassi, M. and Sassolini, E.}, TITLE = {Linguistic Miner. An Italian Linguistic Knowledge System}, YEAR = {2004}, ABSTRACT = {Linguistic Miner is a project carried out at ILC whose objective is the development of an integrated system to build, organise and manage a corpus of Italian texts (of various origins and formats), and to design and constantly add new tools for the automatic extraction of tiered linguistic knowledge to be made available for many teaching, publishing, and other cultural purposes. 
The project is based on a notion that is preliminary to all the systems for corpus-based linguistic analysis: a language represented by the largest possible collection of heterogeneous texts is the best source of linguistic information at any level of analysis considered. The first goals of such a system are the semi-automated construction of an Italian data mine for the extraction of linguistic information, the validation of linguistic patterns, the installation of useful tools and resources for a range of different categories of Italian language users. The main feature of the project is its purpose of building large language reference corpora allowing for the creation and use of effective tools for the handling and processing, as well as the automatic linguistic synthesis, of such corpora.}, KEYWORDS = {linguistic analysis, information extraction}, PAGES = {1811-1814}, URL = {http://www.lrec-conf.org/lrec2004/}, VOLUME = {V}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {26-27-28 Maggio 2004}, BOOKTITLE = {Proceedings of the 4th International Conference on Language Resources and Evaluation}, } @TECHREPORT{GAVRILIDOU_2004_TECHREPORT_GGDLMSPRS_157392, AUTHOR = {Gavrilidou, M. and Giouli, V. and Desipri, E. and Labropoulou, P. and Monachini, M. and Soria, C. and Picchi, E. and Ruffolo, P. and Sassolini, E.}, TITLE = {Report on the multilingual resources production}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/157392}, } @ARTICLE{PICCHI_2003_ARTICLE_PSNC_64493, AUTHOR = {Picchi, E. and Sassolini, E. and Nahli, O. and Cucurullo, S.}, TITLE = {Risorse monolingui e multilingui. 
Corpus bilingue italiano-arabo}, YEAR = {2003}, ABSTRACT = {Abstract - The objective of the project is twofold: on the one hand, the creation and elaboration of software procedures for the Arabic language and, on the other hand, the creation of linguistic resources for the management of large Arabic corpora. The linguistic resources are substantially the following: a) Morphological engine for the Arabic language. The engine is constituted by a number of modules: the algorithms and modules for generation and analysis, an appropriate encoding system for the representation of lexical data and of morphological characteristics of Arabic, the so-called “lemmario”, i.e. the archive of lemmas; b) The automatic alignment of parallel texts in Italian and Arabic language; c) Automatic tagging of Arabic texts, performed by using the above morphological engine; d) Systems for accessing and querying (raw and/or tagged) Arabic texts and parallel Italian-Arabic corpora.}, KEYWORDS = {Morfologia araba, Corpora bilingui, Analisi testuale, Aligner, Tagger}, PAGES = {629-678}, URL = {https://publications.cnr.it/doc/64493}, VOLUME = {18-19}, PUBLISHER = {Istituti Editoriali e Poligrafici Internazionali (Ghezzano La Fontina, Italia)}, ISSN = {1824-1573}, JOURNAL = {Linguistica computazionale (Online)}, } @INPROCEEDINGS{PICCHI_2003_INPROCEEDINGS_PCCCFSST_84548, AUTHOR = {Picchi, E. and Ceccotti, M. L. and Cignoni, L. and Cucurullo, N. and Fiorentini, G. and Sassi, M. and Sassolini, E. and Turrini, G.}, TITLE = {Linguistic Miner}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84548}, CONFERENCE_NAME = {Congresso annuale AICA 2003: I costi dell'ignoranza e il valore della conoscenza nella società dell'informazione}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{PICCHI_2002_INPROCEEDINGS_PSNCV_288585, AUTHOR = {Picchi, E. and Sassolini, E. and Nahli, O. and Cucurullo, S. and Vargas, I. 
M.}, TITLE = {Italian Arabic Linguistic Tools}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/288585}, VOLUME = {Volume II}, CONFERENCE_NAME = {LREC 2002}, CONFERENCE_PLACE = {Las Palmas de Gran Canaria, Spain}, CONFERENCE_DATE = {30th \& 31 May 2002}, BOOKTITLE = {Third International Conference on Language Resources and Evaluation}, } @TECHREPORT{SASSOLINI_2002_TECHREPORT_SN_288591, AUTHOR = {Sassolini, E. and Nahli, O.}, TITLE = {Motore morfologico della lingua araba}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/288591}, }