@INPROCEEDINGS{AGNOLONI_2022_INPROCEEDINGS_ABFMMQRV_472294, AUTHOR = {Agnoloni, T. and Bartolini, R. and Frontini, F. and Montemagni, S. and Marchetti, C. and Quochi, V. and Ruisi, M. and Venturi, G.}, TITLE = {Making Italian Parliamentary Records Machine-Actionable: the Construction of the ParlaMint-IT corpus}, YEAR = {2022}, ABSTRACT = {This paper describes the process of acquisition, cleaning, interpretation, coding and linguistic annotation of a collection of parliamentary debates from the Senate of the Italian Republic covering the COVID-19 pandemic emergency period and a former period for reference and comparison according to the CLARIN ParlaMint prescriptions. The corpus contains 1199 sessions and 79,373 speeches for a total of about 31 million words, and was encoded according to the ParlaCLARIN TEI XML format. It includes extensive metadata about the speakers, sessions, political parties and parliamentary groups. As required by the ParlaMint initiative, the corpus was also linguistically annotated for sentences, tokens, POS tags, lemmas and dependency syntax according to the universal dependencies guidelines. Named entity annotation and classification is also included. All linguistic annotation was performed automatically using state-of-the-art NLP technology with no manual revision. The Italian dataset is freely available as part of the larger ParlaMint 2.1 corpus deposited and archived in CLARIN repository together with all other national corpora. 
It is also available for direct analysis and inspection via various CLARIN services and has already been used both for research and educational purposes.}, KEYWORDS = {parliamentary debates, CLARIN ParlaMint, corpus creation, corpus annotation}, PAGES = {117-124}, URL = {https://aclanthology.org/2022.parlaclarin-1.17/}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, CONFERENCE_NAME = {Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference}, CONFERENCE_PLACE = {Marseille, France}, CONFERENCE_DATE = {20/06/2022}, } @TECHREPORT{BARTOLINI_2020_TECHREPORT_BQMA_453502, AUTHOR = {Bartolini, R. and Quochi, V. and Monachini, M. and Affé, F.}, TITLE = {Relazione di fine progetto "PIM-Piattaforma Integrata Monitoraggio"}, YEAR = {2020}, ABSTRACT = {Il documento presenta l'attività svolta dal CNR-ILC nel ruolo di subcontraente di COMDATA per la realizzazione di moduli di trattamento automatico del linguaggio e la consulenza per l'integrazione di metodi di clustering automatico di documenti nella Digital Library del progetto PIM.}, KEYWORDS = {accesso intelligente al testo, digital library, natural language processing}, PAGES = {156}, URL = {https://publications.cnr.it/doc/453502}, } @ARTICLE{GOGGI_2019_ARTICLE_GPBMBC_411599, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Monachini, M. and Biagioni, S. and Carlesi, C.}, TITLE = {Semantic Query Analysis from the Global Science Gateway}, YEAR = {2019}, ABSTRACT = {Nowadays web portals play an essential role in searching and retrieving information in the several fields of knowledge: they are ever more technologically advanced and designed for supporting the storage of a huge amount of information in natural language originating from the queries launched by users worldwide. 
Given this scenario, we focused on building a corpus constituted by the query logs registered by the GreyGuide: Repository and Portal to Good Practices and Resources in Grey Literature and received by the WorldWideScience.org (The Global Science Gateway) portal: the aim is to retrieve information related to social media which as of today represent a considerable source of data more and more widely used for research ends.}, KEYWORDS = {Information Extraction, Query Log, WorldWideScience Alliance, Information gateways, Social Media}, PAGES = {147-155}, URL = {https://publications.cnr.it/doc/411599}, VOLUME = {15}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @INPROCEEDINGS{GOGGI_2019_INPROCEEDINGS_GPBMBC_400343, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Monachini, M. and Biagioni, S. and Carlesi, C.}, TITLE = {Semantic query analysis from the global science gateway}, YEAR = {2019}, ABSTRACT = {We focused on building a corpus constituted by the query logs registered by the GreyGuide: Repository and Portal to Good Practices and Resources in Grey Literature and received by the WorldWideScience.org (The Global Science Gateway) portal.}, KEYWORDS = {Information Extraction, Terminology}, PAGES = {105-113}, URL = {https://publications.cnr.it/doc/400343}, VOLUME = {20}, ISBN = {978-90-77484-33-3}, CONFERENCE_NAME = {GL20-Twentieth International Conference on Grey Literature: Research Data Fuels and Sustains Grey Literature}, CONFERENCE_PLACE = {New Orleans, USA}, CONFERENCE_DATE = {3-4 December 2018}, BOOKTITLE = {Research Data Fuels and Sustains Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @ARTICLE{GOGGI_2018_ARTICLE_GPRBM_388612, AUTHOR = {Goggi, S. and Pardelli, G. and Russo, I. and Bartolini, R. 
and Monachini, M.}, TITLE = {Providing Access to Grey Literature: The CLARIN Infrastructure}, YEAR = {2018}, ABSTRACT = {"In the electronic age, the World Wide Web has played a major role in making scientific information accessible to a wide audience more rapidly and efficiently. This democratic approach to information dissemination in science is changing the way science is perceived and implemented in our daily lives" (Weintraub, 2000).}, KEYWORDS = {CLARIN-IT, CLARIN-European Research Infrastructure for Language Resources and Technology, Grey Literature}, PAGES = {87-93}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85048643343\&origin=inward}, VOLUME = {14}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @INPROCEEDINGS{BARTOLINI_2018_INPROCEEDINGS_BGMP_387159, AUTHOR = {Bartolini, R. and Goggi, S. and Monachini, M. and Pardelli, G.}, TITLE = {The LREC Workshops Map}, YEAR = {2018}, ABSTRACT = {The aim of this work is to present an overview of the research presented at the LREC workshops over the years 1998-2016 with the aim to shed light on the community represented by workshop participants in terms of country of origin, type of affiliation, gender. There has been also an effort towards the identification of the major topics dealt with as well as of the terminological variations noticed in this time span. 
Data has been retrieved from the portal of the European Language Resources Association (ELRA) which organizes the conference and the resulting corpus made up of workshops titles and of the related presentations has then been processed using a term extraction tool developed at ILC-CNR.}, KEYWORDS = {corpus creation, terminology, LREC}, PAGES = {557-562}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/summaries/639.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{GOGGI_2018_INPROCEEDINGS_GPRBM_385571, AUTHOR = {Goggi, S. and Pardelli, G. and Russo, I. and Bartolini, R. 
and Monachini, M.}, TITLE = {Providing Access to Grey Literature: The CLARIN Infrastructure}, YEAR = {2018}, ABSTRACT = {This work will provide a map of the documentation archived in the CLARIN infrastructure, whose purpose is to share language resources produced and managed in the various European countries but finally merged into the CLARIN data centers for allowing access, interoperability, reuse and preservation of scientific documentation as well as Grey Literature.}, KEYWORDS = {CLARIN Infrastructure, Language Resources, Grey Literature}, PAGES = {93-99}, URL = {http://greyguide.isti.cnr.it/wp-content/uploads/2018/03/GL19_Conference_Proceedings.pdf}, VOLUME = {19}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-31-9}, CONFERENCE_NAME = {Nineteenth International Conference on Grey Literature, GL19}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {October 23-24, 2017}, BOOKTITLE = {Nineteenth International Conference on Grey Literature "Public Awareness and Access to Grey Literature"}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{GOGGI_2018_INPROCEEDINGS_GPBMBC_395584, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Monachini, M. and Biagioni, S. and Carlesi, C.}, TITLE = {Semantic query analysis from the global science gateway}, YEAR = {2018}, ABSTRACT = {We focused on building a corpus constituted by the query logs registered by the GreyGuide: Repository and Portal to Good Practices and Resources in Grey Literature and received by the WorldWideScience.org (The Global Science Gateway) portal: the aim is to retrieve information related to social media which as of today represent a considerable source of data more and more widely used for research ends. This project includes eight months of query logs3 registered between July 2017 and February 2018 for a total of 445,827 queries. 
The analysis mainly concentrates on the semantics of the queries received from the portal clients: it is a process of information retrieval from a rich digital catalogue whose language is dynamic, is evolving and follows - as well as reflects - the cultural changes of our modern society.}, KEYWORDS = {Global Science Gateway, Semantic Query Analysis, Terminology}, PAGES = {93-95}, URL = {http://greyguide.isti.cnr.it/wp-content/uploads/2018/12/GL20_ProgramBook.pdf}, VOLUME = {20}, ISBN = {978-90-77484-34-0}, CONFERENCE_NAME = {Twentieth International Conference on Grey Literature "Research Data Fuels and Sustains Grey Literature"}, CONFERENCE_PLACE = {New Orleans, USA (Loyola University)}, CONFERENCE_DATE = {December 3-4, 2018}, BOOKTITLE = {Research Data Fuels and Sustains Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @ARTICLE{BARTOLINI_2017_ARTICLE_BPGGB_369103, AUTHOR = {Bartolini, R. and Pardelli, G. and Goggi, S. and Giannini, S. and Biagioni, S.}, TITLE = {A terminological "journey" in the Grey Literature domain}, YEAR = {2017}, ABSTRACT = {"It is by means of terms that the expert usually transfer their knowledge and again through terms scientific communication reaches the highest effectiveness. Therefore we can assert that terminology - in the sense of a set of representative and domain-specific units - is necessary for representing and connecting specialized fields as well as any attempt to represent and/or transfer scientific knowledge requires, more or less extensively, the use of terminology." (Cabré, 2000). "When we read the articles or papers of a particular domain, we can recognize some lexical items in the texts as technical terms. In a domain where new knowledge is generated, new terms are constantly created to fulfill the needs of the domain, while others become obsolete. In addition, existing terms may undergo changes of meaning..." (Kageura K., 1998/1999). 
Specialized lexicons are made up of the terms which are specific to each field of knowledge, «a subset which is distinct but not separated from the common language» (Cassese, 1992): it is usually difficult to extract the relevant domain-specific terminology, meaning to discern terms which belong to a specialized glossary from those belonging to the common dictionary. The interest in the study of terminology and the "truth" contained in the above definitions has led us to make a "journey" in the Grey Literature (GL) domain in order to offer an overall vision on the terms used and the links between them. Within this scenario, the work analyzes a corpus constituted of the entire amount of full research papers published in the GL conference series over a time-span of more than one decade (2003-2014) with the aim of creating a terminological map of relevant words in the various GL research topics. "... corpora used to extract terminological units can be further investigated to find semantic and conceptual information on terms or to represent conceptual relationships between terms. (Bourigault D. et al., 2001). Another interesting inquiry is the terminology used in the GL conferences for describing the types of documents which can be detected (Pejšová P. et al., 2012).}, KEYWORDS = {Grey Literature, Information Extraction IE, Terminology}, PAGES = {41-53}, URL = {http://www.greynet.org/thegreyjournal/currentissue.html}, VOLUME = {13}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @INCOLLECTION{MANZELLA_2017_INCOLLECTION_MBBDDFMMMNS_368363, AUTHOR = {Manzella, G. M. R. and Bartolini, R. and Bustaffa, F. and D'Angelo, P. and De Mattei, M. and Frontini, F. and Maltese, M. and Medone, D. and Monachini, M. and Novellino, A. 
and Spada, A.}, TITLE = {Semantic Search Engine for Data Management and Sustainable Development: Marine Planning Service Platform}, YEAR = {2017}, ABSTRACT = {This chapter presents a computer platform supporting a Marine Information and Knowledge System based on a repository that gathers, classifies and structures marine scientific literature and data, guaranteeing their accessibility by means of standard protocols. This requires the access to quality controlled data and to information that is provided in grey literature and/or in relevant scientific literature. There exist efforts to develop search engines to find author's contributions to scientific literature or publications. This implies the use of persistent identifiers. However very few efforts are dedicated to link publications to data that was used, or cited in them or that can be of importance for the published studies. Full-text technologies are often unsuccessful since they assume the presence of specific keywords in the text; to fix this problem, it is suggested to use different semantic technologies for retrieving the text and data and thus getting much more complying results.}, KEYWORDS = {Marine Information and Knowledge System}, PAGES = {127-154}, URL = {http://www.igi-global.com/chapter/semantic-search-engine-for-data-management-and-sustainable-development/166839#}, VOLUME = {7}, DOI = {10.4018/978-1-5225-0700-0.ch006}, PUBLISHER = {IGI Global (Hershey, USA)}, BOOKTITLE = {Oceanographic and Marine Cross-Domain Data Management for Sustainable Development}, EDITOR = {Diviacco, P. and Leadbetter, A. and Glaves, H.}, } @INPROCEEDINGS{BARTOLINI_2017_INPROCEEDINGS_BPGGB_368487, AUTHOR = {Bartolini, R. and Pardelli, G. and Goggi, S. and Giannini, S. 
and Biagioni, S.}, TITLE = {A terminological "journey" in the Grey Literature domain}, YEAR = {2017}, ABSTRACT = {The work analyzes a corpus constituted of the entire amount of full research papers published in the GL conference series over a time-span of more than one decade (2003-2014) with the aim of creating a terminological map of relevant words in the various GL research topics. "... corpora used to extract terminological units can be further investigated to find semantic and conceptual information on terms or to represent conceptual relationships between terms. (Bourigault D. et al., 2001). Another interesting inquiry is the terminology used in the GL conferences for describing the types of documents which can be detected (Pejšová P. et al., 2012).}, KEYWORDS = {Grey Literature, Information Extraction IE, Terminology}, PAGES = {117-130}, URL = {https://publications.cnr.it/doc/368487}, VOLUME = {18}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-30-2}, CONFERENCE_NAME = {Eighteenth International Conference on Grey Literature (GL18): Leveraging Diversity in Grey Literature}, CONFERENCE_PLACE = {New York, US}, CONFERENCE_DATE = {November 28-29, 2016}, BOOKTITLE = {Proceedings of the Eighteenth International Conference on Grey Literature (GL18): Leveraging Diversity in Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{PARDELLI_2017_INPROCEEDINGS_PGBRM_367782, AUTHOR = {Pardelli, G. and Goggi, S. and Bartolini, R. and Russo, I. 
and Monachini, M.}, TITLE = {A Geographical Visualization of GL Communities: A Snapshot}, YEAR = {2017}, ABSTRACT = {This quotation stresses the important role of the several international organizations in producing and disseminating knowledge in the field of Grey Literature (GL): the paper aims to provide a first snapshot of the geographical distribution of GL organizations and their participation to the annual International Conference on Grey Literature over the time (in the period from 2003 to 2015. See List of Conferences on Table 2 ). Nowadays a visual representation of data is often associated with the traditional statistical graphs, in particular for representing complex phenomena by means of maps and diagrams, which allow a deeper and more focused analysis of the data. In our case the geographical representation of stakeholders in government, academics, business and industry aims at visualizing the GL community across the globe: it concerns 674 organizations which over the years have contributed to the development of a common vision on the most pressing issues of the field by using new paradigms such as Open Access and the social networks.}, KEYWORDS = {Geographical Visualization, Grey Literature Communities}, PAGES = {109-113}, URL = {http://greyguide.isti.cnr.it/wp-content/uploads/2017/04/GL18_Conference_Proceedings.pdf}, VOLUME = {18}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-30-2}, CONFERENCE_NAME = {Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, CONFERENCE_PLACE = {Washington}, CONFERENCE_DATE = {November 28-29, 2016}, BOOKTITLE = {GL18 Conference Proceedings Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{BARTOLINI_2017_INPROCEEDINGS_BGPRFF_377073, AUTHOR = {Bartolini, R. and Goggi, S. and Pardelli, G. and Russo, I. and Farace, D. 
and Frantzen, J.}, TITLE = {Data Visualization of a Grey Literature Community: A Cooperative Project}, YEAR = {2017}, ABSTRACT = {The expected outcome of this project will not only produce a revised and updated publication of International Directory of Organizations in Grey Literature, IDGL, but will also provide a visual overview of GreyNet as an international organization serving diverse communities with shared interests in grey literature. It would be a demonstration of GreyNet's commitment to research, publication, open access, education, and public awareness in this field of library and information science.}, KEYWORDS = {International Directory of Organizations in Grey Literature, Data Visualization}, PAGES = {63-63}, URL = {https://publications.cnr.it/doc/377073}, VOLUME = {19}, ISBN = {978-90-77484-32-6}, CONFERENCE_NAME = {Nineteenth International Conference on Grey Literature, GL19}, CONFERENCE_PLACE = {Rome, National Research Council, CNR}, CONFERENCE_DATE = {October 23-24, 2017}, BOOKTITLE = {Nineteenth International Conference on Grey Literature Public Awareness and Access to Grey Literature. Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{GOGGI_2017_INPROCEEDINGS_GPRBM_377070, AUTHOR = {Goggi, S. and Pardelli, G. and Russo, I. and Bartolini, R. 
and Monachini, M.}, TITLE = {Providing Access to Grey Literature: The CLARIN Infrastructure}, YEAR = {2017}, ABSTRACT = {This work will provide a map of the documentation archived in the CLARIN infrastructure, whose purpose is to share language resources produced and managed in the various European countries but finally merged into the CLARIN data centers for allowing access, interoperability, reuse and preservation of scientific documentation as well as Grey Literature.}, KEYWORDS = {CLARIN ERIC, Terminological Resources, Grey Literature}, PAGES = {60-62}, URL = {https://publications.cnr.it/doc/377070}, VOLUME = {19}, ISBN = {978-90-77484-32-6}, CONFERENCE_NAME = {Nineteenth International Conference on Grey Literature, GL19}, CONFERENCE_PLACE = {Rome, National Research Council, CNR}, CONFERENCE_DATE = {October 23-24, 2017}, BOOKTITLE = {Nineteenth International Conference on Grey Literature Public Awareness and Access to Grey Literature. Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @ARTICLE{GOGGI_2016_ARTICLE_GPBFMMDB_359144, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Frontini, F. and Monachini, M. and Manzella, G. and De Mattei, M. and Bustaffa, F.}, TITLE = {A semantic engine for grey literature retrieval in the oceanography domain}, YEAR = {2016}, ABSTRACT = {Here we present the final results of the MAPS (Marine Planning and Service Platform) project, an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. The system takes as input non-textual data (measurements) and text - both published papers and documentation - and it provides an advanced search facility thanks to the rich set of metadata and, above all, to the possibility of a refined and domain targeted key-word indexing of texts using Natural Language Processing (NLP) techniques. 
The paper describes the system in its details providing also evidence of evaluation.}, KEYWORDS = {Information Extraction, Search Engine, Operative Oceanography}, PAGES = {155-161}, URL = {http://www.greynet.org/thegreyjournal/currentissue.html}, VOLUME = {12}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @INPROCEEDINGS{DELGRATTA_2016_INPROCEEDINGS_DFMPRBKSC_355425, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Khan, F. and Soria, C. and Calzolari, N.}, TITLE = {LREC as a Graph: People and Resources in a Network}, YEAR = {2016}, ABSTRACT = {This proposal describes a new way to visualise resources in the LREMap, a community-built repository of language resource descriptions and uses. The LREMap is represented as a force-directed graph, where resources, papers and authors are nodes. The analysis of the visual representation of the underlying graph is used to study how the community gathers around LRs and how LRs are used in research.}, KEYWORDS = {Language Resources, Resources Documentation, Data Visualisation}, PAGES = {2529-2532}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{GOGGI_2016_INPROCEEDINGS_GPBFMMDB_350374, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Frontini, F. and Monachini, M. and Manzella, G. and De Mattei, M. 
and Bustaffa, F.}, TITLE = {A semantic engine for grey literature retrieval in the oceanography domain}, YEAR = {2016}, ABSTRACT = {Here we present the final results of the MAPS (Marine Planning and Service Platform) project, an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. The system takes as input non-textual data (measurements) and text - both published papers and documentation - and it provides an advanced search facility thanks to the rich set of metadata and, above all, to the possibility of a refined and domain targeted key-word indexing of texts using Natural Language Processing (NLP) techniques. The paper describes the system in its details providing also evidence of evaluation.}, KEYWORDS = {Information Extraction, Search Engine, Operative Oceanography}, PAGES = {104-111}, URL = {https://publications.cnr.it/doc/350374}, VOLUME = {17}, ISBN = {978-90-77484-27-2}, CONFERENCE_NAME = {Seventeenth International Conference on Grey Literature. A New Wave of Textual and Non-Textual Grey Literature}, CONFERENCE_PLACE = {Amsterdam}, CONFERENCE_DATE = {December 1st-2nd 2015}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{BARTOLINI_2016_INPROCEEDINGS_BPGGB_362848, AUTHOR = {Bartolini, R. and Pardelli, G. and Goggi, S. and Giannini, S. and Biagioni, S.}, TITLE = {A terminological "journey" in the Grey Literature domain}, YEAR = {2016}, ABSTRACT = {"When we read the articles or papers of a particular domain, we can recognize some lexical items in the texts as technical terms. In a domain where new knowledge is generated, new terms are constantly created to fulfil the needs of the domain, while others become obsolete. In addition, existing terms may undergo changes of meaning..." (Kageura K.,1998/1999). 
According to Kageura, our aim with this work is to make a "journey" in the Grey Literature (GL) domain in order to offer an overall vision on the terms used and the links between them. Moreover, by performing a terminological comparison over a given period of time it could be possible to trace the presence of obsolete words as well as of neologisms in the most recent research fields. Within this scenario, the work analyzes a corpus constituted of the entire amount of full research papers published in the GL conference series over a time span of more than one decade (2003-2014) with the aim of creating a terminological map of relevant words. "... corpora used to extract terminological units can be further investigated to find semantic and conceptual information on terms or to represent conceptual relationships between terms. (Bourigault D. et al., 2001). Another interesting inquiry is the terminology used in the GL conferences for describing the types of documents (Pejšová P. et al., 2012). The work is split up in four sections: creation of the corpus by acquiring the digital papers of GL conference proceedings (GL5 - GL16)1; data cleaning; data processing; terminological analysis and comparison. The corpus - made up of 231 research papers (for a total amount of 785.042 tokens) - was processed using a Natural Language Processing (NLP) tool for term extraction developed at the Institute of Computational Linguistics "Antonio Zampolli" of CNR (Goggi et al. 2015; 2016). This tool is what is called a "pipeline" (that is, a sequence of different tools) which extracts lexical knowledge from texts: in short, this is a rule system tool for knowledge extraction and document indexing that combines NLP technologies for term extraction and techniques to measure the associative strength of multi-words. This tool extracts a list of single (monograms) and multi-word terms (bigrams and trigrams) ordered by frequency with respect to the context. 
The pipeline - used as semantic engine within the MAPS project - has been customized for the extraction of terms from our corpus. This survey on the results of the information extraction process performed by the described NLP tool has been a sort of linguistic path in the past and present of terminology used in GL proceedings. By means of samplings, it has been possible to obtain the terminological flow in GL domain and to determine if and how the lexicon was evolving over these twelve years and investigate on its dynamic nature.}, KEYWORDS = {Grey Literature, Digital Repositories, Open Access}, PAGES = {79-84}, URL = {https://publications.cnr.it/doc/362848}, VOLUME = {18}, ISBN = {978-90-77484-29-6}, CONFERENCE_NAME = {GL18-Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, CONFERENCE_PLACE = {New York, US}, CONFERENCE_DATE = {28-29 November 2016}, BOOKTITLE = {Leveraging Diversity in Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{MANZELLA_2016_INPROCEEDINGS_MBBDDFMMMNS_355476, AUTHOR = {Manzella, G. M. R. and Bartolini, R. and Bustaffa, F. and D'Angelo, P. and De Mattei, M. and Frontini, F. and Maltese, M. and Medone, D. and Monachini, M. and Novellino, A. and Spada, A.}, TITLE = {Marine Planning and Service Platform: Specific Ontology Based semantic Search Engine Serving Data Management and Sustainable Development}, YEAR = {2016}, ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is aiming at building a computer platform supporting a Marine Information and Knowledge System. One of the main objective of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. 
In oceanography the cost related to data collection is very high and the new paradigm is based on the concept to collect once and re-use many times (for re-analysis, marine environment assessment, studies on trends, etc). This concept requires the access to quality controlled data and to information that is provided in reports (grey literature) and/or in relevant scientific literature. Hence, creation of new technology is needed by integrating several disciplines such as data management, information systems, knowledge management...}, KEYWORDS = {Marine Information, Knowledge System}, PAGES = {2}, URL = {http://meetingorganizer.copernicus.org/EGU2016/orals/20144}, VOLUME = {18}, PUBLISHER = {Copernicus GmbH (Katlenburg-Lindau, Germania)}, ISSN = {1607-7962}, CONFERENCE_NAME = {European Geosciences Union General Assembly (EGU 2016)}, CONFERENCE_PLACE = {Vienna, Austria}, CONFERENCE_DATE = {17-22 aprile 2016}, BOOKTITLE = {Geophysical research abstracts (Online)}, } @INPROCEEDINGS{PARDELLI_2016_INPROCEEDINGS_PGMBR_362073, AUTHOR = {Pardelli, G. and Goggi, S. and Monachini, M. and Bartolini, R. and Russo, I.}, TITLE = {A Geographical Visualization of GL Community: a Snapshot}, YEAR = {2016}, ABSTRACT = {"Today, in the spirit of science, grey literature communities are called to demonstrate their know-how and merit to wider audiences" [Farace Dominic J., 2011]. This quotation stresses the important role of the several international organizations in producing and disseminating knowledge in the field of Grey Literature (GL): the paper aims to provide a first snapshot of the geographical distribution of GL organizations and their participation to the annual International Conference on Grey Literature over the time (in the period from 2003 to 2015). 
Nowadays a visual representation of data is often associated with the traditional statistical graphs, in particular for representing complex phenomena by means of maps and diagrams, which allow a deeper and more focused analysis of the data. In our case the geographical representation of stakeholders in government, academics, business and industry aims at visualizing the GL community across the globe: it concerns 675 organizations which over the years have contributed to the development of a common vision on the most pressing issues of the field by using new paradigms such as Open Access and the social networks.}, KEYWORDS = {Geographical Visualization, Grey Literature}, PAGES = {67-67}, URL = {https://publications.cnr.it/doc/362073}, VOLUME = {18}, ISBN = {978-90-77484-29-6}, CONFERENCE_NAME = {Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, CONFERENCE_PLACE = {New York}, CONFERENCE_DATE = {November 28-29, 2016}, BOOKTITLE = {GL18 Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @ARTICLE{GOGGI_2015_ARTICLE_GMFBPDBM_334894, AUTHOR = {Goggi, S. and Monachini, M. and Frontini, F. and Bartolini, R. and Pardelli, G. and De Mattei, M. and Bustaffa, F. and Manzella, G.}, TITLE = {Marine Planning and Service Platform (MAPS) An Advanced Research Engine for Grey Literature in Marine Science}, YEAR = {2015}, ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is a development of the Marine project (Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013) aiming at building a computer platform for supporting a Marine Information and Knowledge System, as part of the data management activities. One of the main objective of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. 
We will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data. The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs/problems and their answers.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {171-178}, URL = {https://publications.cnr.it/doc/334894}, VOLUME = {11}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @INPROCEEDINGS{DELGRATTA_2015_INPROCEEDINGS_DFMPRBGKQSC_342213, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Goggi, S. and Khan, F. and Quochi, V. and Soria, C. and Calzolari, N.}, TITLE = {Visualising Italian Language Resources: a Snapshot}, YEAR = {2015}, ABSTRACT = {This paper aims to provide a first snapshot of Italian Language Resources (LRs) and their uses by the community, as documented by the papers presented at two different conferences, LREC2014 and CLiC-it 2014. The data of the former were drawn from the LOD version of the LRE Map, while those of the latter come from manually analyzing the proceedings. 
The results are presented in the form of visual graphs and confirm the initial hypothesis that Italian LRs require concrete actions to enhance their visibility.}, KEYWORDS = {Italian Language Resources}, PAGES = {100-104}, URL = {https://books.openedition.org/aaccademia/1277?lang=it}, ISBN = {978-88-99200-62-6}, CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics CLiC-it 2015}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {3-4 December 2015}, BOOKTITLE = {Proceedings of the Second Italian Conference on Computational Linguistics CLiC-it 2015}, EDITOR = {Bosco, C. and Tonelli, S. and Zanzotto, F. M.}, } @INPROCEEDINGS{GOGGI_2015_INPROCEEDINGS_GMFBPDBM_329370, AUTHOR = {Goggi, S. and Monachini, M. and Frontini, F. and Bartolini, R. and Pardelli, G. and De Mattei, M. and Bustaffa, F. and Manzella, G.}, TITLE = {Marine Planning and Service Platform (MAPS): An Advanced Research Engine for Grey Literature in Marine Science}, YEAR = {2015}, ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is a development of the Marine project (Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013) aiming at building a computer platform for supporting a Marine Information and Knowledge System, as part of the data management activities. One of the main objective of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. We will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data. 
The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs/problems and their answers.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {108-114}, URL = {http://www.textrelease.com/gl16program.html}, VOLUME = {16}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-23-4}, CONFERENCE_NAME = {Sixteenth International Conference on Grey Literature Grey Literature Lobby: Engines and Requesters for Change}, CONFERENCE_PLACE = {Library of Congress Washington D. C., USA}, CONFERENCE_DATE = {December 8-9 2014}, BOOKTITLE = {Grey Literature Lobby: Engines and Requesters for Change}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{GOGGI_2015_INPROCEEDINGS_GPBFMMDB_342221, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Frontini, F. and Monachini, M. and Manzella, G. and De Mattei, M. and Bustaffa, F.}, TITLE = {A semantic engine for grey literature retrieval in the oceanography domain}, YEAR = {2015}, ABSTRACT = {Here we present the final results of MAPS (Marine Planning and Service Platform), an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. In previous publications the general architecture of the system as well as the set of metadata (Common Data Index) used to describe the documents were presented [3]; it was shown how individual oceanographic data-sets could be indexed within the MAPS library by types of measure, measurement tools, geographic areas, and also linked to specific textual documentation. 
Documentation is described using the current international standards: Title, Authors, Publisher, Language, Date of publication, Body/Institution, Abstract, etc.; serial publications are described in terms of ISSN, while books are assigned ISBN; content of various types on electronic networks is described by means of doi and url. Each description is linked to the document. Thanks to this, the MAPS library already enables researchers to go from structured oceanographic data to documents describing it. But this was not enough: documents may contain important information that has not been encoded in the metadata. Thus an advanced Search Engine was put in place that uses semantic-conceptual technologies in order to extract key concepts from unstructured text such as technical documents (reports and grey literature) and scientific papers and to make them indexable and searchable by the end user in the same way as the structured data (such as oceanographic observations and metadata) is. More specifically once a document is uploaded in the MAPS library, key domain concepts in documents are extracted via a natural language processing pipeline and used as additional information for its indexing. The key term identification algorithm is based on marine concepts that were pre-defined in a domain ontology, but crucially it also allows for the discovery of new related concepts. So for instance starting from the domain term salinity, related terms such as sea salinity and average sea salinity will also be identified as key terms and used for indexing and searching documents. A hybrid search system is then put in place, where users can search the library by metadata or by free text queries. In the latter case, the NLP pipeline performs an analysis of the text of the query, and when key concepts are matched, the relevant documents are presented. The results may be later refined by using other structured information (e.g. date of publication, area, ...). 
Currently a running system has been put in place, with data from satellites, buoys and sea stations; such data is documented and searchable by its relevant metadata and documentation. Results of quantitative evaluation in terms of information retrieval measures will be presented in the poster; more specifically, given an evaluation set defined by domain experts and composed of pre-defined queries together with documents that answer such queries, it will be shown how the system is highly accurate in retrieving the correct documents from the library. Though this work focuses on oceanography, its results may be easily extended to other domains; more generally, the possibility of enhancing the visibility and accessibility of grey literature via its connection to the data it describes and to an advanced full text indexing are of great relevance for the topic of this conference.}, KEYWORDS = {Information Extraction, Search Engine, Oceanography}, PAGES = {76-77}, URL = {https://publications.cnr.it/doc/342221}, VOLUME = {17}, ISBN = {978-90-77484-26-5}, CONFERENCE_NAME = {Seventeenth International Conference on Grey Literature. A New Wave of Textual and Non-Textual Grey Literature}, CONFERENCE_PLACE = {Amsterdam}, CONFERENCE_DATE = {December 1-2}, BOOKTITLE = {GL17 Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{BARTOLINI_2014_INPROCEEDINGS_BQDRM_286944, AUTHOR = {Bartolini, R. and Quochi, V. and De Felice, I. and Russo, I. and Monachini, M.}, TITLE = {From Synsets to Videos: Enriching ItalWordNet Multimodally}, YEAR = {2014}, ABSTRACT = {The paper describes the multimodal enrichment of ItalWordNet action verbs' entries by means of an automatic mapping with a conceptual ontology of action types instantiated by video scenes (ImagAct). 
The two resources present significant differences as well as interesting complementary features, such that a mapping of these two resources can lead to an enrichment of IWN, through the connection between synsets and videos apt to illustrate the meaning described by glosses. Here, we describe an approach inspired by ontology matching methods for the automatic mapping of ImagAct video scenes onto ItalWordNet. The experiments described in the paper are conducted on Italian, but the same methodology can be extended to other languages for which WordNets have been created, since ImagAct is available also for English, Chinese and Spanish. This source of multimodal information can be exploited to design second language learning tools, as well as for language grounding in action recognition in video sources and potentially for robotics.}, KEYWORDS = {Action ontology, Multimodality, WordNet}, PAGES = {3110-3117}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {LREC 2014. European Language Resources Association ELRA: Paris (Francia)}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{DEFELICE_2014_INPROCEEDINGS_DBRQM_291282, AUTHOR = {De Felice, I. and Bartolini, R. and Russo, I. and Quochi, V. and Monachini, M.}, TITLE = {Evaluating ImagAct-WordNet mapping for English and Italian through videos}, YEAR = {2014}, ABSTRACT = {In this paper we present the results of the evaluation of an automatic mapping between two lexical resources, WordNet/ItalWordNet and ImagAct, a conceptual ontology of action types instantiated by video scenes. Results are compared with those obtained from a previous experiment performed only on Italian data. 
Differences between the two evaluation strategies, as well as between the quality of the mappings for the two languages considered in this paper, are discussed.}, KEYWORDS = {Language Resources (LRs)}, PAGES = {128-131}, URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-CLICit-2014.pdf}, DOI = {10.12871/CLICIT2014126}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-88-67-41472-7}, CONFERENCE_NAME = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 \& the Fourth International Workshop EVALITA 2014. Pisa University Press srl: Pisa (Italia)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 December 2014, Pisa}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{GOGGI_2014_INPROCEEDINGS_GMFBPDBM_291816, AUTHOR = {Goggi, S. and Monachini, M. and Frontini, F. and Bartolini, R. and Pardelli, G. and De Mattei, M. and Bustaffa, F. and Manzella, G.}, TITLE = {Marine Planning and Service Platform (MAPS): An Advanced Research Engine for Grey Literature in Marine Science}, YEAR = {2014}, ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is a development of the Marine project (Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013) aiming at building a computer platform for supporting Operative Oceanography in its activities. One of the main objective of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. Community and Requirements. Operative Oceanography is the branch of marine research which deals with the development of integrated systems for examining and modeling the ocean monitoring and forecast. Experts need access to real-time data on the state of the sea such as forecasts on temperatures, streams, tides and the relevant scientific literature. 
This finds application in many areas, ranging from civilian and military safety to protection of off-shore and coastal infrastructures. The metadata. The set of metadata associated with marine data is defined in the CDI (Common Data Index) documented standard. They encode: the types of sizes which have been measured; the measurement tools the platform which has been employed; the geographic area where measures have been taken; the environmental matrix; the descriptive documentation. As concerns the scientific documentation, at the current stage of the CDI standard, a document is shaped around the following metadata: Title, Authors, Version, ISBN/DOI, Topic, Date of publication, Body/Institution, Abstract. The search engine. The query system (which is actually under development) has been designed for operating with structured data - the metadata - and raw data - the associated technical and scientific documentation. Full-text technologies are often unsuccessful when applied to this type of queries since they assume the presence of specific keywords in the text; in order to fix this problem, the MAPS project suggests to use different semantic technologies for retrieving the text and data and thus getting much more complying results. In the Poster we will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data. 
The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs/problems and their answers.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {93-94}, URL = {http://greyguide.isti.cnr.it/dfdownloadnew.php?ident=GLConference/GL16/2014-G01-015\&langver=en\&scelta=Metadata}, ISBN = {978-90-77484-24-1}, CONFERENCE_NAME = {Sixteenth International Conference on Grey Literature Grey Literature Lobby: Engines and Requesters for Change}, CONFERENCE_PLACE = {Library of Congress Washington D. C., USA}, CONFERENCE_DATE = {December 8-9, 2014}, EDITOR = {Farace, C. B. D. and Frantzen, J.}, } @TECHREPORT{DEMATTEI_2014_TECHREPORT_DMDMBF_335399, AUTHOR = {De Mattei, M. and Medone, D. and D'Angelo, P. and Monachini, M. and Bartolini, R. and Frontini, F.}, TITLE = {MAPS: Architettura del Sistema}, YEAR = {2014}, ABSTRACT = {PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitività Bando DLTM Azione 1.2.2 "Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012. Il presente documento è il deliverable "D3.1 - Architettura del Sistema" del progetto MAPS (Marine Planning and Service Platform). Il progetto MAPS è un'evoluzione del progetto precedente Marine. Tale evoluzione si articola su tre aspetti diversi: - Un meccanismo di federazione dei dati, che consenta di rendere disponibili ai propri utenti non soltanto i dati prodotti internamente da sistema Marine ma anche quelli resi disponibili da altri sistemi similari, soddisfacendo così un più ampio ambito di esigenze informative. Il deliverable D2.2, Modello della Soluzione specifica in dettaglio queste nuove funzionalità. 
- Un Catalogo dei Documenti che, conservando la documentazione tecnica e scientifica dei prodotti offerti, possa documentare in modo accurato le modalità di misurazione, elaborazione e controllo dei prodotti forniti e quindi i relativi ambiti di applicabilità. - Un sistema di ricerca capace di selezionare i dati necessari ad uno scopo determinato non soltanto sulla base della loro tipologia, della loro dislocazione territoriale o di altre informazioni simili contenute nei metadati associati come avviene oggi nella maggior parte dei sistemi esistenti, ma anche sulla base delle informazioni contenute nella documentazione tecnica e scientifica. Tali funzionalità sono specificate nel deliverable D1.3 - Modello della Soluzione.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {1-35}, URL = {https://publications.cnr.it/doc/335399}, } @TECHREPORT{DEMATTEI_2014_TECHREPORT_DMMFBM_335403, AUTHOR = {De Mattei, M. and Medone, D. and Maltese, M. and Frontini, F. and Bartolini, R. and Monachini, M.}, TITLE = {META: Report di progettazione degli algoritmi individuati}, YEAR = {2014}, ABSTRACT = {PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitività Bando DLTM Azione 1.2.2 "Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012. Il deliverable definisce l'architettura del Sistema di Estrazione Eventi Meteo realizzato dagli autori nell'ambito del progetto META. Il sistema estrae da contenuti online informazione su eventi meteo critici verificatesi in Liguria e nel nord della Toscana.}, KEYWORDS = {Ontology, Information Extraction, Taxonomy}, PAGES = {1-19}, URL = {https://publications.cnr.it/doc/335403}, } @TECHREPORT{FRONTINI_2014_TECHREPORT_FBM_335400, AUTHOR = {Frontini, F. and Bartolini, R. 
and Monachini, M.}, TITLE = {MAPS: Stato dell'Arte}, YEAR = {2014}, ABSTRACT = {PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitività Bando DLTM Azione 1.2.2 "Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012 Il documento descrive lo stato dell'arte delle tecnologie linguistiche applicate ai sistemi di ricerca semantica.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {1-21}, URL = {https://publications.cnr.it/doc/335400}, } @TECHREPORT{FRONTINI_2014_TECHREPORT_FBM_335402, AUTHOR = {Frontini, F. and Bartolini, R. and Monachini, M.}, TITLE = {META:-Report sui modelli e tecniche linguistiche}, YEAR = {2014}, ABSTRACT = {PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitività Bando DLTM Azione 1.2.2 "Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012. Il deliverable riassume lo stato dell'arte delle tecnologie semantiche che possono essere impiegate nella realizzazione del progetto META. Il progetto META è una progetto di ricerca e sviluppo tecnologico finanziato dalla Regione Liguria con i fondi POR-FESR 2007-2013 della Comunità Europea che mira alla realizzazione di un sistema per l'allerta di eventi meteo critici in Liguria e nel nord della Toscana. Nell'ambito del progetto META le tecnologie semantiche sono utilizzate per estrarre eventi meteo di interesse da articoli pubblicati in rete o sui social network.}, KEYWORDS = {Ontology, Information Extraction, Semantic Web, Search Engine}, PAGES = {1-20}, URL = {https://publications.cnr.it/doc/335402}, } @TECHREPORT{FRONTINI_2014_TECHREPORT_FBMPG_287039, AUTHOR = {Frontini, F. and Bartolini, R. and Monachini, M. and Pardelli, G. and Goggi, S.}, TITLE = {Stato dell'arte dei motori semantici. 
Progetto MAPS, programma operativo regionale POR-FESR (2007-2013)}, YEAR = {2014}, ABSTRACT = {Il presente documento è il deliverable "D1.1 - Stato dell'Arte dei motori semantici del progetto MAPS (Marine Planning and Service Platform). Il progetto MAPS è una evoluzione del progetto precedente Marine. Tramite il progetto Marine (Bando Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013 - pos n.1) è stata realizzata una piattaforma informatica di supporto all'Oceanografia Operativa capace di raccogliere dati marini per renderli poi disponibili ai ricercatori e alle organizzazioni interessate tramite protocolli standard. Lo scopo del progetto MAPS è quello di realizzare una Catalogo di Documenti contenente informazioni per la piattaforma Marine. Caratteristica di MAPS è di fornire accesso ai dati oceanografici sia attraverso la ricerca per metadati, sia attraverso la ricerca semantica contenuta nella manualistica tecnico scientifica di riferimento.}, PAGES = {1-22}, URL = {https://publications.cnr.it/doc/287039}, } @INPROCEEDINGS{FRONTINI_2012_INPROCEEDINGS_FABBMPPS_278677, AUTHOR = {Frontini, F. and Aliprandi, C. and Bacciu, C. and Bartolini, R. and Marchetti, A. and Parenti, E. and Piccinonno, F. and Soru, T.}, TITLE = {GLOSS, an infrastructure for the semantic annotation and mining of documents in the public security domain}, YEAR = {2012}, ABSTRACT = {Efficient access to information is crucial in the work of organizations that require decision taking in emergency situations. This paper gives an outline of GLOSS, an integrated system for the analysis and retrieval of data in the environmental and public security domain. 
We shall briefly present the GLOSS infrastructure and its use, and how semantic information of various kinds is integrated, annotated and made available to the final users.}, KEYWORDS = {semantic annotation, text mining, geographic data}, PAGES = {21-25}, URL = {https://publications.cnr.it/doc/278677}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {Eight International Conference on Language Resources and Evaluation. LREC'12. European Language Resources Association: France}, CONFERENCE_PLACE = {Istanbul}, CONFERENCE_DATE = {21-27/05/2012}, } @TECHREPORT{ALIPRANDI_2012_TECHREPORT_ABBFLMPS_221743, AUTHOR = {Aliprandi, C. and Bacciu, C. and Bartolini, R. and Frontini, F. and Lapolla, N. and Marchetti, A. and Piccinonno, F. and Soru, T.}, TITLE = {Specifiche architetturali e funzionali}, YEAR = {2012}, ABSTRACT = {Questo documento contiene le specifiche funzionali ed architetturali del sistema GLOSS elaborate come risultato dell'obiettivo operativo 1. Tali specifiche debbono essere di riferimento per tutte le fasi di sviluppo dei vari componenti del sistema stesso e della loro integrazione in un prototipo dimostrativo. Ad una breve introduzione che richiama gli obiettivi generali del progetto, seguono: 1. La descrizione delle funzionalità suddivisa nelle varie fasi che compongono il flusso operativo di GLOSS. 2. La descrizione dell'architettura del sistema da realizzare nella quale si fornisce lo schema dell'integrazione dei vari componenti, il protocollo di comunicazione e memorizzazione dei dati che viene trattato più nel dettaglio nel documento D1.2 GAF - Gloss Annotation Format, e la descrizione di ciascun componente del sistema. Per sua natura, questo documento sarà soggetto a revisione durante tutto il periodo di sviluppo del sistema. 
Questa prima versione deve intendersi come guida per l'implementazione ed ha lo scopo di fornire a chi partecipa a questo progetto una visione generale delle funzionalità di GLOSS e come queste dovranno essere integrate nel prototipo dimostratore.}, KEYWORDS = {GLOSS specifiche funzionali}, URL = {https://publications.cnr.it/doc/221743}, } @TECHREPORT{QUOCHI_2012_TECHREPORT_QFBHPPBTTK_221616, AUTHOR = {Quochi, V. and Frontini, F. and Bartolini, R. and Hamon, O. and Poch Riera, M. and Padro, M. and Bel, N. and Thurmair, G. and Toral, A. and Kamran, A.}, TITLE = {D7. 4 Third evaluation report. Evaluation of PANACEA v3 and produced resources}, YEAR = {2012}, ABSTRACT = {D7.4 reports on the evaluation of the different components integrated in the PANACEA third cycle of development as well as the final validation of the platform itself. All validation and evaluation experiments follow the evaluation criteria already described in D7.1. The main goal of WP7 tasks was to test the (technical) functionalities and capabilities of the middleware that allows the integration of the various resource-creation components into an interoperable distributed environment (WP3) and to evaluate the quality of the components developed in WP5 and WP6. The content of this deliverable is thus complementary to D8.2 and D8.3 that tackle advantages and usability in industrial scenarios. It has to be noted that the PANACEA third cycle of development addressed many components that are still under research. The main goal for this evaluation cycle thus is to assess the methods experimented with and their potentials for becoming actual production tools to be exploited outside research labs. For most of the technologies, an attempt was made to re-interpret standard evaluation measures, usually in terms of accuracy, precision and recall, as measures related to a reduction of costs (time and human resources) in the current practices based on the manual production of resources. 
In order to do so, the different tools had to be tuned and adapted to maximize precision and for some tools the possibility to offer confidence measures that could allow a separation of the resources that still needed manual revision has been attempted. Furthermore, the extension to other languages in addition to English, also a PANACEA objective, has been evaluated. The main facts about the evaluation results are now summarized.}, KEYWORDS = {PANACEA, evaluation, machine translation}, URL = {http://hdl.handle.net/10230/22533}, } @TECHREPORT{BARTOLINI_2011_TECHREPORT_BPHTPRTPB_206269, AUTHOR = {Bartolini, R. and Poch, M. and Hamon, O. and Toral, A. and Prokopidis, P. and Rubino, F. and Thurmair, G. and Papavassiliou, V. and Bel, N.}, TITLE = {D3. 3 Second version (v2) of the integrated platform and documentation}, YEAR = {2011}, ABSTRACT = {the integrated platform and documentation of panacea}, KEYWORDS = {platform}, URL = {https://publications.cnr.it/doc/206269}, } @TECHREPORT{VOSSEN_2011_TECHREPORT_VBRASADHMBF_206329, AUTHOR = {Vossen, P. and Bosma, W. and Rigau, G. and Agirre, E. and Soroa, A. and Aliprandi, C. and De Jonge, J. and Hielkema, F. and Monachini, M. and Bartolini, R. and Frontini, F.}, TITLE = {KyotoCore: integrated system for knowledge mining from text}, YEAR = {2011}, ABSTRACT = {In this deliverable, we describe KyotoCore, an integrated system for applying text mining. We describe the software architecture of KyotoCore, the single modules and the process flows. Finally, we describe a use case where we apply the complete process toan English database on estuaries.}, KEYWORDS = {Knowledge and text mining software}, PAGES = {56}, URL = {https://publications.cnr.it/doc/206329}, } @INPROCEEDINGS{DELGRATTA_2010_INPROCEEDINGS_DDBCEMQSTC_84782, AUTHOR = {Del Gratta, R. and D'Onofrio, L. and Bartolini, R. and Caselli, T. and Enea, A. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A. 
and Calzolari, N.}, TITLE = {A Web-based Architecture for Interoperability of Lexical Resources}, YEAR = {2010}, ABSTRACT = {In this paper we present a Web Service Architecture for managing high level interoperability of Language Resources (LRs) by means of a Service Oriented Architecture (SOA) and the use of ISO standards, such as ISO LMF. We propose a layered architecture which separates the management of legacy resources (data collection) from data aggregation (workflow) and data access (user requests). We provide a case study to demonstrate how the proposed architecture is capable of managing data exchange among different lexical services in a coherent way and show how the use of a lexical standard becomes of primary importance when a protocol of interoperability is defined.}, KEYWORDS = {Interoperability, Web services, Lexical resources}, PAGES = {53-62}, URL = {http://weblab.iit.cnr.it/kyoto/www2.let.vu.nl/twiki/pub/Kyoto/Publications/icgl2010_DOnofrioetal.pdf}, PUBLISHER = {City university of Hong Kong press (Hong Kong, CHN)}, ISBN = {978-962-442-323-5}, CONFERENCE_NAME = {2nd International Conference on Global Interoperability for Language Resources}, CONFERENCE_PLACE = {Hong Kong}, CONFERENCE_DATE = {18-20 January 2010}, BOOKTITLE = {2nd International Conference on Global Interoperability for Language Resources, ICGL 2010}, EDITOR = {Fang, A. C. and Ide, N. and Webster, J.}, } @INPROCEEDINGS{VOSSEN_2010_INPROCEEDINGS_VRASMB_184375, AUTHOR = {Vossen, P. and Rigau, G. and Agirre, E. and Soroa, A. and Monachini, M. and Bartolini, R.}, TITLE = {KYOTO: an Open Platform for Mining Facts}, YEAR = {2010}, ABSTRACT = {This paper describes an open text-mining system that was developed for the Asian-European project KYOTO. The KYOTO system uses an open text representation format and a central ontology to enable extraction of knowledge and facts from large volumes of text in many different languages. 
We implemented a semantic tagging approach that performs off-line reasoning. Mining of facts and knowledge is achieved through a flexible pattern matching module that can work in much the same way for different languages, can handle efficiently large volumes of documents and is not restricted to a specific domain. We applied the system to an English database on estuaries.}, URL = {https://publications.cnr.it/doc/184375}, ISBN = {978-7-900268-00-6}, CONFERENCE_NAME = {OntoLex 2010}, CONFERENCE_PLACE = {Beijing}, CONFERENCE_DATE = {2010}, BOOKTITLE = {ONTOLEX-COLING 2010}, } @ARTICLE{QUOCHI_2009_ARTICLE_QDSBMC_30876, AUTHOR = {Quochi, V. and Del Gratta, R. and Sassolini, E. and Bartolini, R. and Monachini, M. and Calzolari, N.}, TITLE = {A Standard Lexical-Terminological Resource for the Bio Domain}, YEAR = {2009}, ABSTRACT = {The present paper describes a large-scale lexical resource for the biology domain designed both for human and for machine use. This lexicon aims at semantic interoperability and extendability, through the adoption of ISO-LMF standard for lexical representation and through a granular and distributed encoding of relevant information. The first part of this contribution focuses on three aspects of the model that are of particular interest to the biology community: the treatment of term variants, the representation of bio events and the alignment with a domain ontology. The second part of the paper describes the physical implementation of the model: a relational database equipped with a set of automatic uploading procedures. Peculiarity of the BioLexicon is that it combines features of both terminologies and lexicons. 
A set of verbs relevant for the domain is also represented with full details on their syntactic and semantic argument structure.}, KEYWORDS = {Lexical representation model, Lexical Database, Computational Lexicography, Special Domains, Standards}, PAGES = {325-335}, URL = {https://publications.cnr.it/doc/30876}, VOLUME = {5603}, DOI = {10.1007/978-3-642-04235-5_28}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, JOURNAL = {Lecture notes in computer science}, } @INPROCEEDINGS{CASELLI_2008_INPROCEEDINGS_CIB_84699, AUTHOR = {Caselli, T. and Ide, N. and Bartolini, R.}, TITLE = {A Bilingual Corpus of Inter-linked Events}, YEAR = {2008}, ABSTRACT = {This paper describes the creation of a bilingual corpus of inter-linked events for Italian and English. Linkage is accomplished through the Inter-Lingual Index (ILI) that links ItalWordNet with WordNet. The availability of this resource, on the one hand, enables contrastive analysis of the linguistic phenomena surrounding events in both languages, and on the other hand, can be used to perform multilingual temporal analysis of texts. 
In addition to describing the methodology for construction of the inter-linked corpus and the analysis of the data collected, we demonstrate that the ILI could potentially be used to bootstrap the creation of comparable corpora by exporting layers of annotation for words that have the same sense.}, KEYWORDS = {Corpus (creation, annotation, etc.), Semantics, Validation of LRs}, PAGES = {2424-2429}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/610_paper.pdf}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26 May-1 June 2008}, BOOKTITLE = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odjik, J. and Piperidis, S. and Tapias, D.}, } @INPROCEEDINGS{DELGRATTA_2008_INPROCEEDINGS_DBCMSC_84729, AUTHOR = {Del Gratta, R. and Bartolini, R. and Caselli, T. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {UFRA: a UIMA-based Approach to Federated Language Resource Architecture}, YEAR = {2008}, ABSTRACT = {In this paper we address the issue of developing an interoperable infrastructure for language resources and technologies. In our approach, called UFRA, we extend the Federate Database Architecture System adding typical functionalities caming from UIMA. In this way, we capitalize the advantages of a federated architecture, such as autonomy, heterogeneity and distribution of components, monitored by a central authority responsible for checking both the integration of components and user rights on performing different tasks. We use the UIMA approach to manage and define one common front-end, enabling users and clients to query, retrieve and use language resources and technologies. 
The purpose of this paper is to show how UIMA leads from a Federated Database Architecture to a Federated Resource Architecture, adding to a registry of available components both static resources such as lexicons and corpora and dynamic ones such as tools and general purpose language technologies. At the end of the paper, we present a case-study that adopts this framework to integrate the SIMPLE lexicon and TIMEML annotation guidelines to tag natural language texts.}, KEYWORDS = {LR Infrastructures and Architectures, LR web services, Lexicon, Lexical database}, PAGES = {2634-2639}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/656_paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26 May-1 June 2008}, } @INPROCEEDINGS{GIOVANNETTI_2008_INPROCEEDINGS_GMMB_84726, AUTHOR = {Giovannetti, E. and Marchi, S. and Montemagni, S. and Bartolini, R.}, TITLE = {Ontology Learning and Semantic Annotation: a Necessary Symbiosis}, YEAR = {2008}, ABSTRACT = {Semantic annotation of text requires the dynamic merging of linguistically structured information and a "world model", usually represented as a domain-specific ontology. On the other hand, the process of engineering a domain-ontology through semi-automatic ontology learning system requires the availability of a considerable amount of semantically annotated documents. Facing this bootstrapping paradox requires an incremental process of annotation-acquisition-annotation, whereby domain-specific knowledge is acquired from linguistically-annotated texts and then projected back onto texts for extra linguistic information to be annotated and further knowledge layers to be extracted. 
The presented methodology is a first step in the direction of a full "virtuous" circle where the semantic annotation platform and the evolving ontology interact in symbiosis. As a case study we have chosen the semantic annotation of product catalogues. We propose a hybrid approach, combining pattern matching techniques to exploit the regular structure of product descriptions in catalogues, and Natural Language Processing techniques which are resorted to analyze natural language descriptions. The semantic annotation involves the access to the ontology, semi-automatically bootstrapped with an ontology learning tool from annotated collections of catalogues.}, KEYWORDS = {Information Extraction, Information Retrieval, Ontologies, Tools, Systems}, PAGES = {2079-2085}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {2008}, BOOKTITLE = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odjik, J. and Piperidis, S. and Tapias, D.}, } @INPROCEEDINGS{GIOVANNETTI_2007_INPROCEEDINGS_GMMB_84690, AUTHOR = {Giovannetti, E. and Marchi, S. and Montemagni, S. and Bartolini, R.}, TITLE = {Ontology-based Semantic Annotation of Product Catalogues}, YEAR = {2007}, ABSTRACT = {This paper describes a methodology for the semantic annotation of product catalogues. We propose a hybrid approach, combining pattern matching techniques to exploit the regular structure of product descriptions in catalogues, and Natural Language Processing techniques which are resorted to analyze natural language descriptions. 
It also includes the access to an application ontology, semi-automatically bootstrapped from collections of catalogues with an ontology learning tool, which is used to drive the semantic annotation process.}, KEYWORDS = {Semantic Annotation of texts, Ontology Learning, Information Extraction for e-commerce}, PAGES = {235-239}, URL = {https://publications.cnr.it/doc/84690}, CONFERENCE_NAME = {Recent Advances in Natural Language Processing (RANLP-2007)}, CONFERENCE_PLACE = {Borovets}, CONFERENCE_DATE = {27-29 settembre 2007}, BOOKTITLE = {Proceedings of the International Conference "Recent Advances in Natural Language Processing"}, } @INPROCEEDINGS{SORIA_2007_INPROCEEDINGS_SBLMP_84682, AUTHOR = {Soria, C. and Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Automatic Extraction of Semantics in Law Documents}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84682}, CONFERENCE_NAME = {V Legislative XML Workshop}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {2007}, } @TECHREPORT{DELGRATTA_2007_TECHREPORT_DBCEMQS_157442, AUTHOR = {Del Gratta, R. and Bartolini, R. and Caselli, T. and Enea, A. and Monachini, M. and Quochi, V. and Sassolini, V.}, TITLE = {TimeML: An Ontological Mapping onto the UIMA Type Systems}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157442}, } @TECHREPORT{MONTEMAGNI_2007_TECHREPORT_MMVBBRPT_157440, AUTHOR = {Montemagni, S. and Marchi, S. and Venturi, G. and Bartolini, R. and Bertagna, F. and Ruffolo, P. and Peters, W. and Tiscornia, D.}, TITLE = {Report on Ontology learning tool and testing}, YEAR = {2007}, ABSTRACT = {This deliverable documents the work done within the DALOS EU project for what concerns the definition and implementation of methodologies and techniques to bootstrap terminological and ontological knowledge from domain corpora. 
Starting from a corpus of legacy legislative texts in different languages, linguistic technologies combined with statistical techniques have been used to extract significant terms as well as to structure them in conceptual structures for the different languages dealt with within the project, namely Italian, English, Spanish and Dutch.}, KEYWORDS = {Ontology Learning, Term Extraction, Natural Language Processing, Conceptual Indexing}, URL = {https://publications.cnr.it/doc/157440}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BCGLMPRS_84608, AUTHOR = {Bartolini, R. and Caracciolo, C. and Giovannetti, E. and Lenci, A. and Marchi, S. and Pirrelli, V. and Renso, C. and Spinsanti, L.}, TITLE = {Creation and Use of Lexicons and Ontologies for NL Interfaces to Databases}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84608}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation (LREC)}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BCGLMPRS_91313, AUTHOR = {Bartolini, R. and Caracciolo, C. and Giovannetti, E. and Lenci, A. and Marchi, S. and Pirrelli, V. and Renso, C. and Spinsanti, L.}, TITLE = {Creation and use of lexicons and ontologies for natural language interface to databases}, YEAR = {2006}, ABSTRACT = {In this paper we present an original approach to natural language query interpretation which has been implemented within the FuLL (Fuzzy Logic and Language) Italian project of BC S.r.l. In particular, we discuss here the creation of linguistic and ontological resources, together with the exploitation of existing ones, for natural language-driven database access and retrieval. 
Both the database and the queries we experiment with are Italian, but the methodology we broach naturally extends to other languages.}, KEYWORDS = {Natual language processing, ontologies, gis, databases}, PAGES = {6}, URL = {https://publications.cnr.it/doc/91313}, CONFERENCE_NAME = {LREC Conference}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {24-26/05/2006}, BOOKTITLE = {LREC 2006}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BGMMABSB_84664, AUTHOR = {Bartolini, R. and Giovannetti, E. and Marchi, S. and Montemagni, S. and Andreatta, C. and Brunelli, R. and Stecher, R. and Bouquet, P.}, TITLE = {Multimedia Information Extraction in Ontology-based Semantic Annotation of Product Catalogues}, YEAR = {2006}, ABSTRACT = {The demand for efficient methods for extracting knowledge from multimedia content has led to a growing research community investigating the convergence of multimedia and knowledge technologies. In this paper we describe a methodology for extracting multimedia information from product catalogues empowered by the synergetic use and extension of a domain ontology. The methodology was implemented in the Trade Fair Advanced Semantic Annotation Pipeline of the VIKE-framework.}, KEYWORDS = {Semantic Web Technologies, ontology creation, ontology extraction, ontology evolution, semantic annotation of multimedia content}, URL = {https://publications.cnr.it/doc/84664}, CONFERENCE_NAME = {SWAP 2006}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {18-20 December 2006}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BGMMABSNBB_84663, AUTHOR = {Bartolini, R. and Giovannetti, E. and Marchi, S. and Montemagni, S. and Andreatta, C. and Brunelli, R. and Stecher, R. and Niederée, C. and Bouquet, P. 
and Bortoli, S.}, TITLE = {Ontology Learning in Multimedia Information Extraction from Product Catalogues}, YEAR = {2006}, ABSTRACT = {We propose a methodology for extracting multimedia information from product catalogues empowered by the synergetic use and extension of a domain ontology. The use of domain ontologies in this context additionally opens up innovative ways of catalogue use. The method is characterized by incrementally feeding and exploiting the ontology during an information extraction process, implemented by the semantic annotation of the analysed document, and by providing support for detecting existing similar ontologies to enable reuse of (parts of) them.}, KEYWORDS = {knowledge-drive multimedia analysis, ontology learning, semi-automatic content annotation tools}, URL = {https://publications.cnr.it/doc/84663}, CONFERENCE_NAME = {BOEMIE 2006}, CONFERENCE_PLACE = {Podebrady, Czech Republic}, CONFERENCE_DATE = {6 ottobre 2006}, } @MISC{BARTOLINI_2006_MISC_BDLMMP_151563, AUTHOR = {Bartolini, R. and Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-to-Knowledge (T2K) Versione 2}, YEAR = {2006}, ABSTRACT = {Versione 2. Text-to-Knowledge (T2K) è una piattaforma software di supporto avanzato alla gestione documentale per la creazione dinamica di repertori terminologici e ontologie di dominio a partire da testi e per l'indicizzazione concettuale di documenti. Il sistema T2K si propone di offrire una batteria integrata di strumenti avanzati di analisi linguistica del testo, analisi statistica e apprendimento automatico del linguaggio, destinati a offrire una rappresentazione accurata del contenuto di una base documentale non strutturata, per scopi di indicizzazione avanzata e navigazione intelligente. 
I risultati di questo processo di acquisizione sono annotati in forma di metadati XML, offrendo in tal modo la prospettiva di una sempre crescente e diretta interoperabilità con sistemi automatici per la produzione di contenuti digitali selezionati e strutturati dinamicamente su misura, per diversi profili di utenza. Versioni prototipali di T2K sono già operative su alcuni portali della pubblica amministrazione e sono state applicate per l'indicizzazione di contenuti didattici multimediali. E' in corso l'integrazione della tecnologia T2K nel sistema di gestione informatica di documentazione scientifica del CNR.}, KEYWORDS = {text to knowledge, nlp, estrazione terminologica, ontology learning, indicizzazione terminologica}, URL = {https://publications.cnr.it/doc/151563}, } @INPROCEEDINGS{BARTOLINI_2005_INPROCEEDINGS_BGLMP_84576, AUTHOR = {Bartolini, R. and Giorgetti, D. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Automatic Incremental Term Acquisition from Domain Corpora}, YEAR = {2005}, ABSTRACT = {We describe a technique for the acquisition of terms from Italian domain text corpora, which relies both on sophisticated linguistic analysis and on statistical measures applied to linguistically processed text rather than to raw text as it is usually the case. The main advantage of this technique is that minimal a priori knowledge of term structure is required, thus allowing to explore and discover terms in a given domain without imposing a strict pattern matching structure on them, and also to easily extend it to different domains. The approach we present in this paper is incremental as it may be iterated to discover terms of increasing complexity built on top of terms discovered in the previous iteration. 
The reason why it is convenient to adopt such an incremental approach is that it allows to "clean" data from noise in the first step, elicitating the constituent terms, and then to refine term acquisition on "skimmed" term data.}, PAGES = {293-300}, URL = {https://publications.cnr.it/doc/84576}, CONFERENCE_NAME = {7th International conference on Terminology and Knowledge Engineering (TKE2005)}, CONFERENCE_PLACE = {Copenhagen}, CONFERENCE_DATE = {2005}, BOOKTITLE = {Proceedings of TKE 2005-7th International Conference on Terminology and Knowledge Engineering}, } @TECHREPORT{BARTOLINI_2005_TECHREPORT_BCLMP_157365, AUTHOR = {Bartolini, R. and Caracciolo, C. and Lenci, A. and Marchi, S. and Pirrelli, V.}, TITLE = {Motore semantico. Documento di progettazione e sviluppo}, YEAR = {2005}, ABSTRACT = {Il presente documento descrive architettura, funzionalità e algoritmo di un componente software dedicato, designato come "Motore Semantico", che ha lo scopo di produrre rappresentazioni logico-concettuali, ontologicamente interpretate, di interrogazioni in linguaggio naturale su una base di dati di tipo anche GIS.}, KEYWORDS = {NLP}, PAGES = {1-42}, URL = {https://publications.cnr.it/doc/157365}, } @TECHREPORT{BARTOLINI_2005_TECHREPORT_BGMM_157366, AUTHOR = {Bartolini, R. and Giorgetti, D. and Marchi, S. and Montemagni, S.}, TITLE = {ILC-CNR Contribution to Deliverable 4. 1}, YEAR = {2005}, ABSTRACT = {The goal of the semantic annotation is the annotation of entities and relations starting from input documents conformant with the harmonisation output schema as defined within WP3. This harmonisation schema will focus on the structural and logical organisation of the documents, while WP4 will concentrate on the annotation of textual entities and image elements. 
The results of semantic annotation are intended to populate the domain ontology.}, KEYWORDS = {NLP}, URL = {https://publications.cnr.it/doc/157366}, } @TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMMP_157367, AUTHOR = {Bartolini, R. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Personalizzazione degli Italian NLP tools}, YEAR = {2005}, ABSTRACT = {Il presente documento intende offrire criteri e risultati della fase di personalizzazione dei moduli per l'analisi automatica del testo (Italian NLP tools o "AnITA") all'interno dell'architettura prevista nell'ambito del progetto FuLL.}, KEYWORDS = {NLP}, PAGES = {13}, URL = {https://publications.cnr.it/doc/157367}, } @TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMP_157369, AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Modellazione del motore sintattico e delle strutture dati di supporto}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157369}, } @TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMMP_157370, AUTHOR = {Bartolini, R. and Lenci, L. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-2-Knowledge: Acquisizione semi-automatica di ontologie per l'indicizzazione semantica di documenti}, YEAR = {2005}, ABSTRACT = {Text-2-Knowledge, Acquisizione semi-automatica di ontologie per l'indicizzazione semantica di documenti}, KEYWORDS = {nlp, terminology extraction}, URL = {https://publications.cnr.it/doc/157370}, } @MISC{BARTOLINI_2005_MISC_BDGMLMP_151548, AUTHOR = {Bartolini, R. and Dell'Orletta, F. and Giorgetti, D. and Marchi, S. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-to-Knowledge (T2K)}, YEAR = {2005}, ABSTRACT = {Piattaforma di estrazione e indicizzazione terminologica.}, KEYWORDS = {NLP, estrazione terminologica}, URL = {https://publications.cnr.it/doc/151548}, } @MISC{BARTOLINI_2005_MISC_BMLMP_151550, AUTHOR = {Bartolini, R. and Marchi, S. and Lenci, A. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {NLPtools}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151550}, } @INCOLLECTION{BARTOLINI_2004_INCOLLECTION_BLMPS_30867, AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V. and Soria, C.}, TITLE = {Automatic Classification and Analysis of Provisions in Italian Legal Texts: A Case Study}, YEAR = {2004}, ABSTRACT = {In this paper we address the problem of automatically enriching legal texts with semantic annotation, an essential pre–requisite to effective indexing and retrieval of legal documents. This is done through illustration of SALEM (Semantic Annotation for LEgal Management), a computational system developed for automated semantic annotation of (Italian) law texts. SALEM is an incremental system using Natural Language Processing techniques to perform two tasks: i) classify law paragraphs according to their regulatory content, and ii) extract relevant text fragments corresponding to specific semantic roles that are relevant for the different types of regulatory content. The paper sketches the overall architecture of SALEM and reports results of a preliminary case study on a sample of Italian law texts.}, KEYWORDS = {Annotazione semantica, Classificazione automatica}, PAGES = {593-604}, URL = {https://rdcu.be/dftjm}, VOLUME = {3292}, DOI = {10.1007/978-3-540-30470-8_72}, PUBLISHER = {Springer (Berlin, DEU)}, ISBN = {978-3-540-23664-1}, BOOKTITLE = {On the Move to Meaningful Internet Systems 2004: OTM 2004 Workshops. OTM 2004}, EDITOR = {Meersman, R. and Tari, Z. and Corsaro, A.}, } @INPROCEEDINGS{BARTOLINI_2004_INPROCEEDINGS_BLMP_84570, AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {Hybrid Constraints for Robust Parsing: First Experiments and Evaluation}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84570}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbon, Portugal}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{BARTOLINI_2004_INPROCEEDINGS_BLMPS_84571, AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V. and Soria, C.}, TITLE = {Semantic Mark-up of Italian Legal Texts Through NLP-based Techniques}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84571}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbon, Portugal}, CONFERENCE_DATE = {2004}, }