As an encoding format, the CoNLL de facto standard is used.}, KEYWORDS = {Syntactic Annotation, Merging of Resources, Dependency Parsing}, PAGES = {23-30}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/workshops/06.LREC%202012%20Merging%20Proceedings.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {LREC 2012 Workshop on Language Resource Merging}, CONFERENCE_PLACE = {Istanbul}, CONFERENCE_DATE = {22 May 2012}, BOOKTITLE = {Proceedings of the LREC 2012 Workshop on Language Resource Merging}, EDITOR = {Bel, N.}, } @INPROCEEDINGS{DELLORLETTA_2012_INPROCEEDINGS_DMMPV_219489, AUTHOR = {Dell'Orletta, F. and Marchi, S. and Montemagni, S. and Plank, B. and Venturi, G.}, TITLE = {The SPLeT-2012 Shared Task on Dependency Parsing of Legal Texts}, YEAR = {2012}, ABSTRACT = {The 4th Workshop on "Semantic Processing of Legal Texts" (SPLeT-2012) presents the first multilingual shared task on Dependency Parsing of Legal Texts. In this paper, we define the general task and its internal organization into sub-tasks, describe the datasets and the domain-specific linguistic peculiarities characterizing them. We finally report the results achieved by the participating systems, describe the underlying approaches and provide a first analysis of the final test results.}, KEYWORDS = {Dependency Parsing, Domain Adaptation, Legal Text Processing}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/workshops/27.LREC%202012%20Workshop%20Proceedings%20SPLeT.pdf}, CONFERENCE_NAME = {Fourth Workshop on Semantic Processing of Legal Texts (SPLeT 2012)-First Shared Task on Dependency Parsing of Legal Texts (SPLeT 2012)}, CONFERENCE_PLACE = {Istanbul}, CONFERENCE_DATE = {27 Maggio 2012}, } @INPROCEEDINGS{DELLORLETTA_2012_INPROCEEDINGS_DMMVAF_219483, AUTHOR = {Dell'Orletta, F. and Marchi, S. and Montemagni, S. and Venturi, G. and Agnoloni, T. 
and Francesconi, E.}, TITLE = {Domain Adaptation for Dependency Parsing at Evalita 2011}, YEAR = {2012}, ABSTRACT = {The domain adaptation task was aimed at investigating techniques for adapting state-of-the-art dependency parsing systems to new domains. Both the language dealt with, i.e. Italian, and the target domain, namely the legal domain, represent two main novelties of the task organised at Evalita 2011. In this paper, we define the task and describe how the datasets were created from different resources. In addition, we characterize the different approaches of the participating systems, report the test results, and provide a first analysis of these results.}, KEYWORDS = {Dependency Parsing, Domain Adaptation, Legal Text Processing}, PAGES = {1-7}, URL = {http://www.evalita.it/sites/evalita.fbk.eu/files/working_notes2011/Domain_Adaptation/}, CONFERENCE_NAME = {Evaluation of NLP and Speech Tools for Italian (EVALITA 2011): Domain Adaptation track}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {24-25 Gennaio 2012}, } @INPROCEEDINGS{DELLORLETTA_2012_INPROCEEDINGS_DMV_278420, AUTHOR = {Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Genre-oriented Readability Assessment: a Case Study}, YEAR = {2012}, PAGES = {91-98}, URL = {https://publications.cnr.it/doc/278420}, ISBN = {978-1-62748-389-6}, CONFERENCE_NAME = {Workshop on "Speech and Language Processing Tools in Education" (SLP-TED)}, CONFERENCE_PLACE = {Mumbai, India}, CONFERENCE_DATE = {15 December, 2012}, BOOKTITLE = {Proceedings of Workshop on "Speech and Language Processing Tools in Education" (SLP-TED)}, } @INPROCEEDINGS{LENCI_2012_INPROCEEDINGS_LMVC_285544, AUTHOR = {Lenci, A. and Montemagni, S. and Venturi, G. and Cutrulla, M. R.}, TITLE = {Enriching the ISST-TANL Corpus with Semantic Frames}, YEAR = {2012}, ABSTRACT = {The paper describes the design and the results of a manual annotation methodology devoted to enrich the ISST-TANL Corpus with Semantic Frames information. 
The main issues encountered in applying the English FrameNet annotation criteria to a corpus of Italian language are discussed together with the choice of anchoring the semantic annotation layer to the underlying dependency syntactic structure. We also describe an experiment to measure inter-annotator agreement and a first case study to extend and specialise FrameNet annotation to a corpus of legislative texts.}, KEYWORDS = {Semantic annotation, FrameNet, Multi-layer annotated corpus}, PAGES = {3719-3726}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/986_Paper.pdf}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {Eight International Conference on Language Resources and Evaluation (LREC'12)}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {23-25 May 2012}, BOOKTITLE = {Proceedings of the Eight International Conference on Language Resources and Evaluation (LREC'12)}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{MONTEMAGNI_2012_INPROCEEDINGS_MWDN_330114, AUTHOR = {Montemagni, S. and Wieling, M. and De Jonge, B. and Nerbonne, J.}, TITLE = {Patterns of Language Variation and Underlying Linguistic Features: A New Dialectometric Approach}, YEAR = {2012}, PAGES = {879-889}, URL = {https://publications.cnr.it/doc/330114}, VOLUME = {II}, PUBLISHER = {Franco Cesati Editore (Firenze, ITA)}, ISBN = {978-88-7667-433-4}, CONFERENCE_NAME = {XI Congresso SILFI (Società Internazionale di Linguistica e Filologia Italiana)}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-7 Ottobre 2010}, BOOKTITLE = {La variazione nell'italiano e nella sua storia. Varietà e varianti linguistiche e testuali. Atti dell'XI Congresso SILFI (Società Internazionale di Linguistica e Filologia Italiana)}, EDITOR = {Bianchi, P. and De Blasi, N. and De Caprio, C. 
and Montuori, F.}, } @ARTICLE{THOMPSON_2011_ARTICLE_TMMCDLMMPQRSVRA_205232, AUTHOR = {Thompson, P. and McNaught, J. and Montemagni, S. and Calzolari, N. and Del Gratta, R. and Lee, V. and Marchi, S. and Monachini, M. and Pezik, P. and Quochi, V. and Rupp, C. and Sasaki, Y. and Venturi, G. and Rebholz Schuhmann, D. and Ananiadou, S.}, TITLE = {The BioLexicon: a large-scale terminological resource for biomedical text mining}, YEAR = {2011}, ABSTRACT = {Background Due to the rapidly expanding body of biomedical literature, biologists require increasingly sophisticated and efficient systems to help them to search for relevant information. Such systems should account for the multiple written variants used to represent biomedical concepts, and allow the user to search for specific pieces of knowledge (or events) involving these concepts, e.g., protein-protein interactions. Such functionality requires access to detailed information about words used in the biomedical literature. Existing databases and ontologies often have a specific focus and are oriented towards human use. Consequently, biological knowledge is dispersed amongst many resources, which often do not attempt to account for the large and frequently changing set of variants that appear in the literature. Additionally, such resources typically do not provide information about how terms relate to each other in texts to describe events. Results This article provides an overview of the design, construction and evaluation of a large-scale lexical and conceptual resource for the biomedical domain, the BioLexicon. The resource can be exploited by text mining tools at several levels, e.g., part-of-speech tagging, recognition of biomedical entities, and the extraction of events in which they are involved. As such, the BioLexicon must account for real usage of words in biomedical texts. 
In particular, the BioLexicon gathers together different types of terms from several existing data resources into a single, unified repository, and augments them with new term variants automatically extracted from biomedical literature. Extraction of events is facilitated through the inclusion of biologically pertinent verbs (around which events are typically organized) together with information about typical patterns of grammatical and semantic behaviour, which are acquired from domain-specific texts. In order to foster interoperability, the BioLexicon is modelled using the Lexical Markup Framework, an ISO standard. Conclusions The BioLexicon contains over 2.2 M lexical entries and over 1.8 M terminological variants, as well as over 3.3 M semantic relations, including over 2 M synonymy relations. Its exploitation can benefit both application developers and users. We demonstrate some such benefits by describing integration of the resource into a number of different tools, and evaluating improvements in performance that this can bring.}, KEYWORDS = {Text Mining, Information Extraction, Computational Lexicon}, PAGES = {1-29}, URL = {http://www.biomedcentral.com/1471-2105/12/397}, VOLUME = {12}, DOI = {10.1186/1471-2105-12-397}, PUBLISHER = {BioMed Central ([London], Regno Unito)}, ISSN = {1471-2105}, JOURNAL = {BMC bioinformatics}, } @INCOLLECTION{DELLORLETTA_2011_INCOLLECTION_DMVV_138775, AUTHOR = {Dell'Orletta, F. and Montemagni, S. and Vecchi, E. M. 
and Venturi, G.}, TITLE = {Tecnologie linguistico-computazionali per il monitoraggio della competenza linguistica italiana degli alunni stranieri nella scuola primaria e secondaria}, YEAR = {2011}, ABSTRACT = {La possibilità di disporre di tecnologie avanzate e innovative che permettano di monitorare la competenza linguistica degli alunni stranieri e, al contempo, valutare l'adeguatezza dei materiali didattici a loro offerti può essere di supporto all'insegnante nell'orientare la propria azione formativa, rendendo così il processo di integrazione linguistico-culturale meno faticoso e traumatico. In tale ottica, questo studio, realizzato col supporto di una piattaforma ormai consolidata di metodi e strumenti per il trattamento automatico dell'italiano, costituisce il primo tentativo condotto in relazione alla lingua italiana, per mettere a punto una metodologia di monitoraggio linguistico rivolta specificamente agli studenti apprendenti la lingua italiana come L2 ed alle loro produzioni scritte.}, KEYWORDS = {Trattamento Automatico del Linguaggio, Stranieri, Lingua italiana}, PAGES = {319-336}, URL = {https://publications.cnr.it/doc/138775}, PUBLISHER = {Mc Graw-Hill (Milano, ITA)}, ISBN = {978-88-386-7296-5}, BOOKTITLE = {Percorsi Migranti}, EDITOR = {Bruno, G. C. and Caruso, I. and Sanna, M. and Vellecco, I.}, } @INPROCEEDINGS{DELLORLETTA_2011_INPROCEEDINGS_DMV_205510, AUTHOR = {Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {READ-IT: assessing readability of Italian texts with a view to text simplification}, YEAR = {2011}, ABSTRACT = {In this paper, we propose a new approach to readability assessment with a specific view to the task of text simplification: the intended audience includes people with low literacy skills and/or with mild cognitive impairment. READ-IT represents the first advanced readability assessment tool for what concerns Italian, which combines traditional raw text features with lexical, morpho-syntactic and syntactic information. 
In READ-IT readability assessment is carried out with respect to both documents and sentences where the latter represents an important novelty of the proposed approach creating the prerequisites for aligning the readability assessment step with the text simplification process. READ-IT shows a high accuracy in the document classification task and promising results in the sentence classification scenario.}, KEYWORDS = {Readability Assessment, Text Simplification}, PAGES = {73-83}, URL = {http://dl.acm.org/citation.cfm?id=2140511}, ISBN = {978-1-937284-14-5}, CONFERENCE_NAME = {SLPAT '11 Proceedings of the Second Workshop on Speech and Language Processing for Assistive Technologies}, CONFERENCE_PLACE = {Edimburgo, UK}, CONFERENCE_DATE = {30 Luglio 2011}, } @INPROCEEDINGS{DELLORLETTA_2011_INPROCEEDINGS_DVM_205505, AUTHOR = {Dell'Orletta, F. and Venturi, G. and Montemagni, S.}, TITLE = {ULISSE: an unsupervised algorithm for detecting reliable dependency parses}, YEAR = {2011}, ABSTRACT = {In this paper we present ULISSE, an unsupervised linguistically-driven algorithm to select reliable parses from the output of a dependency parser. Different experiments were devised to show that the algorithm is robust enough to deal with the output of different parsers and with different languages, as well as to be used across different domains. In all cases, ULISSE appears to outperform the baseline algorithms.}, KEYWORDS = {Dependency Parsing, Selection of Reliable Parses, Unsupervised Algorithm}, PAGES = {115-124}, URL = {http://dl.acm.org/citation.cfm?id=2018950}, ISBN = {978-1-932432-92-3}, CONFERENCE_NAME = {CoNLL '11 Proceedings of the Fifteenth Conference on Computational Natural Language Learning}, CONFERENCE_PLACE = {Portland, Oregon, USA}, CONFERENCE_DATE = {23-24 Giugno 2011}, } @INPROCEEDINGS{DELLORLETTA_2011_INPROCEEDINGS_DM_205737, AUTHOR = {Dell'Orletta, F. 
and Montemagni, S.}, TITLE = {Towards an NLP-based approach for measuring syntactic complexity: preliminary experiments with Italian texts from different registers}, YEAR = {2011}, ABSTRACT = {In this paper, we explore how NLP can be used to automatically identify relevant syntactic complexity features in texts with the aim of assessing their correlation with specific linguistic registers. Our final goal is twofold. On the one hand, we demonstrate that automatic morpho-syntactic and syntactic annotation of texts provides sufficiently accurate output for use in the automatic extraction and measurement of syntactic complexity features. On the other hand, we identify the set of syntactic features strongly correlating with considered linguistic registers.}, KEYWORDS = {Language Variation, Natural Language Processing, Syntactic Complexity}, URL = {http://www.benszm.net/BSBWWS/Dellorletta_Montemagni.pdf}, CONFERENCE_NAME = {Workshop on "Cross-linguistic and language-internal variation in text and speech: focus on the joint analysis of multiple characteristics"}, CONFERENCE_PLACE = {Freiburg Institute for Advanced Studies (FRIAS), University of Freiburg}, CONFERENCE_DATE = {29/10/2010}, } @INPROCEEDINGS{MONTEMAGNI_2011_INPROCEEDINGS_M_205779, AUTHOR = {Montemagni, S.}, TITLE = {Ontology Learning. An introduction}, YEAR = {2011}, ABSTRACT = {The tutorial is organised into two parts: PART 1 is devoted to provide the basic notions underlying Ontology Learning, in particular why it is needed, how it can be carried out and how its results can be evaluated. PART 2 discusses the topic of Ontology Learning in the Legal domain, with particular attention to the specific challenges posed by it. 
It also provides an overview of different feasibility studies carried out in the legal domain.}, KEYWORDS = {Ontology Learning, Legal Information extraction, Natural Language Processing}, URL = {https://publications.cnr.it/doc/205779}, CONFERENCE_NAME = {Summer School LEX 2011, Ravenna, Italy "Managing Legal Resources in the Semantic Web"}, CONFERENCE_PLACE = {Ravenna, Italia}, CONFERENCE_DATE = {8 settembre 2011}, } @INPROCEEDINGS{MONTEMAGNI_2011_INPROCEEDINGS_MWDN_205911, AUTHOR = {Montemagni, S. and Wieling, M. and De Jonge, B. and Nerbonne, J.}, TITLE = {Synchronic patterns of Tuscan phonetic variation and diachronic change: evidence from a dialectometric study}, YEAR = {2011}, ABSTRACT = {A careful investigation of synchronic patterns of linguistic variation with underlying linguistic features can lead to important insights into the comprehension of diachronic phonetic processes. Starting from the analysis of synchronic patterns of phonetic variation in Tuscany we tackled one of the main and most debated features of Tuscan dialects, the phenomenon of spirantization with a specific view to the so-called Tuscan "gorgia" (i.e. voiceless spirantization). In particular, we showed that the newly proposed method of spectral partitioning of bipartite graphs applied to synchronic dialectal data can effectively be used to investigate diachronic phonetic processes. From a careful analysis of the sound correspondences involving voiceless and voiced stops, we tracked the evolution of the spirantization phenomenon in several respects. First, we tracked spirantization geographically, across Tuscany from the influential center of Florence to the peripheral areas. Second, we tracked it phonologically, from voiceless to voiced stops, and within each voicing class from velars to dentals and then to bilabials. Finally, we tracked it demographically, with young speakers using the most innovative sound correspondences more than old speakers. 
The fact that these results are in line with the literature on the topic of Tuscan "gorgia" demonstrates the potential of the method of spectral partitioning of bipartite graphs with respect to the reconstruction of diachronic processes starting from diatopically distributed synchronic dialectal data.}, KEYWORDS = {Dialectometry, Phonetic Variation, Tuscan Dialects}, PAGES = {120-121}, URL = {http://westernlinguistics.ca/methods14/files/all_abstracts_one_document.pdf}, CONFERENCE_NAME = {Fourteenth Methods in Dialectology Conference}, CONFERENCE_PLACE = {University of Western Ontario}, CONFERENCE_DATE = {2-6 August 2011}, } @TECHREPORT{MONTEMAGNI_2011_TECHREPORT_MW_206506, AUTHOR = {Montemagni, S. and Wieling, M.}, TITLE = {Definizione di un modello computazionale della variazione dialettale basato sull'integrazione di fattori socio-demografici e geografici}, YEAR = {2011}, ABSTRACT = {In this study, we used a mixed-effects logistic regression model in combination with generalized additive logistic modeling to predict lexical differences in Tuscan dialects with respect to standard Italian. We used lexical information for 170 concepts in 213 locations in Tuscany. Although geographical position is an important predictor with locations distant from Florence having lexical forms more likely to differ from standard Italian, several other factors emerged as significant. The model predicts that lexical variants used by older speakers and in smaller as well as poorer communities are more likely to differ from standard Italian. The impact of the demographic variables, however, varied from concept to concept. For a majority of concepts, smaller and poorer communities have lexical forms different from standard Italian. For a smaller minority of concepts, however, larger and richer communities have lexical forms different from standard Italian. Similarly, the effect of speaker age and the average community age also varied per concept. 
While not significant as a fixed effect, the concept frequency showed significant geographical variation. These results clearly identify important factors involved in dialect variation at the lexical level. In addition, this study illustrates the usefulness of mixed-effects regression techniques together with generalized additive modeling for analyzing lexical dialect data.}, KEYWORDS = {Dialettologia toscana, Dialettometria, variazione lessicale}, URL = {https://publications.cnr.it/doc/206506}, } @ARTICLE{BONIN_2010_ARTICLE_BDVM_278419, AUTHOR = {Bonin, F. and Dell'Orletta, F. and Venturi, G. and Montemagni, S.}, TITLE = {Singling out Legal Knowledge from World Knowledge}, YEAR = {2010}, PAGES = {217-229}, URL = {https://publications.cnr.it/doc/278419}, PUBLISHER = {Edizioni Scientifiche Italiane (Firenze, Italia)}, ISSN = {0390-0975}, JOURNAL = {Informatica e diritto}, } @ARTICLE{FRANCESCONI_2010_ARTICLE_FMPT_30888, AUTHOR = {Francesconi, E. and Montemagni, S. and Peters, W. and Tiscornia, D.}, TITLE = {Integrating a Bottom-Up and Top-Down Methodology for Building Semantic Resources for the Multilingual Legal Domain}, YEAR = {2010}, ABSTRACT = {This article presents a methodology for multilingual legal knowledge acquisition and modelling. It encompasses two complementary strategies. On the one hand, there is the top-down definition of the conceptual structure of the legal domain under consideration on the basis of expert judgment. This structure is language-independent, modeled as an ontology, and can be aligned with other ontologies that capture similar or complementary knowledge, in order to provide a wider conceptual embedding. Another top-down approach is the exploitation of the explicit structure of legal texts, which enables the targeted identification of text spans that play an ontological role and their subsequent inclusion in the knowledge model. 
On the other hand, the linguistically motivated, text-based bottom-up population and incremental refinement of this conceptual structure using (semi-)automatic NLP techniques, maximizes the completeness and domain-specificity of the resulting knowledge. The proposed methodology is concerned with the relation between these two differently derived types of knowledge, and defines a framework for interfacing lexical and ontological knowledge, the result of which offers various perspectives on multilingual legal knowledge. Two case-studies combining bottom-up and top-down methodologies for knowledge modelling and learning are presented as illustrations of the methodology.}, KEYWORDS = {Knowledge Modelling, Knowledge Acquisition, Natural Language Processing, Ontology Learning}, PAGES = {95-121}, URL = {https://publications.cnr.it/doc/30888}, VOLUME = {6036}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, JOURNAL = {Lecture notes in computer science}, } @BOOK{FRANCESCONI_2010_BOOK_FMPT_170395, AUTHOR = {Francesconi, E. and Montemagni, S. and Peters, W. and Tiscornia, D.}, TITLE = {Semantic Processing of Legal Texts: Where the Language of Law Meets the Law of Language}, YEAR = {2010}, ABSTRACT = {The last few years have seen a growing body of research and practice addressing aspects such as automated legal reasoning and argumentation, semantic and cross-language legal information retrieval, document classification, legal drafting, legal knowledge discovery and extraction. This State-of-the-Art Survey contains invited contributions of leading researchers and groups eminently active in the field, which were complemented with selected papers from the Workshop on Semantic Processing of Legal Texts, held in Marrakech, Morocco, in 2008, within the framework of the Sixth International Conference on Language Resources and Evaluation (LREC 2008). 
These publications mirror the state-of-the-art in linguistic technologies, tools and resources focusing on the automatic extraction of relevant information from legal texts, and the structured organization of this extracted knowledge for legal knowledge representation and scholarly activity, with particular emphasis on the crucial role played by language resources and human language technologies. The contents are organized in three topical sections on information extraction; construction of knowledge resources; and semantic indexing, summarization and translation.}, KEYWORDS = {Legal Text Processing, Ontology Learning, Information Extraction}, URL = {https://publications.cnr.it/doc/170395}, } @EDITORIAL{FRANCESCONI_2010_EDITORIAL_FMPT_186091, AUTHOR = {Francesconi, E. and Montemagni, S. and Peters, W. and Tiscornia, D.}, TITLE = {Semantic Processing of Legal Texts: Where the Language of Law Meets the Law of Language}, YEAR = {2010}, ABSTRACT = {The last few years have seen a growing body of research and practice addressing aspects such as automated legal reasoning and argumentation, semantic and cross-language legal information retrieval, document classification, legal drafting, legal knowledge discovery and extraction. This State-of-the-Art Survey contains invited contributions of leading researchers and groups eminently active in the field, which were complemented with selected papers from the Workshop on Semantic Processing of Legal Texts, held in Marrakech, Morocco, in 2008, within the framework of the Sixth International Conference on Language Resources and Evaluation (LREC 2008). 
These publications mirror the state-of-the-art in linguistic technologies, tools and resources focusing on the automatic extraction of relevant information from legal texts, and the structured organization of this extracted knowledge for legal knowledge representation and scholarly activity, with particular emphasis on the crucial role played by language resources and human language technologies. The contents are organized in three topical sections on information extraction; construction of knowledge resources; and semantic indexing, summarization and translation.}, PAGES = {249}, URL = {https://publications.cnr.it/doc/186091}, VOLUME = {6036}, ISBN = {978-3-642-12836-3}, } @EDITORIAL{FRANCESCONI_2010_EDITORIAL_FMPW_136477, AUTHOR = {Francesconi, E. and Montemagni, S. and Peters, W. and Wyner, A.}, TITLE = {Proceedings of the LREC 2010 Workshop on SEMANTIC PROCESSING OF LEGAL TEXTS (SPLeT-2010)}, YEAR = {2010}, KEYWORDS = {Legal Knowledge Extraction, Natural Language Processing}, URL = {https://publications.cnr.it/doc/136477}, } @EDITORIAL{FRANCESCONI_2010_EDITORIAL_FMRT_136476, AUTHOR = {Francesconi, E. and Montemagni, S. and Rossi, P. and Tiscornia, D.}, TITLE = {Proceedings of the 4th Workshop on Legal Ontologies and Artificial Intelligence Techniques (LOAIT 2010)}, YEAR = {2010}, KEYWORDS = {Legal Ontologies, Ontology Learning, Legal Knowledge Extraction, Legal Knowledge Modelling}, URL = {https://publications.cnr.it/doc/136476}, } @INPROCEEDINGS{ATTARDI_2010_INPROCEEDINGS_ADDLMS_84775, AUTHOR = {Attardi, G. and Dei Rossi, S. and Di Pietro, G. and Lenci, A. and Montemagni, S. 
and Simi, M.}, TITLE = {A Resource and Tool for Super-sense Tagging of Italian Texts}, YEAR = {2010}, KEYWORDS = {Corpus (creation, annotation, etc.), Tools, Systems, Applications, Statistical and machine learning methods}, URL = {https://publications.cnr.it/doc/84775}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{BONIN_2010_INPROCEEDINGS_BDMV_84796, AUTHOR = {Bonin, F. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {A Contrastive Approach to Multi-word Extraction from Domain-specific Corpora}, YEAR = {2010}, ABSTRACT = {In this paper we present a novel approach to multi-word terminology extraction combining a well-known automatic term recognition approach, the C-NC value method, with a contrastive ranking technique, aimed at refining obtained results either by filtering noise due to common words or by discerning between semantically different types of terms within heterogeneous terminologies. The proposed methodology has been tested in two case studies carried out in the History of Art and Legal domains with promising results.}, KEYWORDS = {Terminology Extraction, Domain-specific Corpora, Multi-word Expression}, PAGES = {3222-3229}, URL = {https://publications.cnr.it/doc/84796}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {19-21 maggio 2010}, } @INPROCEEDINGS{BONIN_2010_INPROCEEDINGS_BDVM_84802, AUTHOR = {Bonin, F. and Dell'Orletta, F. and Venturi, G. and Montemagni, S.}, TITLE = {Contrastive filtering of domain specific multi-word terms from different types of corpora}, YEAR = {2010}, ABSTRACT = {In this paper we tackle the challenging task of Multi-word term (MWT) extraction from different types of specialized corpora. 
Contrastive filtering of previously extracted MWTs results in a considerable increment of acquired domain-specific terms.}, KEYWORDS = {multi-word terms extraction, corpora}, PAGES = {76-79}, URL = {https://publications.cnr.it/doc/84802}, ISBN = {978-7-900268-00-6}, CONFERENCE_NAME = {The 23rd International Conference on Computational Linguistics (COLING 2010). Multiword Expressions: from Theory to Applications (MWE 2010)}, CONFERENCE_PLACE = {Beijing, China}, CONFERENCE_DATE = {28 agosto 2010}, } @INPROCEEDINGS{BOSCO_2010_INPROCEEDINGS_BMMDL_84799, AUTHOR = {Bosco, C. and Montemagni, S. and Mazzei, A. and Dell'Orletta, F. and Lenci, A.}, TITLE = {Evalita'09 Parsing Task: comparing dependency parsers and treebanks}, YEAR = {2010}, KEYWORDS = {dependency parsing, dependency treebank}, URL = {https://publications.cnr.it/doc/84799}, CONFERENCE_NAME = {Evaluation of NLP and Speech Tools for Italian. EVALITA 2009}, CONFERENCE_PLACE = {Reggio Emilia, Italy}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{BOSCO_2010_INPROCEEDINGS_BMMLDLLASLHNN_84789, AUTHOR = {Bosco, C. and Montemagni, S. and Mazzei, A. and Lombardo, V. and Dell'Orletta, F. and Lenci, A. and Lesmo, L. and Attardi, G. and Simi, M. and Lavelli, A. and Hall, J. and Nilsson, J. 
and Nivre, J.}, TITLE = {Comparing the Influence of Different Treebank Annotations on Dependency Parsing}, YEAR = {2010}, KEYWORDS = {Parsing, Corpus (creation, annotation, etc.), Evaluation methodologies}, URL = {https://publications.cnr.it/doc/84789}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{MONTEMAGNI_2010_INPROCEEDINGS_M_84772, AUTHOR = {Montemagni, S.}, TITLE = {Esplorazioni computazionali nello spazio della variazione lessicale in Toscana}, YEAR = {2010}, ABSTRACT = {Il passaggio dalla descrizione della distribuzione geografica di singole parole a un livello di descrizione più astratto volto a formulare generalizzazioni relative alla variazione diatopica è oggi reso possibile dal ricorso a tecnologie linguistico-computazionali affiancate da tecniche di analisi statistica multivariata. L'uso combinato di queste tecniche si è dimostrato particolarmente promettente nello studio della variazione linguistica (principalmente fonetica e lessicale) di diverse lingue e dialetti, tipologicamente anche molto distanti. Tali tecniche sono state anche proficuamente utilizzate per l'analisi del contatto tra varietà linguistiche e una norma di riferimento. L'articolo si colloca all'interno di questo filone di ricerca, riportando i risultati di esplorazioni computazionali nello spazio della variazione lessicale in Toscana. Tali esplorazioni intendono ripercorrere i passi di Gabriella Giacomelli, ideatrice e direttrice dell'impresa dell'Atlante Lessicale Toscano (ALT) e profonda conoscitrice della realtà dialettale toscana, nel suo studio sulle aree lessicali toscane (Giacomelli 1975). 
Questa rivisitazione dello studio sulle aree lessicali toscane di Giacomelli si è avvalsa, più di tre decenni dopo, di due importanti elementi di novità, ovvero: i) sul versante dei dati, si è basata sull'intero corpus dei materiali dialettali dell'ALT disponibili nel sito di ALT-Web (http://serverdbt.ilc.cnr.it/altweb); ii) sul versante degli strumenti di analisi, è stata condotta attraverso l'uso combinato di tecnologie linguistiche e tecniche di analisi statistica multivariata che rendono possibile un'analisi aggregata di corpora di materiali dialettali anche di vaste dimensioni. Lo studio si focalizza su due dei tre aspetti indicati come fondamentali da Giacomelli per l'analisi delle aree lessicali toscane, ovvero quello dei "rapporti interni, tra aree subregionali" e quello dei "rapporti con la lingua".}, KEYWORDS = {Computational Dialectology, Lexical Variation}, PAGES = {609-634}, URL = {https://publications.cnr.it/doc/84772}, PUBLISHER = {Centro Editoriale e Librario (Arcavacata di Rende, ITA)}, ISBN = {9788874581030}, CONFERENCE_NAME = {Convegno 'Parole. Il lessico come strumento per organizzare e trasmettere gli etnosaperi'}, CONFERENCE_PLACE = {Rende, Università della Calabria}, CONFERENCE_DATE = {2-4 luglio 2009}, BOOKTITLE = {Parole. Il lessico come strumento per organizzare e trasmettere gli etnosaperi}, EDITOR = {Prantera, N. and Mendicino, A. and Citraro, C.}, } @INPROCEEDINGS{BONIN_2010_INPROCEEDINGS_BDMV_112966, AUTHOR = {Bonin, F. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Lessico settoriale e lessico comune nell'estrazione di terminologia specialistica da corpora di dominio}, YEAR = {2010}, KEYWORDS = {Automatic Term Extraction}, URL = {https://publications.cnr.it/doc/112966}, CONFERENCE_NAME = {XLIV Congresso Internazionale di Studi della Società di Linguistica Italiana}, CONFERENCE_PLACE = {Viterbo, Università degli Studi della Tuscia}, } @INPROCEEDINGS{DELLORLETTA_2010_INPROCEEDINGS_DMVV_173723, AUTHOR = {Dell'Orletta, F. 
and Montemagni, S. and Vecchi, E. M. and Venturi, G.}, TITLE = {Tecnologie linguistico-computazionali per il monitoraggio delle competenze linguistiche di apprendenti l'italiano come L2}, YEAR = {2010}, KEYWORDS = {Natural Language Processing, Educational Linguistics, Language Learning}, URL = {https://publications.cnr.it/doc/173723}, CONFERENCE_NAME = {Congresso "IT. L2: italiano lingua seconda nell'università, nella scuola e sul territorio. Esperienze didattiche e ricerche" Università del Piemonte Orientale "Amedeo Avogadro", Facoltà di Lettere e Filosofia}, CONFERENCE_PLACE = {Vercelli}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{MONTEMAGNI_2010_INPROCEEDINGS_M_112955, AUTHOR = {Montemagni, S.}, TITLE = {Ontology Learning. An introduction}, YEAR = {2010}, KEYWORDS = {Legal Text Processing, Ontology Learning, NLP}, URL = {https://publications.cnr.it/doc/112955}, CONFERENCE_NAME = {Summer School LEX2010-Managing Legal Resources in the Semantic Web, Session "Ontology in the Legal Domain"}, CONFERENCE_PLACE = {Ravenna}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{MONTEMAGNI_2010_INPROCEEDINGS_M_112957, AUTHOR = {Montemagni, S.}, TITLE = {Tecnologie linguistico-computazionali per il monitoraggio della lingua italiana}, YEAR = {2010}, KEYWORDS = {Language Variation, Natural Language Processing}, URL = {https://publications.cnr.it/doc/112957}, CONFERENCE_NAME = {Giornata di Studio "Lo stato della lingua. 
Il CNR e l'italiano nel terzo millennio" organizzata dal Consiglio Nazionale delle Ricerche-Dipartimento Identità Culturale}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{MONTEMAGNI_2010_INPROCEEDINGS_M_112958, AUTHOR = {Montemagni, S.}, TITLE = {The BioLexicon: a Large-Scale Domain-Specific Lexical Resource for Biomedical Text Mining}, YEAR = {2010}, KEYWORDS = {Text Mining, Knowledge Extraction, Lexical Resources}, URL = {https://publications.cnr.it/doc/112958}, CONFERENCE_NAME = {LREC 2010 2nd Workshop on Building and evaluating resources for biomedical text mining}, CONFERENCE_PLACE = {Malta}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{MONTEMAGNI_2010_INPROCEEDINGS_M_112962, AUTHOR = {Montemagni, S.}, TITLE = {Design, Construction and Use of an Italian Dependency Treebank: Methodological Issues and Empirical Results}, YEAR = {2010}, KEYWORDS = {Syntactic Annotation, Treebanks}, URL = {https://publications.cnr.it/doc/112962}, CONFERENCE_NAME = {The Copenhagen Dependency Treebank Workshop on "Designing Treebanks"}, CONFERENCE_PLACE = {Copenhagen (DK)}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{MONTEMAGNI_2010_INPROCEEDINGS_MWDN_112967, AUTHOR = {Montemagni, S. and Wieling, M. and De Jonge, B.
and Nerbonne, J.}, TITLE = {Modelli di variazione dialettale e analisi dei tratti linguistici sottostanti: un nuovo approccio dialettometrico}, YEAR = {2010}, KEYWORDS = {Computational dialectology}, URL = {https://publications.cnr.it/doc/112967}, CONFERENCE_NAME = {XI Congresso Silfi-Congresso della Società Internazionale di Linguistica e Filologia Italiana}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {2010}, } @TECHREPORT{MONTEMAGNI_2010_TECHREPORT_M_157485, AUTHOR = {Montemagni, S.}, TITLE = {Computational Models of Dialectal Variation and Underlying Linguistic Features}, YEAR = {2010}, KEYWORDS = {Computational Dialectology, Language Variation}, URL = {https://publications.cnr.it/doc/157485}, } @TECHREPORT{PIRRELLI_2010_TECHREPORT_PLMDGM_367784, AUTHOR = {Pirrelli, V. and Lenci, A. and Montemagni, S. and Dell'Orletta, F. and Giovannetti, E. and Marchi, S.}, TITLE = {ConnectToLife (modulo semantico)-Rapporto tecnico finale}, YEAR = {2010}, ABSTRACT = {Il presente documento costituisce il rapporto tecnico finale del progetto Connect-To-Life (modulo semantico) relativo alle attività svolte dall'unità ILC-CNR.}, KEYWORDS = {annotazione linguistica, estrazione di termini, clustering semantico, trattamento automatico della lingua, costruzione di ontologie}, PAGES = {16}, URL = {https://publications.cnr.it/doc/367784}, } @INCOLLECTION{AGNOLONI_2009_INCOLLECTION_ABFPMV_173012, AUTHOR = {Agnoloni, T. and Bacci, L. and Francesconi, E. and Peters, W. and Montemagni, S. and Venturi, G.}, TITLE = {A two-level Knowledge approach to support multilingual legislative drafting}, YEAR = {2009}, KEYWORDS = {DALOS project, Ontological-linguistic}, URL = {https://publications.cnr.it/doc/173012}, } @INCOLLECTION{DELLORLETTA_2009_INCOLLECTION_DLMMP_184585, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {Text-2-Knowledge: una piattaforma linguistico-computazionale per l'estrazione di conoscenza da testi}, YEAR = {2009}, ABSTRACT = {The paper describes the automatic extraction of domain knowledge from Italian document collections and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.}, KEYWORDS = {Term extraction, Ontology Learning}, PAGES = {285-300}, URL = {https://publications.cnr.it/doc/184585}, PUBLISHER = {Bulzoni (Roma, ITA)}, ISBN = {978-88-7870-469-5}, EDITOR = {Ferrari, G. and Benatti, R. and Mosca, M.}, } @INCOLLECTION{LENCI_2009_INCOLLECTION_LMP_186141, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Annotazione sintattica di corpora: aspetti metodologici}, YEAR = {2009}, ABSTRACT = {Un assunto sempre più condiviso nell'ambito degli studi sull'acquisizione sia di L1 che di L2 è che l'evidenza empirica privilegiata debba essere rappresentata da corpora di produzioni scritte o orali degli apprendenti, estensivamente annotate a molteplici livelli di rappresentazione linguistica. Più in generale, corpora lemmatizzati e annotati a livello morfosintattico fanno ormai parte dello strumentario comune del linguista. Accanto ad essi, si fa però strada l'esigenza di disporre di risorse testuali più sofisticate dal punto di vista delle modalità di esplorazione linguistica, come ad esempio corpora annotati a livello sintattico (le cosiddette treebank). 
Questi consentono infatti di osservare i processi di convergenza degli apprendenti verso la lingua "obiettivo" anche a livello di specifici tratti grammaticali astratti o di macro-strutture linguistiche. L'articolo propone uno schema di annotazione sintattica caratterizzato da un doppio livello di codifica. Si tratta di un approccio originale che differisce dalla maggior parte degli schemi di annotazione sintattica esistenti per due aspetti: 1. la separazione della dimensione relazionale da quella a costituenti, che sono trattati a livelli di annotazione indipendenti, ma al tempo stesso correlati, in modo tale che lo stesso testo è simultaneamente interrogabile ai due livelli; 2. la rappresentazione a costituenti fornisce una rappresentazione del testo come sequenza di proto-costituenti sintagmatici non ricorsivi. Questa strategia di annotazione permette una fattorizzazione di diversi aspetti e dimensioni della struttura sintattica che risulta promettente da un lato per l'annotazione di corpora di lingua "non-standard" come quelli contenenti produzioni di apprendenti di L1 o L2, sia come punto di partenza per successivi processi di estrazione di informazione linguistica dal testo. Dopo aver illustrato le motivazioni sottostanti allo schema proposto, ciascun livello di rappresentazione (chunking e dipendenze funzionali) viene illustrato in dettaglio, mostrandone anche la possibilità di combinazione sullo stesso testo. L'articolo si chiude con la discussione di prospettive di uso di corpora annotati secondo lo schema di annotazione proposto.}, KEYWORDS = {Corpora annotati, annotazione sintattica}, PAGES = {25-46}, URL = {https://publications.cnr.it/doc/186141}, PUBLISHER = {Guerra Edizioni (Perugia, ITA)}, ISBN = {978-88-557-0168-6}, BOOKTITLE = {CORPORA DI ITALIANO L2: TECNOLOGIE, METODI, SPUNTI TEORICI}, EDITOR = {Andorno, C. and Rastelli, S.}, } @INCOLLECTION{LENCI_2009_INCOLLECTION_LMPV_136465, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.
and Venturi, G.}, TITLE = {Ontology learning from Italian legal texts}, YEAR = {2009}, ABSTRACT = {The paper reports on the methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. Evaluated results, however preliminary, show the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.}, KEYWORDS = {Ontology Learning, document management, legal knowledge extraction}, PAGES = {75-94}, URL = {https://publications.cnr.it/doc/136465}, VOLUME = {188}, DOI = {10.3233/978-1-58603-942-4-75}, ISBN = {978-1-58603-942-4}, BOOKTITLE = {Law, Ontologies and the Semantic Web-Channelling the Legal Information Flood}, EDITOR = {Breuker, J. and Casanovas, P. and Klein, M. C. A. and Francesconi, E.}, } @EDITORIAL{CASELLAS_2009_EDITORIAL_CFHM_143540, AUTHOR = {Casellas, N. and Francesconi, E. and Hoekstra, R. and Montemagni, S.}, TITLE = {Proceedings of the 3rd Workshop on Legal Ontologies and Artificial Intelligence Techniques (LOAIT '09) joint with the 2nd Workshop on Semantic Processing of Legal Texts}, YEAR = {2009}, URL = {https://publications.cnr.it/doc/143540}, VOLUME = {2}, } @INPROCEEDINGS{VENTURI_2009_INPROCEEDINGS_VLMVSTA_173712, AUTHOR = {Venturi, G. and Lenci, A. and Montemagni, S. and Vecchi, E. M. and Sagri, M. T. and Tiscornia, D.
and Agnoloni, T.}, TITLE = {Towards a FrameNet Resource for the Legal Domain}, YEAR = {2009}, KEYWORDS = {Frame Semantics, Legal Ontologies, Knowledge Representation, Corpus Annotation}, URL = {https://publications.cnr.it/doc/173712}, CONFERENCE_NAME = {3rd Workshop on Legal Ontologies and Artificial Intelligence Techniques joint with 2nd Workshop on Semantic Processing of Legal text}, CONFERENCE_PLACE = {Barcelona, Spain}, CONFERENCE_DATE = {2009}, } @INPROCEEDINGS{VENTURI_2009_INPROCEEDINGS_VMMSTMA_84736, AUTHOR = {Venturi, G. and Montemagni, S. and Marchi, S. and Sasaki, Y. and Thompson, P. and McNaught, J. and Ananiadou, S.}, TITLE = {Bootstrapping a Verb Lexicon for Biomedical Information Extraction}, YEAR = {2009}, ABSTRACT = {The extraction of information from texts requires resources that contain both syntactic and semantic properties of lexical units. As the use of language in specialized domains, such as biology, can be very different to the general domain, there is a need for domain-specific resources to ensure that the information extracted is as accurate as possible. We are building a large-scale lexical resource for the biology domain, providing information about predicate-argument structure that has been bootstrapped from a biomedical corpus on the subject of E. Coli. The lexicon is currently focussed on verbs, and includes both automatically-extracted syntactic subcategorization frames, as well as semantic event frames that are based on annotation by domain experts. In addition, the lexicon contains manually-added explicit links between semantic and syntactic slots in corresponding frames. 
To our knowledge, this lexicon currently represents a unique resource within in the biomedical domain.}, KEYWORDS = {domain-specific lexical resources, Biological Language Processing, syntax-semantic linking}, PAGES = {137-148}, URL = {https://publications.cnr.it/doc/84736}, DOI = {10.1007/978-3-642-00382-0_11}, PUBLISHER = {Springer-Verlag (Berlin Heidelberg, DEU)}, ISBN = {9783642003813}, CONFERENCE_NAME = {10th International Conference on Intelligent Text Processing and Computational Linguistics}, CONFERENCE_PLACE = {Mexico City, Mexico}, CONFERENCE_DATE = {1-7/03/2009}, } @INPROCEEDINGS{SPINOSA_2009_INPROCEEDINGS_SGCMVM_130118, AUTHOR = {Spinosa, P. and Giardiello, G. and Cherubini, M. and Marchi, S. and Venturi, G. and Montemagni, S.}, TITLE = {NLP-based Metadata Extraction for Legal Text Consolidation}, YEAR = {2009}, KEYWORDS = {Natural Language Processing, textual amendments, XML representation, metadata extraction, consolidation of legal text}, URL = {https://publications.cnr.it/doc/130118}, CONFERENCE_NAME = {Twelfth International Conference on Artificial Intelligence and Law (ICAIL 2009)}, CONFERENCE_PLACE = {Barcelona}, CONFERENCE_DATE = {June 8-12, 2009}, } @INPROCEEDINGS{VENTURI_2009_INPROCEEDINGS_VMMSTMA_112956, AUTHOR = {Venturi, G. and Montemagni, S. and Marchi, S. and Sasaki, Y. and Thompson, P. and McNaught, J. and Ananiadou, S.}, TITLE = {Bootstrapping a Verb Lexicon for Biomedical Information Extraction}, YEAR = {2009}, ABSTRACT = {The extraction of information from texts requires resources that contain both syntactic and semantic properties of lexical units. As the use of language in specialized domains, such as biology, can be very different to the general domain, there is a need for domain-specific resources to ensure that the information extracted is as accurate as possible. We are building a large-scale lexical resource for the biology domain,
providing information about predicate-argument structure that has been bootstrapped from a biomedical corpus on the subject of E. Coli. The lexicon is currently focussed on verbs, and includes both automatically-extracted syntactic subcategorization frames, as well as semantic event frames that are based on annotation by domain experts. In addition, the lexicon contains manually-added explicit links between semantic and syntactic slots in corresponding frames. To our knowledge, this lexicon currently represents a unique resource within in the biomedical domain.}, KEYWORDS = {domain-specific lexical resources, lexical acquisition, syntax-semantics linking, Information Extraction, Biological Language Processing}, PAGES = {137-148}, URL = {https://publications.cnr.it/doc/112956}, VOLUME = {5449}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, ISBN = {978-3-642-00381-3}, CONFERENCE_NAME = {International Conference on Intelligent Text Processing and Computational Linguistics (CICLing 2009)}, CONFERENCE_PLACE = {Mexico City, Mexico}, CONFERENCE_DATE = {March 1-7, 2009}, BOOKTITLE = {Proceedings of the 10th International Conference on Intelligent Text Processing and Computational Linguistics (CICLing 2009)}, EDITOR = {Gelbukh, A.}, } @MISC{CASELLAS_2009_MISC_CFHM_157461, AUTHOR = {Casellas, N. and Francesconi, E. and Hoekstra, R. and Montemagni, S.}, TITLE = {3rd Workshop on Legal Ontologies and Artificial Intelligence Techniques joint with 2nd Workshop on Semantic Processing of Legal Texts}, YEAR = {2009}, KEYWORDS = {Legal Ontologies, Computational Semantics}, URL = {https://publications.cnr.it/doc/157461}, } @ARTICLE{DELLORLETTA_2008_ARTICLE_DLMMPV_64541, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.
and Venturi, G.}, TITLE = {Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio}, YEAR = {2008}, ABSTRACT = {The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.}, KEYWORDS = {Natural Language Processing, Machine Learning, Knowledge extraction from texts, Ontology learning, Legal ontologies}, PAGES = {197-218}, URL = {https://publications.cnr.it/doc/64541}, VOLUME = {26}, PUBLISHER = {Aida (Roma, Italia)}, ISSN = {1594-2201}, JOURNAL = {Aida Informazioni (Online)}, } @ARTICLE{MONTEMAGNI_2008_ARTICLE_M_64543, AUTHOR = {Montemagni, S.}, TITLE = {The space of Tuscan dialectal variation. A correlation study}, YEAR = {2008}, ABSTRACT = {The paper illustrates the results of a correlation study focusing on linguistic variation in an Italian region, Tuscany. By exploiting a multi-level representation scheme of dialectal data, the study analyses attested patterns of phonetic and morpho-lexical variation with the aim of testing the degree of correlation between a) phonetic and morpho-lexical variation, and b) linguistic variation and geographic distance. The correlation analysis was performed by combining two complementary approaches proposed in dialectometric literature, namely by computing both global and place-specific correlation measures and by inspecting their spatial distribution. 
Achieved results demonstrate that phonetic and morpho-lexical variations in Tuscany seem to follow a different pattern than encountered in previous studies.}, KEYWORDS = {Computational dialectology, Dialectometry}, PAGES = {135-152}, URL = {http://www.euppublishing.com/doi/abs/10.3366/E1753854809000354}, VOLUME = {2}, DOI = {10.3366/E1753854809000354}, PUBLISHER = {Edinburgh University Press for the Association for History and Computing (Edinburgh, Regno Unito)}, ISSN = {1753-8548}, JOURNAL = {International journal of humanities and arts computing (Print)}, } @INCOLLECTION{MONTEMAGNI_2008_INCOLLECTION_M_136460, AUTHOR = {Montemagni, S.}, TITLE = {Analisi linguistico-computazionali del corpus dialettale dell'Atlante Lessicale Toscano. Primi risultati sul rapporto toscano-italiano}, YEAR = {2008}, KEYWORDS = {Corpus dialettale}, URL = {https://publications.cnr.it/doc/136460}, PUBLISHER = {Pacini (Pisa, ITA)}, } @INPROCEEDINGS{DELLORLETTA_2008_INPROCEEDINGS_DLMMPV_84707, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio}, YEAR = {2008}, ABSTRACT = {The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. 
Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.}, KEYWORDS = {Natural Language Processing, Machine Learning, Knowledge extraction from texts, Ontology learning, Legal ontologies}, PAGES = {197-218}, URL = {http://www.assiterm91.it/wp-content/uploads/2010/11/Convegno-2008.pdf}, VOLUME = {Anno 26, numero 1-2}, PUBLISHER = {Aida (Roma, Italia)}, ISSN = {1594-2201}, CONFERENCE_NAME = {Atti del Convegno Nazionale Ass. I. Term}, CONFERENCE_PLACE = {Arcavacata di Rende (CS)}, CONFERENCE_DATE = {5-7/06/2008}, BOOKTITLE = {Terminologia analisi testuale e documentazione nella città digitale}, } @INPROCEEDINGS{DELLORLETTA_2008_INPROCEEDINGS_DLMMPV_84698, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Marchi, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Acquiring Legal Ontologies from Domain-specific Texts}, YEAR = {2008}, ABSTRACT = {The paper reports on methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts in the environmental domain. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents.
Evaluated results, however preliminary, are very encouraging, showing the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.}, KEYWORDS = {Ontology learning, Document management, knowledge extraction from texts, Natural Language Processing}, PAGES = {98-101}, URL = {https://publications.cnr.it/doc/84698}, CONFERENCE_NAME = {LangTech 2008}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {28-29/02/2008}, } @INPROCEEDINGS{GIOVANNETTI_2008_INPROCEEDINGS_GMM_84706, AUTHOR = {Giovannetti, E. and Marchi, S. and Montemagni, S.}, TITLE = {Combining statistical techniques and lexico-syntactic patterns for semantic relations extraction from text}, YEAR = {2008}, ABSTRACT = {We describe here a methodology to combine two different techniques for Semantic Relation Extraction from texts. On the one hand, generic lexicosyntactic patterns are applied to the linguistically analyzed corpus to detect a first set of pairs of co-occurring words, possibly involved in "syntagmatic" relations. On the other hand, a statistical unsupervised association system is used to obtain a second set of pairs of "distributionally similar" terms, that appear to occur in similar contexts, thus possibly involved in "paradigmatic" relations. The approach aims at learning ontological information by filtering the candidate relations obtained through generic lexico-syntactic patterns and by labelling the anonymous relations obtained through the statistical system.
The resulting set of relations can be used to enrich existing ontologies and for semantic annotation of documents or web pages.}, KEYWORDS = {Ontology Learning from Text, Semantic Relation Extraction, Lexico-syntactic Patterns, Distributional Similarity}, URL = {http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_54.pdf}, CONFERENCE_NAME = {SWAP 2008-Semantic Web Applications and Perspectives}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {15-17 December 2008}, EDITOR = {Gangemi, A. and Keizer, J. and Presutti, V. and Stoermer, H.}, } @INPROCEEDINGS{GIOVANNETTI_2008_INPROCEEDINGS_GMMB_84726, AUTHOR = {Giovannetti, E. and Marchi, S. and Montemagni, S. and Bartolini, R.}, TITLE = {Ontology Learning and Semantic Annotation: a Necessary Symbiosis}, YEAR = {2008}, ABSTRACT = {Semantic annotation of text requires the dynamic merging of linguistically structured information and a "world model", usually represented as a domain-specific ontology. On the other hand, the process of engineering a domain-ontology through semi-automatic ontology learning system requires the availability of a considerable amount of semantically annotated documents. Facing this bootstrapping paradox requires an incremental process of annotation-acquisition-annotation, whereby domain-specific knowledge is acquired from linguistically-annotated texts and then projected back onto texts for extra linguistic information to be annotated and further knowledge layers to be extracted. The presented methodology is a first step in the direction of a full "virtuous" circle where the semantic annotation platform and the evolving ontology interact in symbiosis. As a case study we have chosen the semantic annotation of product catalogues. 
We propose a hybrid approach, combining pattern matching techniques to exploit the regular structure of product descriptions in catalogues, and Natural Language Processing techniques which are resorted to analyze natural language descriptions. The semantic annotation involves the access to the ontology, semi-automatically bootstrapped with an ontology learning tool from annotated collections of catalogues.}, KEYWORDS = {Information Extraction, Information Retrieval, Ontologies, Tools, Systems}, PAGES = {2079-2085}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {2008}, BOOKTITLE = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Tapias, D.}, } @INPROCEEDINGS{LENCI_2008_INPROCEEDINGS_LMPM_84730, AUTHOR = {Lenci, A. and McGillivray, B. and Pirrelli, V. and Montemagni, S.}, TITLE = {Unsupervised Acquisition of Verb Subcategorization Frames from Shallow-Parsed Corpora}, YEAR = {2008}, KEYWORDS = {Acquisition, Machine Learning, Corpus (creation, annotation, etc.), Lexicon, Lexical database}, URL = {https://publications.cnr.it/doc/84730}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{SASAKI_2008_INPROCEEDINGS_SMPRMA_84703, AUTHOR = {Sasaki, Y. and Montemagni, S. and Pezik, P. and Rebholz Schuhmann, D. and McNaught, J.
and Ananiadou, S.}, TITLE = {BioLexicon: A Lexical Resource for the Biology Domain}, YEAR = {2008}, KEYWORDS = {BioLexicon, Terminological verbs}, URL = {https://publications.cnr.it/doc/84703}, CONFERENCE_NAME = {Third International Symposium on Semantic Mining in Biomedicine}, CONFERENCE_PLACE = {Turku, Finland}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{THOMPSON_2008_INPROCEEDINGS_TCAMMTV_84704, AUTHOR = {Thompson, P. and Cotter, P. and Ananiadou, S. and McNaught, J. and Montemagni, S. and Trabucco, A. and Venturi, G.}, TITLE = {Building a Bio-Event Annotated Corpus for the Acquisition of Semantic Frames from Biomedical Corpora}, YEAR = {2008}, KEYWORDS = {Corpus (creation, annotation, etc.), Text mining, Semantics, Event Extraction}, PAGES = {2159-2166}, URL = {https://publications.cnr.it/doc/84704}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Morocco}, CONFERENCE_DATE = {28-30 maggio 2008}, } @INPROCEEDINGS{THOMPSON_2008_INPROCEEDINGS_TVMMA_84705, AUTHOR = {Thompson, P. and Venturi, G. and McNaught, J. and Montemagni, S. and Ananiadou, S.}, TITLE = {Categorising Modality in Biomedical Texts}, YEAR = {2008}, ABSTRACT = {The accurate recognition of modal information is vital for the correct interpretation of statements. In this paper, we report on the collection of a list of words and phrases that express modal information in biomedical texts, and propose a categorisation scheme according to the type of information conveyed. We have performed a small pilot study through the annotation of 202 MEDLINE abstracts according to our proposed scheme.
Our initial results suggest that modality in biomedical statements can be predicted fairly reliably though the presence of particular lexical items, together with a small amount of contextual information.}, KEYWORDS = {Biomedical texts, Modality}, PAGES = {27-34}, URL = {https://publications.cnr.it/doc/84705}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation: Workshop 'Building and Evaluating Resources for Biomedical Text Mining'}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26 maggio 2008}, } @INPROCEEDINGS{MONTEMAGNI_2008_INPROCEEDINGS_M_112936, AUTHOR = {Montemagni, S.}, TITLE = {Exploring the correlation between phonetic and lexical variation in Tuscany}, YEAR = {2008}, KEYWORDS = {Dialectal variation, ALT-Web}, URL = {https://publications.cnr.it/doc/112936}, CONFERENCE_NAME = {Thirteenth International Conference on Methods in Dialectology}, CONFERENCE_PLACE = {Leeds}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{REBHOLZSCHUHMANN_2008_INPROCEEDINGS_RPLDKSMMMCA_112935, AUTHOR = {Rebholz Schuhmann, D. and Pezik, P. and Lee, V. and Del Gratta, R. and Kim, J. and Sasaki, Y. and McNaught, J. and Montemagni, S. and Monachini, M. and Calzolari, N. and Ananiadou, S.}, TITLE = {BioLexicon: Towards a reference terminological resource in the biomedical domain}, YEAR = {2008}, ABSTRACT = {The BioLexicon is a publicly available large-scale terminological resource which brings together potential terms from several resources representing selected semantic types (genes, proteins, chemicals, species, enzymes, selected ontological terms). 
The schema of the BioLexicon enables improved resolution of term ambiguity and follows lexical standards for terminological resources.}, KEYWORDS = {BioLexicon}, URL = {https://publications.cnr.it/doc/112935}, ISBN = {978-1-61567-371-1}, CONFERENCE_NAME = {16th Annual International Conference on Intelligent Systems for Molecular Biology}, CONFERENCE_PLACE = {Toronto, Canada}, CONFERENCE_DATE = {19-23 Luglio 2008}, } @TECHREPORT{MONTEMAGNI_2008_TECHREPORT_M_157448, AUTHOR = {Montemagni, S.}, TITLE = {Augmented version of the bio-lexicon extended with bio event information and term-to-term weighted links}, YEAR = {2008}, KEYWORDS = {Bio-lexicon}, URL = {https://publications.cnr.it/doc/157448}, } @MISC{PIRRELLI_2008_MISC_PM_151569, AUTHOR = {Pirrelli, V. and Montemagni, S.}, TITLE = {AnITA}, YEAR = {2008}, KEYWORDS = {NLP Tools}, URL = {https://publications.cnr.it/doc/151569}, } @ARTICLE{DELLORLETTA_2007_ARTICLE_DFLMP_64537, AUTHOR = {Dell'Orletta, F. and Federico, M. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Maximum Entropy for Italian PoS Tagging}, YEAR = {2007}, ABSTRACT = {L'articolo illustra le prestazioni del ILC-UniPi MaxEnt PoS Tagger in Evalita 2007. The report contains a description of the ILC-UniPi MaxEnt PoS Tagger performance in Evalita 2007.}, PAGES = {10-11}, URL = {https://publications.cnr.it/doc/64537}, VOLUME = {IV(2)}, } @INCOLLECTION{DELLORLETTA_2007_INCOLLECTION_DLMP_136459, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Corpus-based Modelling of Grammar Variation}, YEAR = {2007}, KEYWORDS = {Grammar variation, stochastic parsing, linguistic typology}, PAGES = {38-55}, URL = {https://publications.cnr.it/doc/136459}, PUBLISHER = {Angeli (Milano, ITA)}, ISBN = {9788846489449}, BOOKTITLE = {Language resources and linguistic theory}, EDITOR = {Sansò, A.}, } @INPROCEEDINGS{AGNOLONI_2007_INPROCEEDINGS_ABFSTMV_171352, AUTHOR = {Agnoloni, T. and Bacci, L. and Francesconi, E. and Spinosa, P. 
and Tiscornia, D. and Montemagni, S. and Venturi, G.}, TITLE = {Building an ontological support for multilingual legislative drafting}, YEAR = {2007}, PAGES = {9-18}, URL = {https://publications.cnr.it/doc/171352}, CONFERENCE_NAME = {International Conference on Legal Knowledge and Information Systems (JURIX 2007)}, CONFERENCE_PLACE = {Leiden}, CONFERENCE_DATE = {2007}, BOOKTITLE = {Legal Knowledge and Information Systems}, EDITOR = {Lodder, A. R. and Mommers, L.}, } @INPROCEEDINGS{DELLORLETTA_2007_INPROCEEDINGS_DFLMP_84696, AUTHOR = {Dell'Orletta, F. and Federico, M. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Maximum Entropy for Italian PoS Tagging}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84696}, CONFERENCE_NAME = {Evaluation of NLP Tools for Italian-EVALITA 2007}, CONFERENCE_PLACE = {Roma}, } @INPROCEEDINGS{DELLORLETTA_2007_INPROCEEDINGS_DLMMP_84687, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-2-Knowledge: una piattaforma linguistico-computazionale per l'estrazione di conoscenza da testi}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84687}, CONFERENCE_NAME = {XL Congresso Internazionale di Studi della Società di Linguistica Italiana (SLI 2006)}, CONFERENCE_PLACE = {Roma}, } @INPROCEEDINGS{GIOVANNETTI_2007_INPROCEEDINGS_GMMB_84690, AUTHOR = {Giovannetti, E. and Marchi, S. and Montemagni, S. and Bartolini, R.}, TITLE = {Ontology-based Semantic Annotation of Product Catalogues}, YEAR = {2007}, ABSTRACT = {This paper describes a methodology for the semantic annotation of product catalogues. We propose a hybrid approach, combining pattern matching techniques to exploit the regular structure of product descriptions in catalogues, and Natural Language Processing techniques which are resorted to analyze natural language descriptions.
It also includes the access to an application ontology, semi-automatically bootstrapped from collections of catalogues with an ontology learning tool, which is used to drive the semantic annotation process.}, KEYWORDS = {Semantic Annotation of texts, Ontology Learning, Information Extraction for e-commerce}, PAGES = {235-239}, URL = {https://publications.cnr.it/doc/84690}, CONFERENCE_NAME = {Recent Advances in Natural Language Processing (RANLP-2007)}, CONFERENCE_PLACE = {Borovets}, CONFERENCE_DATE = {27-29 settembre 2007}, BOOKTITLE = {Proceedings of the International Conference "Recent Advances in Natural Language Processing"}, } @INPROCEEDINGS{LENCI_2007_INPROCEEDINGS_LMPV_84693, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V. and Venturi, G.}, TITLE = {NLP-based ontology learning from legal texts. A case study}, YEAR = {2007}, ABSTRACT = {The paper reports on the methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts in the environmental domain. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. 
Evaluated results, however preliminary, are very encouraging, showing the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.}, PAGES = {113-129}, URL = {https://publications.cnr.it/doc/84693}, CONFERENCE_NAME = {II Workshop on Legal Ontologies and Artificial Intelligence Techniques (LOAIT'07)}, CONFERENCE_PLACE = {Stanford}, CONFERENCE_DATE = {4 giugno 2007}, } @INPROCEEDINGS{MONTEMAGNI_2007_INPROCEEDINGS_M_84692, AUTHOR = {Montemagni, S.}, TITLE = {Patterns of phonetic variation in Tuscany: using dialectometric techniques on multi-level representations of dialectal data}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84692}, CONFERENCE_NAME = {International Workshop on Computational Phonology}, CONFERENCE_PLACE = {Borovets}, CONFERENCE_DATE = {2007}, } @INPROCEEDINGS{MONTEMAGNI_2007_INPROCEEDINGS_M_84694, AUTHOR = {Montemagni, S.}, TITLE = {Aree fonetiche e lessicali toscane a confronto: prime elaborazioni computazionale dei dati dell’Atlante Lessicale Toscano}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84694}, CONFERENCE_NAME = {XL Congresso Internazionale di Studi della Società di Linguistica Italiana}, CONFERENCE_PLACE = {Vercelli}, CONFERENCE_DATE = {2007}, } @INPROCEEDINGS{MONTEMAGNI_2007_INPROCEEDINGS_M_84695, AUTHOR = {Montemagni, S.}, TITLE = {Acquisizione automatica di termini da testi: primi esperimenti di estrazione e strutturazione di terminologia metalinguistica}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84695}, CONFERENCE_NAME = {DLM su Lessicologia e metalinguaggio}, CONFERENCE_PLACE = {Macerata}, CONFERENCE_DATE = {2007}, } @INPROCEEDINGS{SORIA_2007_INPROCEEDINGS_SBLMP_84682, AUTHOR = {Soria, C. and Bartolini, R. and Lenci, A. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {Automatic Extraction of Semantics in Law Documents}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84682}, CONFERENCE_NAME = {V Legislative XML Workshop}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {2007}, } @TECHREPORT{AITMOKHTAR_2007_TECHREPORT_ABBDGGMSS_157418, AUTHOR = {Ait Mokhtar, S. and Barker, E. and Brunelli, R. and Demetriou, G. and Gaizauskas, R. and Giovannetti, E. and Montemagni, S. and Sándor, A. and Sun, H.}, TITLE = {Semantic Annotation Services for Virtual Information and Knowledge Environments}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157418}, } @TECHREPORT{BOUQUET_2007_TECHREPORT_BSMGSNSBCJ_157419, AUTHOR = {Bouquet, P. and Stoermer, H. and Montemagni, S. and Giovannetti, E. and Semeraro, G. and Niederee, C. and Stecher, R. and Brunelli, R. and Chanod, J. P. and Jacquin, T.}, TITLE = {Semantic Representation and Management Report}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157419}, } @TECHREPORT{MONTEMAGNI_2007_TECHREPORT_MMVBBRPT_157440, AUTHOR = {Montemagni, S. and Marchi, S. and Venturi, G. and Bartolini, R. and Bertagna, F. and Ruffolo, P. and Peters, W. and Tiscornia, D.}, TITLE = {Report on Ontology learning tool and testing}, YEAR = {2007}, ABSTRACT = {This deliverable documents the work done within the DALOS EU project for what concerns the definition and implementation of methodologies and techniques to bootstrap terminological and ontological knowledge from domain corpora. 
Starting from a corpus of legacy legislative texts in different languages, linguistic technologies combined with statistical techniques have been used to extract significant terms as well as to structure them in conceptual structures for the different languages dealt with within the project, namely Italian, English, Spanish and Dutch.}, KEYWORDS = {Ontology Learning, Term Extraction, Natural Language Processing, Conceptual Indexing}, URL = {https://publications.cnr.it/doc/157440}, } @TECHREPORT{MONTEMAGNI_2007_TECHREPORT_MS_157420, AUTHOR = {Montemagni, S. and Simi, M.}, TITLE = {The Italian dependency annotated corpus developed for the CoNLL-2007 Shared Task}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157420}, } @TECHREPORT{MONTEMAGNI_2007_TECHREPORT_MTV_157421, AUTHOR = {Montemagni, S. and Trabucco, A. and Venturi, G.}, TITLE = {Bio-Event Linguistic Annotation Tool. User Manual}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157421}, } @TECHREPORT{MONTEMAGNI_2007_TECHREPORT_MTVTCAMKRP_157422, AUTHOR = {Montemagni, S. and Trabucco, A. and Venturi, G. and Thompson, P. and Cotter, P. and Ananiadou, S. and McNaught, J. and Kim, J. and Rebholz, D. and Pezik, P.}, TITLE = {Event annotation of domain corpora}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157422}, } @TECHREPORT{SASAKI_2007_TECHREPORT_SMAPMMP_157423, AUTHOR = {Sasaki, Y. and McNaught, J. and Ananiadou, S. and Pezik, P. and McGillivray, B. and Montemagni, S. and Pirrelli, V.}, TITLE = {Augmented Version of Bio-Lexicon}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157423}, } @MISC{PICCHI_2007_MISC_PMCSP_157436, AUTHOR = {Picchi, E. and Montemagni, S. and Cucurullo, S. and Sassolini, E. and Paoli, M.}, TITLE = {ALT-Web. Sito dell’Atlante Lessicale Toscano (ALT) in rete}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157436}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BGMMABSB_84664, AUTHOR = {Bartolini, R. and Giovannetti, E. and Marchi, S. and Montemagni, S. 
and Andreatta, C. and Brunelli, R. and Stecher, R. and Bouquet, P.}, TITLE = {Multimedia Information Extraction in Ontology-based Semantic Annotation of Product Catalogues}, YEAR = {2006}, ABSTRACT = {The demand for efficient methods for extracting knowledge from multimedia content has led to a growing research community investigating the convergence of multimedia and knowledge technologies. In this paper we describe a methodology for extracting multimedia information from product catalogues empowered by the synergetic use and extension of a domain ontology. The methodology was implemented in the Trade Fair Advanced Semantic Annotation Pipeline of the VIKE-framework.}, KEYWORDS = {Semantic Web Technologies, ontology creation, ontology extraction, ontology evolution, semantic annotation of multimedia content}, URL = {https://publications.cnr.it/doc/84664}, CONFERENCE_NAME = {SWAP 2006}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {18-20 December 2006}, } @INPROCEEDINGS{CUCURULLO_2006_INPROCEEDINGS_CMPPS_84629, AUTHOR = {Cucurullo, S. and Montemagni, S. and Paoli, M. and Picchi, E. and Sassolini, E.}, TITLE = {Dialectal resources on-line: the ALT-Web experience}, YEAR = {2006}, ABSTRACT = {The paper presents an on-line dialectal resource, ALT-Web, which gives access to the linguistic data of the Atlante Lessicale Toscano, a specially designed linguistic atlas in which lexical data have both a diatopic and diastratic characterisation. 
The paper focuses on: the dialectal data representation model; the access modalities to the ALT dialectal corpus; ontology-based search.}, KEYWORDS = {Computational dialectology, Dialectal databases, Construction of lexical resources}, PAGES = {1846-1851}, URL = {http://www.lrec-conf.org/lrec2006/}, VOLUME = {Proceedings}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {24-25-26 Maggio 2006}, BOOKTITLE = {Dialectal resources on-line: the ALT-Web experience}, } @INPROCEEDINGS{CUCURULLO_2006_INPROCEEDINGS_CMPPS_84661, AUTHOR = {Cucurullo, S. and Montemagni, S. and Paoli, M. and Picchi, E. and Sassolini, E.}, TITLE = {Atlante Dialettale in rete: ALT-Web}, YEAR = {2006}, ABSTRACT = {The paper presents an on-line dialectal resource, ALT-Web, which gives access to the linguistic data of the Lexical Atlas of Tuscany or Atlante Lessicale Toscano, a specially designed linguistic atlas in which lexical data have both a diatopic and diastratic characterisation. The paper illustrates ALT-Web with particular emphasis on: 1) the dialectal data representation model; 2) the access modalities to the ALT dialectal corpus designed to produce an output tailored to the specific needs of the different classes of users (both professionals and common citizens); 3) ontology-based search. These represent three main features which differentiate ALT-Web both from the previous digitalised ALT version and, most interestingly, from other on-line dialectal resources. 
At the time of writing, this is the first resource of this type in Italy, and one of the few at the international level.}, KEYWORDS = {dialectal resources, information retrieval}, PAGES = {661-672}, URL = {http://www.euralex.org/publications/}, VOLUME = {2}, PUBLISHER = {Edizioni dell'ORSO (Alessandria, ITA)}, ISBN = {8876949186}, CONFERENCE_NAME = {12° EURALEX International Congress}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {6-9 Settembre 2006}, BOOKTITLE = {Proceedings in 12° EURALEX International Congress, Congresso internazionale di lessicografia}, EDITOR = {Corino, E. and Marello, C. and Onesti, C.}, } @INPROCEEDINGS{DELLORLETTA_2006_INPROCEEDINGS_DLMP_84630, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Searching treebanks for functional constraints: cross-lingual experiments in grammatical relation assignment}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84630}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, } @INPROCEEDINGS{DELLORLETTA_2006_INPROCEEDINGS_DLMP_84660, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Probing the space of grammatical variation: induction of cross-lingual grammatical constraints from treebanks}, YEAR = {2006}, ABSTRACT = {The paper reports on a detailed quantitative analysis of distributional language data of both Italian and Czech, highlighting the relative contribution of a number of distributed grammatical factors to sentence-based identification of subjects and direct objects. 
The work uses a Maximum Entropy model of stochastic resolution of conflicting grammatical constraints and is demonstrably capable of putting explanatory theoretical accounts to the test of usage-based empirical verification.}, PAGES = {21-28}, URL = {https://publications.cnr.it/doc/84660}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {1-932432-78-7}, CONFERENCE_NAME = {Coling/ACL 2006}, CONFERENCE_PLACE = {Sydney (Australia)}, CONFERENCE_DATE = {22 July 2006}, BOOKTITLE = {Proceedings of the Workshop on Frontiers in Linguistically Annotated Corpora 2006 (LAC 06)}, } @INPROCEEDINGS{MONTEMAGNI_2006_INPROCEEDINGS_MPP_84659, AUTHOR = {Montemagni, S. and Paoli, M. and Picchi, E.}, TITLE = {ALT-WEB: l'Atlante Lessicale Toscano in rete}, YEAR = {2006}, ABSTRACT = {Scopo dell'articolo è la presentazione di ALT-Web, ovvero l'Atlante Lessicale Toscano in rete. ALT-Web è stato ideato per rendere il patrimonio linguistico-culturale testimoniato dall'Atlante Lessicale Toscano una risorsa educativa realmente disponibile in modo che possa fornire un contributo alla conservazione della memoria dell'identità culturale toscana e al contempo costituisca un prezioso punto di riferimento per lo studio di dinamiche linguistiche sia a livello areale sia a livello socio-culturale. La sua collocazione in rete porta inevitabilmente ALT-Web a rivolgersi a una vasta gamma di utenti non più circoscritta agli addetti ai lavori (ovvero dialettologi, linguisti, etno-linguisti), ma che include anche insegnanti, operatori culturali (ad esempio, personale di musei e di istituzioni culturali pubbliche e private) fino al cittadino navigatore di Internet che voglia capire di più della propria identità linguistica e culturale. 
Il vasto e variegato bacino di utenza a cui intende rivolgersi ALT-Web ha portato alla trasformazione della versione informatizzata dell'Atlante Lessicale Toscano (conosciuta come DBT-ALT) in una rete ipertestuale con modalità e funzionalità di accesso differenziate in relazione alle diverse classi di utenza; a questo aspetto, è legata l'altra interpretazione dell'acronimo ALT-Web, ovvero quella di "ALT come rete". L'articolo illustra aspetti del processo di progettazione e realizzazione dell'opera che rivestono un qualche interesse per il linguista e il dialettologo. In particolare, dopo un breve excursus che riepiloga le caratteristiche principali della risorsa di partenza, l'articolo illustra la progettazione e realizzazione di ALT-Web, partendo dall'analisi dei requisiti e la definizione delle caratteristiche generali per arrivare ad aspetti più specifici che riguardano le modalità di accesso ai materiali e la normalizzazione dei materiali dialettali in trascrizione fonetica.}, KEYWORDS = {Dialettologia Computazionale-Risorse dialettali in rete-Atlante lessicale}, PAGES = {209-241}, URL = {https://publications.cnr.it/doc/84659}, PUBLISHER = {Antenore (Roma, ITA)}, ISBN = {88-8455-606-6}, CONFERENCE_NAME = {Lessicografia Dialettale. Ricordando Paolo Zolli. Atti del Convegno di Studi}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {9-11 dicembre 2004}, BOOKTITLE = {Lessicografia dialettale: ricordando Paolo Zolli-Atti del convegno di studi, Venezia, 9-11 dicembre 2004}, EDITOR = {Bruni, F. and Marcato, C.}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BGMMABSNBB_84663, AUTHOR = {Bartolini, R. and Giovannetti, E. and Marchi, S. and Montemagni, S. and Andreatta, C. and Brunelli, R. and Stecher, R. and Niederée, C. and Bouquet, P. 
and Bortoli, S.}, TITLE = {Ontology Learning in Multimedia Information Extraction from Product Catalogues}, YEAR = {2006}, ABSTRACT = {We propose a methodology for extracting multimedia information from product catalogues empowered by the synergetic use and extension of a domain ontology. The use of domain ontologies in this context additionally opens up innovative ways of catalogue use. The method is characterized by incrementally feeding and exploiting the ontology during an information extraction process, implemented by the semantic annotation of the analysed document, and by providing support for detecting existing similar ontologies to enable reuse of (parts of) them.}, KEYWORDS = {knowledge-driven multimedia analysis, ontology learning, semi-automatic content annotation tools}, URL = {https://publications.cnr.it/doc/84663}, CONFERENCE_NAME = {BOEMIE 2006}, CONFERENCE_PLACE = {Podebrady, Czech Republic}, CONFERENCE_DATE = {6 ottobre 2006}, } @INPROCEEDINGS{PIRRELLI_2006_INPROCEEDINGS_PLM_112916, AUTHOR = {Pirrelli, V. and Lenci, A. and Montemagni, S.}, TITLE = {Probing the space of grammatical variation: induction of cross-lingual grammatical constraints from treebanks}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/112916}, CONFERENCE_NAME = {Language resources and language research: typology, second language acquisition, English Linguistics}, CONFERENCE_PLACE = {Pavia}, CONFERENCE_DATE = {2006}, } @MISC{BARTOLINI_2006_MISC_BDLMMP_151563, AUTHOR = {Bartolini, R. and Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-to-Knowledge (T2K) Versione 2}, YEAR = {2006}, ABSTRACT = {Versione 2. Text-to-Knowledge (T2K) è una piattaforma software di supporto avanzato alla gestione documentale per la creazione dinamica di repertori terminologici e ontologie di dominio a partire da testi e per l'indicizzazione concettuale di documenti. 
Il sistema T2K si propone di offrire una batteria integrata di strumenti avanzati di analisi linguistica del testo, analisi statistica e apprendimento automatico del linguaggio, destinati a offrire una rappresentazione accurata del contenuto di una base documentale non strutturata, per scopi di indicizzazione avanzata e navigazione intelligente. I risultati di questo processo di acquisizione sono annotati in forma di metadati XML, offrendo in tal modo la prospettiva di una sempre crescente e diretta interoperabilità con sistemi automatici per la produzione di contenuti digitali selezionati e strutturati dinamicamente su misura, per diversi profili di utenza. Versioni prototipali di T2K sono già operative su alcuni portali della pubblica amministrazione e sono state applicate per l'indicizzazione di contenuti didattici multimediali. E' in corso l'integrazione della tecnologia T2K nel sistema di gestione informatica di documentazione scientifica del CNR.}, KEYWORDS = {text to knowledge, nlp, estrazione terminologica, ontology learning, indicizzazione terminologica}, URL = {https://publications.cnr.it/doc/151563}, } @MISC{MONTEMAGNI_2006_MISC_M_151556, AUTHOR = {Montemagni, S.}, TITLE = {La Treebank Sintattico Semantica dell'Italiano del progetto SI-TAL}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/151556}, } @MISC{PICCHI_2006_MISC_PMSCP_151557, AUTHOR = {Picchi, E. and Montemagni, S. and Sassolini, E. and Cucurullo, S. and Paoli, M.}, TITLE = {ALTWEB}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/151557}, } @ARTICLE{LENCI_2005_ARTICLE_LMP_64502, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Acquiring and Representing Meaning: Theoretical and Computational Perspectives}, YEAR = {2005}, PAGES = {19-66}, URL = {https://publications.cnr.it/doc/64502}, VOLUME = {22-23}, } @BOOK{LENCI_2005_BOOK_LMP_136436, AUTHOR = {Lenci, A. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {Acquiring and Representing Word Meaning: Computational perspectives}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/136436}, PUBLISHER = {Istituti Editoriali e Poligrafici Internazionali (Pisa-Roma, ITA)}, ISBN = {88-8147-413-1}, } @BOOK{LENCI_2005_BOOK_LMP_136437, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Testo e computer-Elementi di linguistica computazionale}, YEAR = {2005}, ABSTRACT = {In che modo il computer può aiutarci a comprendere come funziona la nostra lingua? Cosa significa analizzare un testo con l'aiuto di un calcolatore? In che misura possiamo estendere le potenzialità del computer rendendolo capace di interagire con gli utenti umani nella loro lingua? Queste e altre domande sono l'oggetto di indagine della linguistica computazionale, una disciplina che ha al suo centro proprio il rapporto tra lingua e computer. Il libro fornisce gli elementi di base della linguistica computazionale partendo da un interesse primario per il testo, la sua struttura e il suo contenuto. Il volume propone una sintesi equilibrata e accessibile tra sapere e fare, nozioni di base e loro applicazione, ed è destinato in primo luogo agli studenti delle facoltà umanistiche e scientifiche interessati all'interazione tra scienze umane e informatica, ma anche agli studiosi che vogliano imparare a usare il computer come strumento di ricerca sul linguaggio.}, KEYWORDS = {Linguistica Computazionale}, PAGES = {255}, URL = {https://publications.cnr.it/doc/136437}, PUBLISHER = {Carocci (Roma, ITA)}, ISBN = {8843034251}, } @EDITORIAL{PIRRELLI_2005_EDITORIAL_PM_146069, AUTHOR = {Pirrelli, V. 
and Montemagni, S.}, TITLE = {Acquisition and Representation of Word Meaning: Theoretical and computational perspectives}, YEAR = {2005}, KEYWORDS = {Lexical semantics, Distributional semantics, Lexicon acquisition}, URL = {https://publications.cnr.it/doc/146069}, VOLUME = {XXII-XXIII}, PUBLISHER = {Istituti Editoriali e Poligrafici Internazionali (Pisa-Roma, ITA)}, ISBN = {88-8147-413-1}, } @INPROCEEDINGS{BARTOLINI_2005_INPROCEEDINGS_BGLMP_84576, AUTHOR = {Bartolini, R. and Giorgetti, D. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Automatic Incremental Term Acquisition from Domain Corpora}, YEAR = {2005}, ABSTRACT = {We describe a technique for the acquisition of terms from Italian domain text corpora, which relies both on sophisticated linguistic analysis and on statistical measures applied to linguistically processed text rather than to raw text as it is usually the case. The main advantage of this technique is that minimal a priori knowledge of term structure is required, thus allowing to explore and discover terms in a given domain without imposing a strict pattern matching structure on them, and also to easily extend it to different domains. The approach we present in this paper is incremental as it may be iterated to discover terms of increasing complexity built on top of terms discovered in the previous iteration. 
The reason why it is convenient to adopt such an incremental approach is that it allows to "clean" data from noise in the first step, elicitating the constituent terms, and then to refine term acquisition on "skimmed" term data.}, PAGES = {293-300}, URL = {https://publications.cnr.it/doc/84576}, CONFERENCE_NAME = {7th International conference on Terminology and Knowledge Engineering (TKE2005)}, CONFERENCE_PLACE = {Copenhagen}, CONFERENCE_DATE = {2005}, BOOKTITLE = {Proceedings of TKE 2005-7th International Conference on Terminology and Knowledge Engineering}, } @INPROCEEDINGS{BIAGIOLI_2005_INPROCEEDINGS_BFPMS_172458, AUTHOR = {Biagioli, C. and Francesconi, E. and Passerini, A. and Montemagni, S. and Soria, C.}, TITLE = {Automatic semantics extraction in law documents}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/172458}, CONFERENCE_NAME = {Tenth International Conference on Artificial Intelligence and Law (ICAIL 2005)}, CONFERENCE_PLACE = {Bologna}, CONFERENCE_DATE = {2005}, } @INPROCEEDINGS{DELLORLETTA_2005_INPROCEEDINGS_DLMP_84579, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Climbing the path to grammar: a maximum entropy model of subject/object learning}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84579}, CONFERENCE_NAME = {Psychocomputational Models of Human Language Acquisition (PsychoCompLA-2005)}, CONFERENCE_PLACE = {Ann Arbor (USA)}, } @TECHREPORT{BARTOLINI_2005_TECHREPORT_BGMM_157366, AUTHOR = {Bartolini, R. and Giorgetti, D. and Marchi, S. and Montemagni, S.}, TITLE = {ILC-CNR Contribution to Deliverable 4.1}, YEAR = {2005}, ABSTRACT = {The goal of the semantic annotation is the annotation of entities and relations starting from input documents conformant with the harmonisation output schema as defined within WP3. 
This harmonisation schema will focus on the structural and logical organisation of the documents, while WP4 will concentrate on the annotation of textual entities and image elements. The results of semantic annotation are intended to populate the domain ontology.}, KEYWORDS = {NLP}, URL = {https://publications.cnr.it/doc/157366}, } @TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMMP_157367, AUTHOR = {Bartolini, R. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Personalizzazione degli Italian NLP tools}, YEAR = {2005}, ABSTRACT = {Il presente documento intende offrire criteri e risultati della fase di personalizzazione dei moduli per l'analisi automatica del testo (Italian NLP tools o "AnITA") all'interno dell'architettura prevista nell'ambito del progetto FuLL.}, KEYWORDS = {NLP}, PAGES = {13}, URL = {https://publications.cnr.it/doc/157367}, } @TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMP_157369, AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Modellazione del motore sintattico e delle strutture dati di supporto}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157369}, } @TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMMP_157370, AUTHOR = {Bartolini, R. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-2-Knowledge: Acquisizione semi-automatica di ontologie per l'indicizzazione semantica di documenti}, YEAR = {2005}, ABSTRACT = {Text-2-Knowledge, Acquisizione semi-automatica di ontologie per l'indicizzazione semantica di documenti}, KEYWORDS = {nlp, terminology extraction}, URL = {https://publications.cnr.it/doc/157370}, } @TECHREPORT{CUCURULLO_2005_TECHREPORT_CMPPS_157373, AUTHOR = {Cucurullo, S. and Montemagni, S. and Paoli, M. and Picchi, E. and Sassolini, E.}, TITLE = {Atlante Lessicale Toscano in rete (ALT-Web). Relazione finale}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157373}, } @TECHREPORT{GIORGETTI_2005_TECHREPORT_GMM_157380, AUTHOR = {Giorgetti, D. 
and Marchi, S. and Montemagni, S.}, TITLE = {ILC-CNR Contribution to Deliverable 5.1}, YEAR = {2005}, ABSTRACT = {This document describes the high level infrastructure designed as part of the project VIKEF for creating a Virtual Information and Knowledge Environment (VIKE), namely an environment made up of explicit representation of the information and knowledge implicitly contained in one or more collections of Information-Content-Knowledge (ICK) resources, and of a collection of services operating on this explicit representation of information and knowledge; it is a virtual environment, as the representation and the services for accessing information and knowledge is almost completely independent from the physical properties of the original data.}, KEYWORDS = {NLP}, URL = {https://publications.cnr.it/doc/157380}, } @TECHREPORT{MARCHI_2005_TECHREPORT_MM_157384, AUTHOR = {Marchi, S. and Montemagni, S.}, TITLE = {ILC-CNR Contribution to Deliverable 3.1}, YEAR = {2005}, ABSTRACT = {This document presents the first set of knowledge and content acquisition components. Starting from the Annotation Schema definition, it will then describe the Harmonization support and the Annotation components, as well as the various resources needed all along the current chain.}, KEYWORDS = {NLP}, URL = {https://publications.cnr.it/doc/157384}, } @MISC{BARTOLINI_2005_MISC_BDGMLMP_151548, AUTHOR = {Bartolini, R. and Dell'Orletta, F. and Giorgetti, D. and Marchi, S. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-to-Knowledge (T2K)}, YEAR = {2005}, ABSTRACT = {Piattaforma di estrazione e indicizzazione terminologica.}, KEYWORDS = {NLP, estrazione terminologica}, URL = {https://publications.cnr.it/doc/151548}, } @MISC{BARTOLINI_2005_MISC_BMLMP_151550, AUTHOR = {Bartolini, R. and Marchi, S. and Lenci, A. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {NLPtools}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151550}, } @MISC{PICCHI_2005_MISC_PMSCP_151532, AUTHOR = {Picchi, E. and Montemagni, S. and Sassolini, E. and Cucurullo, S. and Paoli, M.}, TITLE = {ALTWEB}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151532}, } @INCOLLECTION{AGOSTINIANI_2004_INCOLLECTION_AMPP_136438, AUTHOR = {Agostiniani, L. and Montemagni, S. and Paoli, M. and Picchi, E.}, TITLE = {Lessicografia dialettale e computer: questioni di rappresentazione e recupero dei dati}, YEAR = {2004}, KEYWORDS = {Lessicografia computazionale, Lessicografia Dialettale}, URL = {https://publications.cnr.it/doc/136438}, PUBLISHER = {Centro Interuniversitario di Studi Veneti (Venezia, ITA)}, } @INCOLLECTION{BARTOLINI_2004_INCOLLECTION_BLMPS_30867, AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V. and Soria, C.}, TITLE = {Automatic Classification and Analysis of Provisions in Italian Legal Texts: A Case Study}, YEAR = {2004}, ABSTRACT = {In this paper we address the problem of automatically enriching legal texts with semantic annotation, an essential pre–requisite to effective indexing and retrieval of legal documents. This is done through illustration of SALEM (Semantic Annotation for LEgal Management), a computational system developed for automated semantic annotation of (Italian) law texts. SALEM is an incremental system using Natural Language Processing techniques to perform two tasks: i) classify law paragraphs according to their regulatory content, and ii) extract relevant text fragments corresponding to specific semantic roles that are relevant for the different types of regulatory content. 
The paper sketches the overall architecture of SALEM and reports results of a preliminary case study on a sample of Italian law texts.}, KEYWORDS = {Annotazione semantica, Classificazione automatica}, PAGES = {593-604}, URL = {https://rdcu.be/dftjm}, VOLUME = {3292}, DOI = {10.1007/978-3-540-30470-8_72}, PUBLISHER = {Springer (Berlin, DEU)}, ISBN = {978-3-540-23664-1}, BOOKTITLE = {On the Move to Meaningful Internet Systems 2004: OTM 2004 Workshops. OTM 2004}, EDITOR = {Meersman, R. and Tari, Z. and Corsaro, A.}, }

@INCOLLECTION{PAOLI_2004_INCOLLECTION_PMP_136444,
  AUTHOR    = {Paoli, M. and Montemagni, S. and Picchi, E.},
  TITLE     = {ALT Web: l'Atlante Lessicale Toscano in rete},
  YEAR      = {2004},
  URL       = {https://publications.cnr.it/doc/136444},
  PUBLISHER = {Centro Interuniversitario di Studi Veneti (Venezia, ITA)},
}

@INPROCEEDINGS{BARTOLINI_2004_INPROCEEDINGS_BLMP_84570,
  AUTHOR           = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE            = {Hybrid Constraints for Robust Parsing: First Experiments and Evaluation},
  YEAR             = {2004},
  URL              = {https://publications.cnr.it/doc/84570},
  CONFERENCE_NAME  = {LREC 2004: Fourth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Lisbon, Portugal},
  CONFERENCE_DATE  = {2004},
  BOOKTITLE        = {Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC 2004)},
}

@INPROCEEDINGS{BARTOLINI_2004_INPROCEEDINGS_BLMPS_84571,
  AUTHOR           = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V. and Soria, C.},
  TITLE            = {Semantic Mark-up of Italian Legal Texts Through NLP-based Techniques},
  YEAR             = {2004},
  URL              = {https://publications.cnr.it/doc/84571},
  ISBN             = {2-9517408-1-6},
  CONFERENCE_NAME  = {LREC 2004: Fourth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Lisbon, Portugal},
  CONFERENCE_DATE  = {2004},
  BOOKTITLE        = {Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC 2004)},
}

@INPROCEEDINGS{HEPPLE_2004_INPROCEEDINGS_HIAMMG_84609,
  AUTHOR           = {Hepple, M. and Ireson, N. and Allegrini, P. and Marchi, S. and Montemagni, S. and Gómez Hidalgo, J. M.},
  TITLE            = {NLP-enhanced Content filtering within the POESIA Project},
  YEAR             = {2004},
  ABSTRACT         = {This paper introduces the POESIA internet filtering system, which is open-source, and which combines standard filtering methods, such as positive/negative URL lists, with more advanced techniques, such as image processing and NLP-enhanced text filtering. The description here focusses on components providing textual content filtering for three European languages (English, Italian and Spanish), employing NLP methods to enhance performance. We address also the acquisition of language data needed to develop these filters, and the evaluation of the system and its components.},
  KEYWORDS         = {Image processing, Natural language processing systems, Open systems},
  PAGES            = {1967--1970},
  URL              = {https://www.aclweb.org/anthology/L04-1507/},
  ISBN             = {2-9517408-1-6},
  CONFERENCE_NAME  = {LREC 2004: Fourth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Lisbon, Portugal},
  CONFERENCE_DATE  = {26-28 May 2004},
  BOOKTITLE        = {Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC 2004)},
  EDITOR           = {Lino, M. T. and Xavier, M. F. and Ferreira, F. and Costa, R. and Silva, R.},
}

@INPROCEEDINGS{PIRRELLI_2004_INPROCEEDINGS_PAM_112920,
  AUTHOR           = {Pirrelli, V. and Allegrini, P. and Montemagni, S.},
  TITLE            = {Classifying text through time: a complexity science approach to dynamic web page filtering},
  YEAR             = {2004},
  URL              = {https://publications.cnr.it/doc/112920},
  CONFERENCE_NAME  = {International Conference on Text Mining (CIFT)},
  CONFERENCE_PLACE = {La Rochelle, France},
  CONFERENCE_DATE  = {2004},
}

@INPROCEEDINGS{PIRRELLI_2004_INPROCEEDINGS_PLM_112923,
  AUTHOR           = {Pirrelli, V. and Lenci, A. and Montemagni, S.},
  TITLE            = {The lexicon in context: distributional evidence and representational issues},
  YEAR             = {2004},
  URL              = {https://publications.cnr.it/doc/112923},
  CONFERENCE_NAME  = {International Colloquium: Word Structure and Lexical Systems: models and applications},
  CONFERENCE_PLACE = {Pavia},
  CONFERENCE_DATE  = {2004},
}

@TECHREPORT{BARTOLINI_2004_TECHREPORT_BGLMP_157375,
  AUTHOR = {Bartolini, E. and Giorgetti, D. and Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE  = {Text-2-Knowledge: Acquisizione automatica di ontologie per l'indicizzazione semantica di documenti},
  YEAR   = {2004},
  URL    = {https://publications.cnr.it/doc/157375},
}

@ARTICLE{ALLEGRINI_2003_ARTICLE_AMP_64466,
  AUTHOR   = {Allegrini, P. and Montemagni, S. and Pirrelli, V.},
  TITLE    = {Example-based automatic induction of semantic classes through entropic scores},
  YEAR     = {2003},
  ABSTRACT = {The paper deals in some detail with the application of example-based machine learning techniques to the task of automatically acquiring semantic information from functionally annotated texts. Special emphasis is placed on the use of “analogical proportions” as a means of structuring the knowledge embodied in attested examples, and weighing up their contribution to a variety of lexico-semantic classification tasks. Careful quantitative analysis of automatically acquired information proves to shed considerable light on the semantic inter-connectivity of input data, their structure and organising principles.},
  PAGES    = {1--45},
  URL      = {https://publications.cnr.it/doc/64466},
  VOLUME   = {16-17},
}

@ARTICLE{LENCI_2003_ARTICLE_LMP_64476,
  AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE  = {Chunk-it. An Italian shallow parser for robust syntactic annotation},
  YEAR   = {2003},
  PAGES  = {353--386},
  URL    = {https://publications.cnr.it/doc/64476},
  VOLUME = {16-17},
}

@ARTICLE{MONTEMAGNI_2003_ARTICLE_MBBCCLPZFMRBPSZMPD_64477,
  AUTHOR = {Montemagni, S. and Barsotti, F. and Battista, M. and Calzolari, N. and Corazzari, O. and Lenci, A. and Pirrelli, V. and Zampolli, A. and Fanciulli, F. and Massetani, M. and Raffaelli, R. and Basili, R. and Pazienza, M. T. and Saracino, D. and Zanzotto, F. and Mana, N. and Pianesi, F. and Delmonte, R.},
  TITLE  = {The syntactic-semantic Treebank of Italian. An Overview},
  YEAR   = {2003},
  PAGES  = {461--492},
  URL    = {https://publications.cnr.it/doc/64477},
  VOLUME = {16-17},
}

@ARTICLE{MONTEMAGNI_2003_ARTICLE_MPB_64478,
  AUTHOR   = {Montemagni, S. and Picchi, E. and Biagini, L.},
  TITLE    = {DBT-ALT: a system for storing and querying the data of the 'Atlante Linguistico Toscano'},
  YEAR     = {2003},
  ABSTRACT = {Computers can help dialectologists to make full use of the information they have so laboriously and painstakingly acquired: the basic dimensions of dialectal research can be enlarged and its possible outcomes can become more sophisticated. In this paper, we describe a lexical database for dialectal data, DBT-ALT, which has been designed and constructed to contain linguistic data collected for the Atlante Lessicale Toscano (ALT), a lexical atlas of Tuscany. DBT-ALT is illustrated in detail, with particular emphasis on its search functions which allow for complex queries taking into account a wide range of parameters interactively defined by the user on the basis of his/her research interests.},
  PAGES    = {493--517},
  URL      = {https://publications.cnr.it/doc/64478},
  VOLUME   = {18-19},
}

@INCOLLECTION{ALLEGRINI_2003_INCOLLECTION_ALMP_136427,
  AUTHOR   = {Allegrini, P. and Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE    = {Le forme del significato. Acquisizione e rappresentazione dell'informazione semantica},
  YEAR     = {2003},
  KEYWORDS = {Acquisizione, Semantica Lessicale, Ontologia, Machine Learning},
  URL      = {https://publications.cnr.it/doc/136427},
}

@INCOLLECTION{MONTEMAGNI_2003_INCOLLECTION_MBBCCLZRPMD_136422,
  AUTHOR   = {Montemagni, S. and Barsotti, F. and Battista, M. and Calzolari, N. and Corazzari, O. and Lenci, A. and Zampolli, A. and Raffaelli, R. and Pazienza, M. T. and Mana, N. and Delmonte, R.},
  TITLE    = {Building the Italian Syntactic-Semantic Treebank},
  YEAR     = {2003},
  KEYWORDS = {Corpora testuali, Annot. sintattica, Annot. semantica, Treebank},
  URL      = {https://publications.cnr.it/doc/136422},
}

@TECHREPORT{ALLEGRINI_2003_TECHREPORT_ACMMHIGCDP_157348,
  AUTHOR   = {Allegrini, P. and Calzolari, N. and Marchi, S. and Montemagni, S. and Hepple, M. and Ireson, N. and Gomez Hidalgo, J. M. and Carrero Garcia, F. and De Buenaga Rodriguez, M. and Puertas Sanz, E.},
  TITLE    = {POESIA Lexical Resources and Tools for Each Language},
  YEAR     = {2003},
  ABSTRACT = {The aim of this report is to review the various resources that the different language processing sites expect to use in the development of their language-specific text filtering components. Some of the required resources are ones that were developed before Poesia, possibly by one of the Poesia partners, or possibly elsewhere but being now in the public domain. Such resources may require adaptation to the Poesia task. Other resources required for Poesia will be developed as part of the project. In some cases, this development has already been done or is in progress, whilst in others, it is yet to be undertaken. In what follows, the status of each of the resources described will be made clear in terms of these alternatives.},
  KEYWORDS = {Lexical Resources, nlp},
  PAGES    = {30},
  URL      = {https://publications.cnr.it/doc/157348},
}

@TECHREPORT{STARYNKEVITCH_2002_TECHREPORT_SDTZHIGACMMG_430635,
  AUTHOR   = {Starynkevitch, B. and Daoudi, M. and Tombelle, C. and Zheng, H. and Hepple, M. and Ireson, N. and Gomez Hidalgo, J. and Allegrini, P. and Calzolari, N. and Marchi, S. and Montemagni, S. and Guerra, S.},
  TITLE    = {POESIA Software Architecture Definition Document},
  YEAR     = {2002},
  ABSTRACT = {Software Architecture Definition Document},
  KEYWORDS = {NLP, Software Engineering},
  PAGES    = {68--80},
  URL      = {https://publications.cnr.it/doc/430635},
}

@ARTICLE{PICCHI_2001_ARTICLE_PMB_64487,
  AUTHOR   = {Picchi, E. and Montemagni, S. and Biagini, L.},
  TITLE    = {DBT-ALT: a System for Storing and Querying the Data of the Atlante Lessicale Toscano (ALT)},
  YEAR     = {2001},
  KEYWORDS = {Atlanti linguistici, Dialettologia comput, Lessicografia dialet, Geolinguistica, Sociolinguistica},
  PAGES    = {85--103},
  URL      = {https://publications.cnr.it/doc/64487},
  VOLUME   = {9},
}

@INPROCEEDINGS{PETERS_1994_INPROCEEDINGS_PFMZ_409402,
  AUTHOR           = {Peters, C. and Federici, S. and Montemagni, S. and Zamorani, C. N.},
  TITLE            = {From machine readable dictionaries to lexicons for NLP: the cobuild dictionaries-a different approach},
  YEAR             = {1994},
  ABSTRACT         = {We describe the results of a syntactic-semantic parser for Cobuild dictionary definitions. Unlike previous work on the automatic analysis of machine readable dictionaries, the particular structure of the Cobuild definition allows us to derive information that classifies the lexical item mainly in terms of the selectional restrictions or preferences encoded on its arguments. The resulting formalized lexical entries contain data that has generally been lacking in other lexical representations but which is expected to be very useful in a wide range of NLP purposes. We show how this information can be used in dictionary sense disambiguation by creating links throughout the lexicon both on the paradigmatic and the syntagmatic axes.},
  KEYWORDS         = {Lexical databases, Information storage and retrieval. Dictionaries},
  PAGES            = {147--157},
  URL              = {https://publications.cnr.it/doc/409402},
  CONFERENCE_NAME  = {6th International Congress on Lexicography},
  CONFERENCE_PLACE = {Amsterdam, The Netherlands},
  CONFERENCE_DATE  = {1994},
  BOOKTITLE        = {Euralex 1994 Proceedings},
  EDITOR           = {Martin, W.},
}

@TECHREPORT{BARNBROOK_1994_TECHREPORT_BCFHMPSS_446186,
  AUTHOR   = {Barnbrook, G. and Calzolari, N. and Federici, S. and Hoelter, M. and Montemagni, S. and Peters, C. and Schnelle, H. and Sinclair, J.},
  TITLE    = {ET10/51-Deliverable 8: Evaluation Report},
  YEAR     = {1994},
  ABSTRACT = {The objective of the work in Pisa has been to translate and produce instantiations of the syntactically parsed definitions of the Cobuild dictionary: provided by Birmingham in a Typed Feature Structure formalism. However, as described in Methodology above, our results have been produced at two different levels: intermediate results; final results in the form of TFS entries. In the following, we will discuss briefly the possible applications of these different results for the three user types recognized in the introduction to this section: i. Human user ii. Human user-assisted by the machine iii. The machine Obviously, the discussion here below refers entirely to the results that would be obtained once the parser has been applied to the whole dictionary.},
  KEYWORDS = {Language, Computational linguistics, Formal Definitions and Theory},
  PAGES    = {38},
  URL      = {https://publications.cnr.it/doc/446186},
}

@TECHREPORT{CALZOLARI_1994_TECHREPORT_CFMP_446200,
  AUTHOR   = {Calzolari, N. and Federici, S. and Montemagni, S. and Peters, C.},
  TITLE    = {ET-10/51-Final Report: Par. 3-Extracting, representing and using syntactic-semantic information from cobuild definitions},
  YEAR     = {1994},
  ABSTRACT = {In May 1992 a new research project brought together the authors of this report. With the help and support of several other people and institutions, they worked steadily for two years, trying to improve the design and building of machine-usable lexicons, for automatic translation and many other applications. The starting point was clear. Around 1989 Helmut Schnelle of the Ruhr-Universität Bochum became interested in the way in which words were defined in a new kind of dictionary called Cobuild. He thought that since they were couched in sentences of apparently ordinary English, and had distinctive and repetitive shapes according to their meanings, it should be possible to represent them in logical form by means of regular rules.},
  KEYWORDS = {Language, Computational linguistics, Formal Definitions and Theory, Semantics},
  PAGES    = {162},
  URL      = {https://publications.cnr.it/doc/446200},
}