@ARTICLE{ALZETTA_2023_ARTICLE_ADMPV_488202, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Miaschi, A. and Prat, E. and Venturi, G.}, TITLE = {Tell me how you write and I'll tell you what you read: a study on the writing style of book reviews}, YEAR = {2023}, ABSTRACT = {Purpose: The authors' goal is to investigate variations in the writing style of book reviews published on different social reading platforms and referring to books of different genres, which enables acquiring insights into communication strategies adopted by readers to share their reading experiences. Design/methodology/approach: The authors propose a corpus-based study focused on the analysis of A Good Review, a novel corpus of online book reviews written in Italian, posted on Amazon and Goodreads, and covering six literary fiction genres. The authors rely on stylometric analysis to explore the linguistic properties and lexicon of reviews and the authors conducted automatic classification experiments using multiple approaches and feature configurations to predict either the review's platform or the literary genre. Findings: The analysis of user-generated reviews demonstrates that language is a quite variable dimension across reading platforms, but not as much across book genres. The classification experiments revealed that features modelling the syntactic structure of the sentence are reliable proxies for discerning Amazon and Goodreads reviews, whereas lexical information showed a higher predictive role for automatically discriminating the genre. Originality/value: The high availability of cultural products makes information services necessary to help users navigate these resources and acquire information from unstructured data. This study contributes to a better understanding of the linguistic characteristics of user-generated book reviews, which can support the development of linguistically-informed recommendation services. 
Additionally, the authors release a novel corpus of online book reviews meant to support the reproducibility and advancements of the research.}, KEYWORDS = {Stylometric analysis, Genre detection, Natural language processing, Book reviews}, PAGES = {23}, URL = {https://www.emerald.com/insight/content/doi/10.1108/JD-04-2023-0073/full/html}, VOLUME = {79}, DOI = {10.1108/JD-04-2023-0073}, PUBLISHER = {Emerald (Bingley, Regno Unito)}, ISSN = {0022-0418}, JOURNAL = {Journal of documentation}, } @ARTICLE{BACCO_2023_ARTICLE_BDLMN_488201, AUTHOR = {Bacco, L. and Dell'Orletta, F. and Lai, H. and Merone, M. and Nissim, M.}, TITLE = {A text style transfer system for reducing the physician-patient expertise gap: An analysis with automatic and human evaluations}, YEAR = {2023}, ABSTRACT = {Physicians and patients often come from different backgrounds and have varying levels of education, which can result in communication difficulties in the healthcare process. To address this expertise gap, we present a "Text Style Transfer" system. Our system uses Semantic Textual Similarity techniques based on Sentence Transformers models to create pseudo-parallel datasets from a large, non-parallel corpus of lay and expert texts. This approach allowed us to train a denoising autoencoder model (BART), overcoming the limitations of previous systems. 
Our extensive analysis, which includes both automatic metrics and human evaluations from both lay (patients) and expert (physicians) individuals, shows that our system outperforms state-of-the-art models and is comparable to human-provided gold references in some cases.}, KEYWORDS = {Healthcare, Natural language processing, Text style transfer, Text simplification}, PAGES = {1--18}, URL = {https://www.sciencedirect.com/science/article/pii/S0957417423013763}, VOLUME = {233}, DOI = {10.1016/j.eswa.2023.120874}, PUBLISHER = {Pergamon (Oxford, Regno Unito)}, ISSN = {0957-4174}, JOURNAL = {Expert systems with applications}, } @ARTICLE{BIFFI_2023_ARTICLE_BGMS_490948, AUTHOR = {Biffi, M. and Guadagnini, E. and Montemagni, S. and Sassolini, E.}, TITLE = {Il lemmario del «GDLI»: dati quantitativi e prime osservazioni}, YEAR = {2023}, ABSTRACT = {Dopo la realizzazione della versione elettronica del solo testo del "Grande dizionario della lingua italiana" (GDLI), si è avviato un progetto di graduale informatizzazione della sua struttura. Questo articolo ne presenta il primo risultato, vale a dire l'estrazione automatica del lemmario che è così per la prima volta quantificabile e individuabile. Una prima parte del testo è dedicata all'illustrazione della strutturazione dei contenuti del dizionario e la loro rappresentazione secondo standard internazionalmente riconosciuti (XML-TEI); la seconda presenta una prima elaborazione dei dati del lemmario estratto; la terza propone una prima analisi comparativa con i lemmari di altri dizionari della lingua italiana.}, KEYWORDS = {Lessicografia, Lessicografia digitale, Lessicografia storica}, PAGES = {331--351}, URL = {https://accademiadellacrusca.it/it/riviste/articoli/slei-xl-2023/8679}, VOLUME = {40}, PUBLISHER = {Le Lettere (Firenze, Italia)}, ISSN = {0392-5218}, JOURNAL = {Studi di lessicografia italiana}, } @ARTICLE{BURGASSI_2023_ARTICLE_BG_478887, AUTHOR = {Burgassi, C. 
and Guadagnini, E.}, TITLE = {Per studiare il vocabolario del passato. La posizione delle parole in epoca storica}, YEAR = {2023}, ABSTRACT = {This paper aims to propose a new method for describing the lexicon of a language in a specific period of its history. The first paragraph outlines the two main ideas to be found in the studies concerning both synchronic and diachronic lexicology. In the second paragraph our method for lexical inquiry is presented along with its core concepts, such as textual Corpus Representativeness, Connotation, Connotation Rate (Quoziente Connotativo, QC) and word Position in the Center-Periphery Vocabulary Model. The third paragraph sketches two possible research lines, the first one regarding the lexicon of a given historical period (Old Italian), the second dealing with the comparison between two different linguistic historical phases (Old Italian vs. Contemporary Italian).}, KEYWORDS = {Historical Lexicology, Corpus Linguistics, Word Connotation, Word Position, Center-Periphery Vocabulary Model}, PAGES = {1--18}, URL = {https://revistas.uam.es/chimera/article/view/15698}, VOLUME = {10}, DOI = {10.15366/chimera2023.10.001}, PUBLISHER = {UAM ([Madrid], Spagna)}, ISSN = {2386-2629}, JOURNAL = {Chimera (Madrid)}, } @ARTICLE{CERULLI_2023_ARTICLE_CBD_491082, AUTHOR = {Cerulli, A. and Brunato, D. and Dell'Orletta, F.}, TITLE = {Linguistic Profile of a Text and Human Ratings of Writing Quality: a Case Study on Italian L1 Learner Essays}, YEAR = {2023}, ABSTRACT = {This paper presents a study based on the linguistic profiling methodology to explore the relationship between the linguistic structure of a text and how it is perceived in terms of writing quality by humans. The approach is tested on a selection of Italian L1 learners essays, which were taken from a larger longitudinal corpus of essays written by Italian L1 students enrolled in the first and second year of lower secondary school. 
Human ratings of writing quality by Italian native speakers were collected through a crowdsourcing task, in which annotators were asked to read pairs of essays and rated which one they believed to be better written. By analyzing these ratings, the study identifies a variety of linguistic phenomena spanning across distinct levels of linguistic description that distinguish the essays considered as 'winners' and evaluates the impact of students' errors on the human perception of writing quality.}, KEYWORDS = {human ratings, text quality, Natural Language Processing, learner corpus}, PAGES = {7--34}, URL = {https://www.ai-lc.it/wp-content/uploads/2023/09/IJCOL_9_1_1_cerulli_et_al.pdf}, VOLUME = {1}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{CHIARELLA_2023_ARTICLE_C_485365, AUTHOR = {Chiarella, D.}, TITLE = {Towards Multi-AUV Collaboration and Coordination: A Gesture-Based Multi-AUV Hierarchical Language and a Language Framework Comparison System}, YEAR = {2023}, ABSTRACT = {The underwater environment is a harmful environment, yet one of the richest and least exploited. For these reasons the idea of a robotic companion with the task of supporting and monitoring divers during their activities and operations has been proposed. However, the idea of a platoon of robots at the diver's disposal has never been fully addressed in these proposals due to the high cost of implementation and the usability, weight and bulk of the robots. Nevertheless, recent advancements in swarm robotics, materials engineering, deep learning, and the decreasing cost of autonomous underwater vehicles (AUVs), have rendered this concept increasingly viable. Therefore, this paper introduces, in the first part, a novel framework that integrates a revised version of a gesture-based language for underwater human-robot interaction (Caddian) based on insights gained from extensive field trials. 
The newly introduced objective of this framework is to enable the cooperation and coordination of an AUV team by one or more human operators, while allowing a human operator to delegate a robot leader to instruct the other robotic team members. The work, in the second part, provides an evaluation of the new language proposed thanks to a fifty million sentence corpus and describes a comparison framework, which is used to estimate it with respect to other existing underwater human-robot interaction languages.}, KEYWORDS = {gesture-based language, underwater human-robot interaction, multi-AUV collaboration, language corpora and resources}, PAGES = {28}, URL = {https://publications.cnr.it/doc/485365}, VOLUME = {11}, DOI = {10.3390/jmse11061208}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2077-1312}, JOURNAL = {Journal of marine science and engineering}, } @ARTICLE{ERJAVEC_2023_ARTICLE_EOOLSPRKBSCDDAVPDNLCRMKDRVMF_470080, AUTHOR = {Erjavec, T. and Ogrodniczuk, M. and Osenova, P. and Ljubesic, N. and Simov, K. and Pancur, A. and Rudolf, M. and Kopp, M. and Barkarson, S. and Steingrimsson, S. and Coltekin, C. and De Does, J. and Depuydt, K. and Agnoloni, T. and Venturi, G. and Perez, M. C. and De Macedo, L. D. and Navarretta, C. and Luxardo, G. and Coole, M. and Rayson, P. and Morkevicius, V. and Krilavicius, T. and Dargis, R. and Ring, O. and Van Heusden, R. and Marx, M. and Fiser, D.}, TITLE = {The ParlaMint corpora of parliamentary proceedings}, YEAR = {2023}, ABSTRACT = {This paper presents the ParlaMint corpora containing transcriptions of the sessions of the 17 European national parliaments with half a billion words. The corpora are uniformly encoded, contain rich meta-data about 11 thousand speakers, and are linguistically annotated following the Universal Dependencies formalism and with named entities. 
Samples of the corpora and conversion scripts are available from the project's GitHub repository, and the complete corpora are openly available via the CLARIN.SI repository for download, as well as through the NoSketch Engine and KonText concordancers and the Parlameter interface for on-line exploration and analysis.}, KEYWORDS = {Parliamentary proceedings, Linguistic annotation, Universal Dependencies}, PAGES = {1--34}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85124105199\&origin=inward}, DOI = {10.1007/s10579-021-09574-0}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @ARTICLE{FOLESANI_2023_ARTICLE_FBPTMTZNCBRDCG_482226, AUTHOR = {Folesani, F. and Belvederi, M. M. and Puggioni, C. and Tiberto, E. and Marella, M. and Toffanin, T. and Zerbinati, L. and Nanni, M. G. and Caruso, R. and Brunato, D. and Ravelli, A. A. and Dell'Orletta, F. and Chochinov, H. M. and Grassi, L.}, TITLE = {Linguistic markers of demoralization improvement in schizophrenia: A pilot study}, YEAR = {2023}, ABSTRACT = {Background and objectives: Individuals with schizophrenia display language impairments involving pragmatics, semantics and syntax. Language impairments may show diagnostic specificity and could relate to the ability of engaging in psychotherapy. This pilot study sought to: (1) identify linguistic features that might differentiate individuals with schizophrenia from distressed controls without psychotic symptoms; and (2) examine the association between linguistic abilities and clinical changes during psychotherapy. Methods: We recruited patients with schizophrenia and a comparison group of individuals with demoralization and distress due to cancer. Participants underwent Dignity Therapy (DT), an existentially-oriented brief psychotherapy focused on legacy and subjective dignity. Verbatim transcripts of the DT sessions were analysed using Natural Language Processing (NLP). 
In addition, we measured changes in levels of demoralization and dignity-related distress before and after DT, exploring the association with linguistic variables with network analysis. Results: Patients with schizophrenia could be differentiated from those with cancer-related distress using only three out of 141 linguistic variables: total number of words, number of prepositional chains and conversational elements. Across groups, better levels of discourse coherence and higher number of arguments controlled by a predicate (verb "arity") were associated with larger improvements in demoralization and, indirectly, dignity-related distress. Conclusions: Reproducible linguistic markers may be able to differentiate individuals with schizophrenia from those with less severe psychopathology, and to predict better uptake of psychotherapy independent from diagnosis. Future studies should explore whether linguistic features derived from NLP may be exploited as accessible diagnostic or prognostic markers to tailor psychotherapy and other interventions in schizophrenia.}, KEYWORDS = {Schizophrenia, Dignity Therapy, Natural Language Processing, Linguistic Profiling, Psychotherapy}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85153800425\&origin=inward}, DOI = {10.1016/j.ejpsy.2023.03.001}, PUBLISHER = {European Journal of Psychiatry (Saragosse, Spagna)}, ISSN = {0213-6163}, JOURNAL = {The European journal of psychiatry}, } @ARTICLE{GUADAGNINI_2023_ARTICLE_G_490947, AUTHOR = {Guadagnini, E.}, TITLE = {Una breve storia del 'cadavere': caduti latini, corpi morti romanzi e una postilla dantesca}, YEAR = {2023}, ABSTRACT = {The designations of death, dying, and the dead have been extensively studied, especially since they are often subject to linguistic taboo and are therefore named through euphemisms and dysphemisms. 
This contribution will reconstruct the history of the lexical type cadaver, in parallel with corpus (mortuum), from ancient Latin to modern Romance languages: the 'X-phemic' model will be discussed, but the study will adopt a semasiological perspective.}, KEYWORDS = {Lexicology, Romance Linguistics, Corpse, Dante Alighieri, Corpo morto}, PAGES = {129--152}, URL = {https://edizionicafoscari.unive.it/en/edizioni4/riviste/transcript/2023/2/una-breve-storia-del-cadavere-caduti-latini-corpi/}, VOLUME = {2}, DOI = {10.30687/TranScript/2785-5708/2023/04/001}, PUBLISHER = {Edizioni Ca' Foscari (Venezia, Italia)}, ISSN = {2785-5708}, JOURNAL = {TranScript (Venezia)}, } @ARTICLE{GUADAGNINI_2023_ARTICLE_G_490949, AUTHOR = {Guadagnini, E.}, TITLE = {[recensione] Toscana bilingue (1260 ca.-1430 ca.). Per una storia sociale del tradurre medievale}, YEAR = {2023}, KEYWORDS = {Volgarizzamenti, Traduttologia, Storia medievale}, PAGES = {239--243}, URL = {https://publications.cnr.it/doc/490949}, VOLUME = {87}, ISSN = {0035-1458}, JOURNAL = {Revue de linguistique romane}, } @ARTICLE{MARZI_2023_ARTICLE_MMV_490328, AUTHOR = {Marzi, C. and Melloni, C. and Vender, M.}, TITLE = {Finger-tracking reading profiles in monolingual and bilingual early graders}, YEAR = {2023}, ABSTRACT = {In this paper we propose an analysis of the reading behaviour of a group of Italian monolingual (n= 24) and bilingual (n= 35) 2nd schoolgraders, engaged in the tasks of reading aloud lists of isolated words and nonwords (from the DDE-2 test battery), as well as narrative connected texts displayed on the touch-screen of a common tablet, to be read either aloud or silently. A finger-tracking technique is illustrated, which provides detailed information about the reading behaviour and attention focus of early graders. Our results reveal various differences between groups. 
In particular, a different tracking pattern emerged in reading long, morphologically-complex word forms, correlating with a higher decoding error rate and comprehension difficulties in bilingual children compared with their monolingual peers. We suggest that the unsteady, discontinuous reading pattern for long noun and verb forms may be due to a (proto)-morphological reading strategy, with monolingual children being more successful in benefiting from a morpheme-based reading route. We also discuss the potentials of the finger-tracking technique as a tool to offer a more profound and comprehensive analysis of the reading profiles of both monolingual and bilingual readers.}, KEYWORDS = {developing readers, bilingualism, L2 literacy, connected text reading, morphological processing, finger-tracking}, PAGES = {327--361}, URL = {https://www.rivisteweb.it/doi/10.1418/109051}, VOLUME = {XXII}, DOI = {10.1418/109051}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{MARZI_2023_ARTICLE_MP_485504, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {A discriminative information-theoretical analysis of the regularity gradient in inflectional morphology}, YEAR = {2023}, ABSTRACT = {Over the last decades, several independent lines of research in morphology have questioned the hypothesis of a direct correspondence between sublexical units and their mental correlates. Word and paradigm models of morphology shifted the fundamental part-whole relation in an inflection system onto the relation between individual inflected word forms and inflectional paradigms. In turn, the use of artificial neural networks of densely interconnected parallel processing nodes for morphology learning marked a radical departure from a morpheme-based view of the mental lexicon. 
Lately, in computational models of Discriminative Learning, a network architecture has been combined with an uncertainty reducing mechanism that dispenses with the need for a one-to-one association between formal contrasts and meanings, leading to the dissolution of a discrete notion of the morpheme. The paper capitalises on these converging lines of development to offer a unifying information-theoretical, simulation-based analysis of the costs incurred in processing (ir)regularly inflected forms belonging to the verb systems of English, German, French, Spanish and Italian. Using Temporal Self-Organising Maps as a computational model of lexical storage and access, we show that a discriminative, recurrent neural network, based on Rescorla-Wagner's equations, can replicate speakers' exquisite sensitivity to widespread effects of word frequency, paradigm entropy and morphological (ir)regularity in lexical processing. The evidence suggests an explanatory hypothesis linking Word and paradigm morphology with principles of information theory and human perception of morphological structure. According to this hypothesis, the ways more or less regularly inflected words are structured in the mental lexicon are more related to a reduction in processing uncertainty and maximisation of predictive efficiency than to economy of storage.}, KEYWORDS = {Morphological inflection, Morphological regularity, Prediction-driven processing, Discriminative learning, Lexical self-organisation, Gradient structure, Information theory, Non-linear modelling}, PAGES = {1--51}, URL = {https://doi.org/10.1007/s11525-023-09415-6}, DOI = {10.1007/s11525-023-09415-6}, PUBLISHER = {Springer (Heidelberg, Paesi Bassi)}, ISSN = {1871-5621}, JOURNAL = {Morphology (Dordrecht)}, } @ARTICLE{MAZZARINO_2023_ARTICLE_MM_483114, AUTHOR = {Mazzarino, S. 
and Marzi, C.}, TITLE = {Morphological processing in Italian L2 developing readers: a pilot study}, YEAR = {2023}, ABSTRACT = {In this paper we focus on the morphological competence and awareness of 23 Italian second-language (L2) school children, by comparing the reading profiles of Italian L1 and L2 children attending primary school from 2nd to 5th grades. Reading data were collected through the experimental finger-tracking protocol developed within the ReadLet project, which supports collecting and structuring behavioural reading data of short narrative texts displayed on a tablet touch-screen. The analyses reproduced the main effects that are well-attested in the developmental literature, and pointed out some differences in the behavioural profile of L2 versus L1 children, with the former being more affected by word length and frequency effects, as well as by the aloud reading task than the latter. Interestingly, however, a functional morphological segmentation strategy emerges in L2 readers processing complex inflected forms during the aloud reading task. We interpret it as a possible strategy to alleviate the extra cognitive load associated with the overt articulation of morphologically complex words within the context of a connected text.}, KEYWORDS = {reading, bilingualism, morphological awareness, developing readers, word processing}, PAGES = {143--166}, URL = {https://publications.cnr.it/doc/483114}, VOLUME = {XXII}, DOI = {10.1418/107679}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{MIASCHI_2023_ARTICLE_MABDV_488203, AUTHOR = {Miaschi, A. and Alzetta, C. and Brunato, D. and Dell'Orletta, F. 
and Venturi, G.}, TITLE = {Testing the Effectiveness of the Diagnostic Probing Paradigm on Italian Treebanks}, YEAR = {2023}, ABSTRACT = {The outstanding performance recently reached by neural language models (NLMs) across many natural language processing (NLP) tasks has steered the debate towards understanding whether NLMs implicitly learn linguistic competence. Probes, i.e., supervised models trained using NLM representations to predict linguistic properties, are frequently adopted to investigate this issue. However, it is still questioned if probing classification tasks really enable such investigation or if they simply hint at surface patterns in the data. This work contributes to this debate by presenting an approach to assessing the effectiveness of a suite of probing tasks aimed at testing the linguistic knowledge implicitly encoded by one of the most prominent NLMs, BERT. To this aim, we compared the performance of probes when predicting gold and automatically altered values of a set of linguistic features. Our experiments were performed on Italian and were evaluated across BERT's layers and for sentences with different lengths. As a general result, we observed higher performance in the prediction of gold values, thus suggesting that the probing model is sensitive to the distortion of feature values. However, our experiments also showed that the length of a sentence is a highly influential factor that is able to confound the probing model's predictions.}, KEYWORDS = {Neural language model, Probing tasks, Treebanks}, PAGES = {19}, URL = {https://www.mdpi.com/2078-2489/14/3/144}, VOLUME = {14}, DOI = {10.3390/info14030144}, PUBLISHER = {MDPI (Basel, Svizzera)}, ISSN = {2078-2489}, JOURNAL = {Information (Basel)}, } @ARTICLE{MURANO_2023_ARTICLE_MQDRZ_485254, AUTHOR = {Murano, F. and Quochi, V. and Del Grosso, A. M. and Rigobianco, L. and Zinzi, M.}, TITLE = {Describing Inscriptions of Ancient Italy. 
The ItAnt Project and Its Information Encoding Process}, YEAR = {2023}, ABSTRACT = {This paper discusses the challenges addressed in the digital scholarly encoding of the fragmentary texts of the languages of Ancient Italy according to the TEI/EpiDoc Guidelines in XML format. This contribution describes the solutions and customisations that have been adopted for dealing with the peculiarities of our epigraphical documentation and with the formalisation of epigraphical information deemed interesting for data retrieval in a historical linguistic perspective. The making of a digital corpus consisting of new critical editions of selected inscriptions is a work carried out in the context of the project "Languages and Cultures of Ancient Italy. Historical Linguistics and Digital Models", which aims to investigate the languages of Ancient Italy by combining the traditional methods, proper to historical linguistics, with methods and technologies proper to the digital humanities and computational lexicography. More specifically, the purpose of the project is to create a collection of interrelated digital language resources which comprise: 1) the digital corpus of texts editions; 2) a computational lexicon compliant with the Web Semantic requirements; 3) a relevant bibliographic reference dataset encoded according to the FRBRoo/LRMoo specifications. Additionally, selected textual data and scientific interpretations will be encoded by using CIDOC CRM and its extensions, namely CRMtex and CRMinf. 
The present contribution tackles one of the main aspects of the project, and proposes significant innovations in the encoding of critical editions for epigraphic texts of fragmentary languages, which will hopefully foster future interoperability and integration with other external datasets, a paramount concern of the project.}, KEYWORDS = {text encoding, ancient languages, digital epigraphy, TEI/EpiDoc}, PAGES = {15}, URL = {https://dl.acm.org/doi/pdf/10.1145/3606703}, VOLUME = {16}, DOI = {10.1145/3606703}, PUBLISHER = {Association for Computing Machinery (New York, NY, Stati Uniti d'America)}, ISSN = {1556-4711}, JOURNAL = {Journal on computing and cultural heritage (Online)}, } @ARTICLE{PROIETTI_2023_ARTICLE_PC_481807, AUTHOR = {Proietti, C. and Chiarella, D.}, TITLE = {The Role of Argument Strength and Informational Biases in Polarization and Bi-Polarization Effects}, YEAR = {2023}, ABSTRACT = {This simulation research explores the informational causes of polarization and bi-polarization of opinions within groups. We define 'polarization' here as a uniform change of the opinion of the whole group in the same direction, whereas 'bi-polarization' indicates a split of two subgroups towards opposite directions. For our purposes, we have expanded the model of the Argument Communication Theory of Bi-polarization. This is an argument-based multi-agent model of opinion dynamics inspired by Persuasive Argument Theory. The original model accounts for polarization as an outcome of pure informational influence and reproduces bipolarization effects by postulating an additional mechanism of homophilous selection of communication partners. The expanded model adds two dimensions: i.e., argument strength and more sophisticated protocols of informational influence (argument communication and opinion update). 
Adding the first dimension, allows us to investigate whether and how the presence of stronger or weaker arguments in a discussion influences polarization and bi-polarization dynamics, as suggested by the original framework of Persuasive Arguments Theory. The second feature allows us to test whether other mechanisms related to confirmation bias and epistemic vigilance can act as a driving force of bi-polarization. For the first issue, our simulations showed that argument strength has a measurable effect. For the second, our results would indicate that, in absence of homophily, only very strong types of informational bias can lead to bi-polarization.}, KEYWORDS = {Argumentation, Argument Communication Theory, Polarization, Bi-Polarization, Epistemic Vigilance, Opinion dynamics}, PAGES = {25}, URL = {https://www.jasss.org/26/2/5.html}, VOLUME = {26}, DOI = {10.18564/jasss.5062}, PUBLISHER = {SimSoc Consortium ([Guildford], Regno Unito)}, ISSN = {1460-7425}, JOURNAL = {JASSS (Guildf.)}, } @ARTICLE{SALES_2023_ARTICLE_SATD_488204, AUTHOR = {Sales, S. S. and Alzetta, C. and Tatay, C. M. and Dell'Orletta, F.}, TITLE = {Analysing Deception in Witness Memory Through Linguistic Styles in Spontaneous Language}, YEAR = {2023}, ABSTRACT = {The act of lying and its detection have raised interest in many fields, from the legal system to our daily lives. Considering that testimonies are commonly based on linguistic parameters, natural language processing, a research field concerned with programming computers to process and analyse natural language texts or speech, is a topic of interest on this front. This study aimed to examine the linguistic styles of simulated deception and true testimonies collected with the aim of studying witness memory. Study participants were asked to act as a witness of a crime by retelling the story they had just read. Cognitive interviewing techniques were used to collect testimony under two conditions: truth and simulated deception. 
A sample of 48 participants volunteered to participate in the study. Analyses of the linguistic indicators and content were carried out. Specifically, we performed a comparison of testimonies of the same participant by condition to analyse the variation between (i) lexical and (ii) linguistic features and (iii) content and speech characteristics (disfluencies) depending on the narrative condition. Concerning lexical properties, adjectives were the most-varying grammatical category between truthful and deceptive testimonies. Furthermore, in the linguistic analysis, we observed that truthful testimonies were generally longer than deceptive ones in terms of the number of words and sentences and also characterised by more articulated sentence structures, and these differences were also statistically significant. Regarding the analysis of the content, cognitive criteria (details) and admitting lack of memory were more present in truthful statements. By providing an objective measure, these results are of interest in developing NLP tools for assessing the credibility of testimonies in forensics.}, KEYWORDS = {Natural language processing, Simulated deception, Stylometric analysis}, PAGES = {26}, URL = {https://www.mdpi.com/2076-3425/13/2/317}, VOLUME = {13}, DOI = {10.3390/brainsci13020317}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2076-3425}, JOURNAL = {Brain sciences}, } @ARTICLE{VENUTI_2023_ARTICLE_VDBTPBCM_485318, AUTHOR = {Venuti, M. C. and Del Grosso, A. M. and Boschetti, F. and Tessarolo, L. and Prontera, A. and Bovet, D. and Cattaneo, G. 
and Melis, V.}, TITLE = {La 'Galassia MQDQ': un concetto di filologia tradizionale, digitale, sostenibile}, YEAR = {2023}, ABSTRACT = {The investigation of intertextuality within a corpus of Latin poetry is the main objective of the research functions of the so-called 'Musisque Deoque Galaxy': formular recurrences, and lexical and metric-verbal co-occurrences draw a dense network of relationships between texts, where poetic memory presents itself in various modalities. This contribution aims to analyse the latest developments in the "Musisque Deoque Galaxy" both from the point of view of the tools created to enable new textual acquisitions and new analyses, and from the technological point of view, with reference to the work carried out to ensure a long-term sustainability.}, KEYWORDS = {Digital textual corpora, Domain Specific Languages (DSL), Sustainability, Latin poetry, Carmina Latina Epigraphica, MQDQ Galaxy}, PAGES = {71--120}, URL = {https://hdl.handle.net/10278/5032220}, VOLUME = {4}, DOI = {10.30687/mag/2724-3923/2023/07/003}, PUBLISHER = {Edizioni Ca' Foscari (Venezia, Italia)}, ISSN = {2724-3923}, JOURNAL = {Magazèn}, } @BOOK{DUVAL_2023_BOOK_DG_489796, AUTHOR = {Duval, F. and Guadagnini, E.}, TITLE = {Le théâtre antique au Moyen Âge. Étude des mots et des concepts dans les textes en français et en italien}, YEAR = {2023}, ABSTRACT = {Longtemps s'est imposée l'idée d'une redécouverte tardive du théâtre antique après la longue parenthèse du Moyen Âge. Dans ce domaine, comme pour tant d'autres, l'« âge moyen » aurait représenté une coupure nette entre l'Antiquité, où le théâtre était une institution sociale répandue, et la Renaissance, qui aurait renoué avec les codes et pratiques antiques. Pour faire pièce à cette historiographie de la rupture, Frédéric Duval et Elisa Guadagnini ont recueilli et étudié les traces laissées par le théâtre antique dans l'encyclopédie et les langues vernaculaires du Moyen Âge. 
Le présent livre s'intéresse à l'histoire des langues et à l'histoire des idées, aux mots autant qu'aux concepts. Les auteurs partent toutefois de la représentation lexicale du théâtre antique. Sous cet angle, la documentation vernaculaire présente des avantages par rapport à la documentation latine. L'analyse porte à la fois sur la mise en place d'un lexique théâtral référant à l'Antiquité et sur le processus de conceptualisation du "théâtre antique" en France et en Italie entre le XIIe et le XVe siècle.},
  KEYWORDS = {Teatro, Linguistica romanza, Eredità dei classici, Letteratura medievale},
  PAGES = {672},
  URL = {https://publications.cnr.it/doc/489796},
  PUBLISHER = {Droz (Genève, CHE)},
  ISBN = {978-2-600-06468-2},
}
@INCOLLECTION{BOSCHETTI_2023_INCOLLECTION_BBDMKBT_484489,
  AUTHOR = {Boschetti, F. and Bambaci, L. and Del Grosso, A. M. and Mugelli, G. and Khan, A. F. and Bellandi, A. and Taddei, A.},
  TITLE = {Collaborative and Multidisciplinary Annotations of Ancient Texts: The Euporia System},
  YEAR = {2023},
  ABSTRACT = {Euporia is an annotation system originally created to study the ritual dynamics in ancient Greek tragedies from an anthropological perspective. The system is designed to be flexible enough so that it can be easily extended in other directions of multidisciplinary research. The system combines the simplicity of a web interface pared down to its essential elements with the expressivity of a domain-specific language parsed with ANTLR, that avoids the verbosity of general-purpose markup languages (such as XML-TEI) during the annotation phase. In this way, the user is focused on domain-specific tasks by writing concise annotations. Upon exportation of our data, interoperability is ensured by two measures: references to the annotated text are translated from a system based on progressive word numbers to the Canonical Text Services (CTS) system, and the annotations are translated into XML-TEI.
An annotation is constituted by a text reference, a condition related to variant readings and interpretations of the same text, as well as by a sequence of tags. Tags are created by following a bottom-up approach: they are progressively introduced and reused by the domain-expert during the annotation process. During revisions, tags are grouped and mapped onto an ontology, in order to enable and to exploit the identification of relations among the tags in querying the annotated corpus. Being designed for interoperability, our approach can be extended to other research fields (e.g. philology, archaeology) through the creation of new domain-specific languages and domain-specific tagsets, in order to improve the functionalities of the Euporia system.},
  KEYWORDS = {Digital Humanities, Euporia, Domain Specific Languages, CoPhiLab, Digital Philology},
  PAGES = {172--223},
  URL = {https://publications.cnr.it/doc/484489},
  VOLUME = {6},
  DOI = {10.1163/9789004527119_008},
  PUBLISHER = {Brill Academic Publishers (Leiden, NLD)},
  ISBN = {9789004527119},
  BOOKTITLE = {The Ancient World Goes Digital},
  EDITOR = {Juloux, V. B. and Di Ludovico, A. and Matskevich, S.},
}
@INCOLLECTION{BURGASSI_2023_INCOLLECTION_BG_490570,
  AUTHOR = {Burgassi, C. and Guadagnini, E.},
  TITLE = {La marcatezza lessicale nella ricostruzione del vocabolario di epoca storica},
  YEAR = {2023},
  ABSTRACT = {In questo contributo si descrive un possibile impiego del concetto di "marcatezza" nel campo della lessicologia storica. Come è noto, si tratta di un concetto molto ricorrente negli studi ma di difficile definizione e di vario utilizzo, del quale qui si propone un'applicazione specifica. Rispetto a una fase storica della lingua, in una serie di lessemi che risultano "omoionimici" per un significato, la marcatezza può stabilire un ordinamento che, di tali lessemi, rispecchi le relative posizioni nell'architettura del vocabolario.
A parità di contenuto denotativo, la marcatezza così concepita valuta la variazione sul piano connotativo, là dove la connotazione non riguarda la competenza linguistica ma è estrapolata dalle caratteristiche di attestazione dei lessemi in un corpus rappresentativo di riferimento. Per sostanziare l'argomentazione portiamo due esempi, il primo relativo al significato 'essere umano nei primi anni di vita', il secondo relativo al significato 'atto di violazione di una norma', nel vocabolario italiano antico.},
  KEYWORDS = {Lessicologia italiana, Linguistica storica, Marcatezza},
  PAGES = {77--94},
  URL = {https://amsacta.unibo.it/id/eprint/7465},
  VOLUME = {7},
  DOI = {10.6092/unibo/amsacta/7465},
  ISBN = {9788854971264},
  BOOKTITLE = {«CLUB Working Papers in Linguistics» Volume 7},
  EDITOR = {Cervini, C. and Gagliardi, G.},
}
@EDITORIAL{MARZI_2023_EDITORIAL_MP_490518,
  AUTHOR = {Marzi, C. and Pirrelli, V.},
  TITLE = {Integrative Views on Representations and Processes in Morphology},
  YEAR = {2023},
  ABSTRACT = {One of the most enduring conceptualisations of the language architecture rests on a modular subdivision of work between lexical representations of stored items on the one hand, and dynamic processes, modelled as procedural rules working on such items, on the other hand. In morphology, network-based approaches have suggested an alternative "integrative" view of word representations and processes, where lexical representations consist of partially overlapping activation patterns spreading over several processing units. From this integrative perspective, the resulting network is both a lexicon and a word processor. We argue that the network-based view provides a stimulating research framework for several complementary levels of language inquiry (including theoretical, computational and neuro-psychological approaches) to be fruitfully integrated into a novel, comprehensive understanding of morphology.
We discuss some implications of this view and delineate prospects of progress in this area.},
  KEYWORDS = {morphology, mental lexicon, Connectionism, Network science, Discriminative Learning},
  PAGES = {397--556},
  URL = {https://link.springer.com/journal/11525/volumes-and-issues/33-4},
  VOLUME = {33},
  NUMBER = {4},
  DOI = {10.1007/s11525-023-09416-5},
  PUBLISHER = {Springer (Dordrecht, NLD)},
}
@EDITORIAL{MARZI_2023_EDITORIAL_MP_492243,
  AUTHOR = {Marzi, C. and Pirrelli, V.},
  TITLE = {Integrative views of representations and processes in morphology: an introduction},
  YEAR = {2023},
  KEYWORDS = {Morphology, Mental Lexicon, Connectionism, Network science, Discriminative learning},
  PAGES = {397--408},
  URL = {https://link.springer.com/article/10.1007/s11525-023-09416-5},
  VOLUME = {33},
  DOI = {10.1007/s11525-023-09416-5},
  PUBLISHER = {Springer (Heidelberg, Paesi Bassi)},
  ISSN = {1871-5621},
  BOOKTITLE = {Morphology (Dordrecht)},
}
@INPROCEEDINGS{BRUNATO_2023_INPROCEEDINGS_BDDR_491078,
  AUTHOR = {Brunato, D. and Dell'Orletta, F. and Dini, I. and Ravelli, A. A.},
  TITLE = {Coherent or Not? Stressing a Neural Language Model for Discourse Coherence in Multiple Languages},
  YEAR = {2023},
  ABSTRACT = {In this study, we investigate the capability of a Neural Language Model (NLM) to distinguish between coherent and incoherent text, where the latter has been artificially created to gradually undermine local coherence within text. While previous research on coherence assessment using NLMs has primarily focused on English, we extend our investigation to multiple languages. We employ a consistent evaluation framework to compare the performance of monolingual and multilingual models in both in-domain and out-domain settings.
Additionally, we explore the model's performance in a cross-language scenario.},
  KEYWORDS = {text coherence, neural language models, multilingual corpora},
  PAGES = {10690--10700},
  URL = {https://aclanthology.org/2023.findings-acl.680},
  DOI = {10.18653/v1/2023.findings-acl.680},
  PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)},
  ISBN = {978-1-959429-62-3},
  CONFERENCE_NAME = {61st Annual Meeting of the Association for Computational Linguistics (ACL 2023)},
  CONFERENCE_PLACE = {Toronto, Canada},
  CONFERENCE_DATE = {9-14/07/2023},
}
@INPROCEEDINGS{CHIARELLA_2023_INPROCEEDINGS_CCF_481820,
  AUTHOR = {Chiarella, D. and Cutugno, P. and Ferretti, M.},
  TITLE = {A linguistic approach of sound characterisation and polarization: first steps},
  YEAR = {2023},
  ABSTRACT = {The activities of the "TRIPLO: TRasporti e collegamenti Innovativi e sostenibili tra Porti e piattaforme LOgistiche" project, funded with funds from the Interregional Operational Programme Italy-France Maritime 2014-2020, have as their particular goal to increase the sustainability of commercial ports and associated logistic platforms, helping to lessen noise pollution [1][2]. In some project activities, the acoustic impact on the people exposed to noise from back port activities is evaluated in connection to how each person perceives the noise. Only technical investigations, which cannot ensure a phenomena's universality in terms of perception, can objectively describe a phenomenon in environmental surveys [3]; A sound can be viewed as both a physical reality that can be measured using objective criteria and a sound perception phenomenon that is of a subjective character and related to the subject's psycho-physical-emotional state. Because these two traits are inextricably linked, it is not enough to just look at them independently.
Driven by these motivations, we created questionnaires concerning the perception of sounds, the structure and first results of which can be consulted in [4] [5] [6]. In this article, in the first part we present a methodology to identify adjectives characterising each sound via TF-IDF (term frequency - inverse document frequency) [7][8][9][10]; in the second part we analyse the positive or negative emotions described by the adjectives given for each sounds with TexBlob, a sentiment analysis classifier, and subsequently we compare the results obtained with the ones shown in [6].},
  KEYWORDS = {sentiment analysis, TF-IDF, sound polarization, sound characterisation},
  PAGES = {86--91},
  URL = {https://publications.cnr.it/doc/481820},
  ISBN = {978-959-7174-41-7},
  CONFERENCE_NAME = {XVIII° Simposio Internacional de Comunicación Social},
  CONFERENCE_PLACE = {Santiago de Cuba},
  CONFERENCE_DATE = {23-27/01/2023},
  BOOKTITLE = {Serie de Comunicación Social 2022-2023},
}
@INPROCEEDINGS{SCIOLETTE_2023_INPROCEEDINGS_SMG_491771,
  AUTHOR = {Sciolette, F. and Marchi, S. and Giovannetti, E.},
  TITLE = {Towards a New Computational Lexicon for Italian: building the morphological layer by harmonizing and merging existing resources},
  YEAR = {2023},
  ABSTRACT = {The present work illustrates the first steps towards the construction of a new computational lexicon for the Italian language. Following an analysis of existing lexical resources, it was decided to use LexicO as the reference base.
In this first phase a resource of nearly 800,000 inflected forms was produced, accompanied by lemmas and morphological traits, obtained by integrating the available data in LexicO with those coming from two support sources: the tool MAGIC and a selection of Italian treebanks.},
  KEYWORDS = {computational lexicon, lexical resources, morphology, morphological harmonization},
  PAGES = {5},
  URL = {https://ceur-ws.org/Vol-3596/short20.pdf},
  VOLUME = {3596},
  CONFERENCE_NAME = {9th Italian Conference on Computational Linguistics},
  CONFERENCE_PLACE = {Venezia},
  CONFERENCE_DATE = {30/11/2023-01/12/2023},
  BOOKTITLE = {Proceedings of the 9th Italian Conference on Computational Linguistics},
  EDITOR = {Boschetti, F. and Lebani, G. E. and Magnini, B. and Novielli, N.},
}
@INPROCEEDINGS{ZENZARO_2023_INPROCEEDINGS_ZDBR_484956,
  AUTHOR = {Zenzaro, S. and Del Grosso, A. M. and Boschetti, F. and Ranocchia, G.},
  TITLE = {Ease the collaboration making Scholarly Editions: the GreekSchools case study},
  YEAR = {2023},
  ABSTRACT = {CophiEditor is a Digital Scholarly Editing Web platform based on Domain Specific Languages (DSL-based DSE). We are developing this platform in the context of the GreekSchools-885222 ERC project in which it is being used for the constitutio textus of Philodemus of Gadara's Arrangement of the Philosophers. The digital papyrological edition of these texts is challenging in many ways, as most of the Herculaneum papyri are highly fragmentary because they are carbonized and damaged. One of the main goals of the CophiEditor is to provide a full-fledged collaborative environment in order to support the scholar's editorial work.
In this poster we show how the progress of CophiEditor eases the collaboration between scholars and the role played by the Web Annotation Data Model (WADM) for data representation and interchange.},
  KEYWORDS = {Domain Specific Languages, Computational Philology, Digital Philology, Web Annotation Data Model, DSE tools},
  PAGES = {230--232},
  URL = {http://www.aiucd2023.unisi.it/atti/},
  PUBLISHER = {Alma Mater Studiorum-Università di Bologna (Bologna, ITA)},
  ISBN = {978-88-942535-7-3},
  CONFERENCE_NAME = {LA MEMORIA DIGITALE: XII CONVEGNO ANNUALE AIUCD},
  CONFERENCE_PLACE = {Siena},
  CONFERENCE_DATE = {5-7 giugno 2023},
  EDITOR = {Carbé, E. and Lo Piccolo, Gabriele and Valenti, Alessia and Stella, F.},
}
@INPROCEEDINGS{FRONTINI_2023_INPROCEEDINGS_F_478212,
  AUTHOR = {Frontini, F.},
  TITLE = {Words and the Company they Keep: Digital corpora and infrastructures for the foreign language classroom},
  YEAR = {2023},
  ABSTRACT = {We give an overview of corpora \& language technologies and their use in foreign language teaching.},
  KEYWORDS = {corpora, didattica L2, tecnologie del linguaggio},
  URL = {https://publications.cnr.it/doc/478212},
  CONFERENCE_NAME = {Didattica della lingua, della cultura e cittadinanza attiva: sfide educative contemporanee-Seminari LEND Modena},
  CONFERENCE_DATE = {07/02/2023},
}
@INPROCEEDINGS{SICILIANO_2023_INPROCEEDINGS_SD_491768,
  AUTHOR = {Siciliano, A. and Del Grosso, A. M.},
  TITLE = {Giorgio Bassani's notes between tradition and innovation},
  YEAR = {2023},
  ABSTRACT = {This contribution illustrates the preliminary results of the project concerning Giorgio Bassani's personal library.
The project provides both a printed traditional edition of the notes he wrote on his books and the development of a digital environment to browse and analyze them.},
  KEYWORDS = {Digital Humanities, Digital Philology, Giorgio Bassani, Digital Scholarly Editions},
  URL = {https://doi.org/10.5281/zenodo.8107868},
  DOI = {10.5281/zenodo.7961822},
  CONFERENCE_NAME = {Digital Humanities 2023. Collaboration as Opportunity},
  CONFERENCE_PLACE = {Graz, Austria},
  CONFERENCE_DATE = {10-14/07/2023},
  BOOKTITLE = {Digital Humanities 2023. Collaboration as Opportunity},
  EDITOR = {Scholger, W. and Vogeler, G. and Tasovac, T. and Baillot, A. and Helling, P.},
}
@TECHREPORT{ALBANESI_2023_TECHREPORT_AGMPS_491776,
  AUTHOR = {Albanesi, D. and Giovannetti, E. and Marchi, S. and Papini, M. and Sciolette, F.},
  TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-Rapporto tecnico 23},
  YEAR = {2023},
  ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto del Progetto Traduzione Talmud Babilonese nel periodo gennaio 2023 - giugno 2023. Le principali attività tecniche svolte sul sistema Traduco attualmente in produzione sono state la risoluzione di bug e l'aggiornamento di alcune funzionalità. Parallelamente, è proseguito il lavoro di ricerca e sviluppo su due fronti: i) la realizzazione della nuova versione di Traduco, ii) l'ampliamento della risorsa lessicale per l'italiano contemporaneo a supporto della funzionalità di ricerca full-text sul testo del Talmud tradotto in italiano.},
  KEYWORDS = {Lessici elettronici, rappresentazione della conoscenza, Linguistica Computazionale, traduzione di testi religiosi, traduzione assistita dal calcolatore},
  PAGES = {13},
  URL = {https://publications.cnr.it/doc/491776},
}
@TECHREPORT{CARNIANI_2023_TECHREPORT_CP_490945,
  AUTHOR = {Carniani, E.
and Papini, M.},
  TITLE = {Maia: Una piattaforma aperta e collaborativa per la lessicografia elettronica, l'annotazione del testo e il linking testo-lessico-Consultazione e compilazione del lessico},
  YEAR = {2023},
  ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta sull'interfaccia web Maia nel periodo marzo 2023 - dicembre 2023. In particolare riportiamo le attività tecniche svolte sulla parte di creazione e manipolazione del lessico.},
  KEYWORDS = {Linguistica Computazionale, Lessici elettronici, Lexicon editor, Text annotation, Digital lexicography, text-lexical connection},
  PAGES = {13},
  URL = {https://publications.cnr.it/doc/490945},
}
@TECHREPORT{CASTELLI_2023_TECHREPORT_CDCCCCDGLMPR_482044,
  AUTHOR = {Castelli, D. and De Simone, G. and Cancedda, F. and Candela, L. and Colcelli, V. and Conte, R. and Di Donato, F. and Giannini, S. and Lazzeri, E. and Mangiaracina, S. and Puccinelli, R. and Ranchino, M. A.},
  TITLE = {Roadmap Scienza Aperta},
  YEAR = {2023},
  ABSTRACT = {La scienza aperta è un paradigma che influenza le pratiche di produzione e condivisione di conoscenza. Obiettivo di questa roadmap è delineare un percorso per la realizzazione e diffusione di pratiche e politiche di scienza aperta all'interno del Consiglio Nazionale delle Ricerche.},
  KEYWORDS = {Open Science, Open Access, Roadmap},
  PAGES = {23},
  URL = {https://publications.cnr.it/doc/482044},
  PUBLISHER = {CNR (Roma, ITA)},
}
@TECHREPORT{PAPINI_2023_TECHREPORT_P_490842,
  AUTHOR = {Papini, M.},
  TITLE = {Maia: Una piattaforma aperta e collaborativa per la lessicografia elettronica, l'annotazione del testo e il linking testo-lessico-Autenticazione e gestione utenti},
  YEAR = {2023},
  ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta sull'interfaccia web Maia nel periodo marzo 2023 - dicembre 2023.
In particolare riportiamo le attività tecniche svolte sulla parte di autenticazione e gestione utenti.},
  KEYWORDS = {Linguistica Computazionale, Lessici elettronici, Lexicon editor, Text annotation, Digital lexicography, text-lexical connection},
  PAGES = {7},
  URL = {https://publications.cnr.it/doc/490842},
}
@TECHREPORT{PAPINI_2023_TECHREPORT_P_490844,
  AUTHOR = {Papini, M.},
  TITLE = {Maia: Una piattaforma aperta e collaborativa per la lessicografia elettronica, l'annotazione del testo e il linking testo-lessico-Gestione del corpus e annotazione del testo},
  YEAR = {2023},
  ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta sull'interfaccia web Maia nel periodo marzo 2023 - dicembre 2023. In particolare riportiamo le attività tecniche svolte sulla parte di manipolazione del Corpus e annotazione del testo.},
  KEYWORDS = {Linguistica Computazionale, Lessici elettronici, Lexicon editor, Text annotation, Digital lexicography, text-lexical connection},
  URL = {https://publications.cnr.it/doc/490844},
}
@MISC{BARONI_2023_MISC_B_483769,
  AUTHOR = {Baroni, P.},
  TITLE = {PRIN-20204EJYRX CWALM: Project Web Site},
  YEAR = {2023},
  ABSTRACT = {Sito Web del progetto CWALM - Un Modello Lessicale basato sul Corpus dell'Arabo Scritto Contemporaneo (Bando PRIN 2020 | Settori ERC SH4 e PE6 | Prot. n.
20204EJYRX), realizzato con WordPress, sviluppato in inglese},
  KEYWORDS = {lexical model, corpus, Contemporary Written Arabic},
  URL = {https://cwalm.ilc.cnr.it},
}
@MISC{BARONI_2023_MISC_B_484291,
  AUTHOR = {Baroni, P.},
  TITLE = {CoPhiLab Web Site},
  YEAR = {2023},
  ABSTRACT = {Sito Web del Laboratorio del CNR-ILC "CoPhiLab - Laboratorio di Filologia Collaborativa e Cooperativa", realizzato con WordPress, sviluppato in inglese},
  KEYWORDS = {Filologia Collaborativa, Filologia Cooperativa, Digital Humanities, Digital Scholarly Editing, risorse digitali, strumenti digitali, mondo mediterraneo antico, mondo mediterraneo medievale, mondo mediterraneo rinascimentale, greco, latino, arabo, ebraico, italiano, dialetti italiani, lingue minoritarie europee},
  URL = {https://cophilab.ilc.cnr.it},
}
@MISC{CARLINO_2023_MISC_C_484296,
  AUTHOR = {Carlino, M.},
  TITLE = {Sito web www.ilc.cnr.it},
  YEAR = {2023},
  ABSTRACT = {Progettazione della struttura, realizzazione con il CMS WordPress e aggiornamento dei contenuti del sito istituzionale del Cnr-Istituto di Linguistica Computazionale "Antonio Zampolli" (CNR-ILC): https://www.ilc.cnr.it Sito bilingue (italiano e inglese).},
  KEYWORDS = {sito web, website, dissemination},
  URL = {https://www.ilc.cnr.it},
}
@MISC{DELGROSSO_2023_MISC_DS_484322,
  AUTHOR = {Del Grosso, A. M. and Spampinato, D.},
  TITLE = {Bellini Digital Correspondence},
  YEAR = {2023},
  ABSTRACT = {Bellini Digital Correspondence (BDC) è un progetto di edizione scientifica digitale relativo alle lettere autografe di Vincenzo Bellini. Il fondo è conservato presso il Museo Belliniano di Catania. Il carteggio comprende 40 unità testuali per 35 unità codicologiche, riprodotte in 111 immagini digitali. BDC implementa una piattaforma per la filologia digitale applicata al testo belliniano la cui base critica si regge sul lavoro realizzato dalla prof.ssa Graziella Seminara nel 2017.
L'edizione tiene conto di diverse tipologie di fruitori: specialisti, visitatori del museo, che usufruiscono dell'edizione all'interno del percorso museale. L'edizione digitale è stata realizzata in accordo con le linee guida dettate dalla Text Encoding Initiative e integra una fruizione via Web grazie al software Edition Visualization Technology. BDC è completamente open source e open access. Il sito Web, che accompagna l'edizione, illustra con dovizia di particolari tutte le fasi del progetto, i principi editoriali, le scelte di rappresentazione digitale del testo e l'ampio gruppo di persone coinvolte.},
  KEYWORDS = {Digital Humanities, Edizioni Scientifiche Digitali, Vincenzo Bellini, Filologia Digitale, Digital Scholarly Edition, Edition Visualization Technology},
  URL = {http://bellinicorrespondence.cnr.it/},
}
@MISC{GIOVANNETTI_2023_MISC_GABCGMPS_491773,
  AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Carniani, E. and Guidi, L. and Marchi, S. and Papini, M. and Sciolette, F.},
  TITLE = {Maia},
  YEAR = {2023},
  ABSTRACT = {Maia is an open and collaborative web tool based on semantic web and linked open data technologies for text annotation, e-lexicography, and lexical linking.},
  KEYWORDS = {maia, linked open data, e-lexicography, text annotation, lexical linking, collaborative tools},
  URL = {https://github.com/klab-ilc-cnr/Maia},
}
@ARTICLE{ACHENBACH_2022_ARTICLE_ABDDDFKV_465243,
  AUTHOR = {Achenbach, K. and Błaszczyńska, M. and De Paoli, S. and Di Donato, F. and Dumouchel, S. and Forbes, P. and Kraker, P. and Vignoli, M.},
  TITLE = {Defining discovery: Is Google Scholar a discovery platform? An essay on the need for a new approach to scholarly discovery},
  YEAR = {2022},
  ABSTRACT = {This essay discusses the concept of discovery, intended as content discovery, and defines it in the new context of Open Science, with a focus on Social Sciences and Humanities (SSH).
Starting from the example of Google Scholar, the authors argue that this well-established service does not address the current needs, practices, and variety of discovery. Alternatives in terms of technical choices, features, and governance, do however exist, offering richer and more open discovery. The paper presents, in particular, the implementations and research work of the H2020 project TRIPLE (Transforming Research through Innovative Practices for Linked Interdisciplinary Exploration). Dedicated to the building of a discovery platform for the SSH, the project is meant to address the specificities and evolution of discovery in this field. Prevailing scholarly resource platforms like Google Scholar limit discovery by focussing only on publications, and favouring through their algorithm well-cited papers, English content, and discipline-specific resources. A limitation in the context of cross-disciplinary and collaborative Open Science, such a service more specifically hinders discovery in the SSH. Characterized by a fragmented landscape, a variety of languages, data types, and outputs, research in the SSH requires services that fully exploit discovery potentialities. Moreover, a survey conducted within the TRIPLE project showed that most SSH researchers use Google Scholar as their starting point, and that they recognise the lack of control they have with this system. Beyond the extension of features and content, transparency is the other important criterion for the building of an open infrastructure serving the research community.
In light of this, we present the GoTriple platform, which exploits today's technological potential and incorporates the best known functionalities, in order to unveil more and innovative scholarly outputs and lead to international and interdisciplinary research project collaborations.},
  KEYWORDS = {discovery, TRIPLE},
  URL = {https://open-research-europe.ec.europa.eu/articles/2-28/v1},
  DOI = {10.12688/openreseurope.14318.1},
  PUBLISHER = {F1000 Research Limited on behalf of the European Commission, London, United Kingdom},
  ISSN = {2732-5121},
  JOURNAL = {Open research Europe},
}
@ARTICLE{BACCO_2022_ARTICLE_BRADVVDMPD_472298,
  AUTHOR = {Bacco, L. and Russo, F. and Ambrosio, L. and D'Antoni, F. and Vollero, L. and Vadala, G. and Dell'Orletta, F. and Merone, M. and Papalia, R. and Denaro, V.},
  TITLE = {Natural language processing in low back pain and spine diseases: A systematic review},
  YEAR = {2022},
  ABSTRACT = {Natural Language Processing (NLP) is a discipline at the intersection between Computer Science (CS), Artificial Intelligence (AI), and Linguistics that leverages unstructured human-interpretable (natural) language text. In recent years, it gained momentum also in health-related applications and research. Although preliminary, studies concerning Low Back Pain (LBP) and other related spine disorders with relevant applications of NLP methodologies have been reported in the literature over the last few years. It motivated us to systematically review the literature comprised of two major public databases, PubMed and Scopus. To do so, we first formulated our research question following the PICO guidelines. Then, we followed a PRISMA-like protocol by performing a search query including terminologies of both technical (e.g., natural language and computational linguistics) and clinical (e.g., lumbar and spine surgery) domains. We collected 221 non-duplicated studies, 16 of which were eligible for our analysis.
In this work, we present these studies divided into sub-categories, from both tasks and exploited models' points of view. Furthermore, we report a detailed description of techniques used to extract and process textual features and the several evaluation metrics used to assess the performance of the NLP models. However, what is clear from our analysis is that additional studies on larger datasets are needed to better define the role of NLP in the care of patients with spinal disorders.},
  KEYWORDS = {natural language processing, Low Back Pain, Survey},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85135163810\&origin=inward},
  VOLUME = {9},
  DOI = {10.3389/fsurg.2022.957085},
  PUBLISHER = {Frontiers Media (Lausanne, Svizzera)},
  ISSN = {2296-875X},
  JOURNAL = {Frontiers in surgery},
}
@ARTICLE{BIFFI_2022_ARTICLE_BDFGMS_477716,
  AUTHOR = {Biffi, M. and De Blasi, F. and Favaro, M. and Guadagnini, E. and Montemagni, S. and Sassolini, E.},
  TITLE = {Parole in rete / reti di parole. Possibili impieghi didattici dei grandi vocabolari storici digitalizzati},
  YEAR = {2022},
  ABSTRACT = {After a brief presentation of the great historical dictionaries of Italian, which are free to use online thanks to the digitalisation work carried out by the Accademia della Crusca, the contribution offers a number of examples of how these tools can be used for educational purposes.
Finally, further didactic uses are described, which will be made possible thanks to the advanced digital tools that the Accademia della Crusca and the Istituto di Linguistica Computazionale "Antonio Zampolli" del Consiglio Nazionale delle Ricerche (ILC) are currently working on.},
  KEYWORDS = {Lessicografia italiana, Didattica dell'italiano, Lessicografia digitale},
  PAGES = {143--188},
  URL = {https://italianoascuola.unibo.it/article/view/14866},
  VOLUME = {4},
  DOI = {10.6092/issn.2704-8128/14866},
  PUBLISHER = {ABIS-AlmaDL (Bologna, Italia)},
  ISSN = {2704-8128},
  JOURNAL = {Italiano a scuola},
}
@ARTICLE{BIFFI_2022_ARTICLE_BG_469340,
  AUTHOR = {Biffi, M. and Guadagnini, E.},
  TITLE = {«Le citazioni riconducono il dizionario nell'ambito della letteratura e della vita»: un primo sguardo d'insieme sui citati del GDLI},
  YEAR = {2022},
  ABSTRACT = {Nel corso dei lavori di affinamento della versione informatizzata del Grande dizionario della lingua italiana, condotti dall'Accademia della Crusca in collaborazione con l'Istituto di linguistica computazionale (CNR-Pisa), è stato integralmente corretto e acquisito digitalmente l'Indice degli autori citati. Il contributo presenta alcuni dati, oggi disponibili per la prima volta grazie alla conversione in formato elettronico del vocabolario e della sua bibliografia, utili ad approfondire lo studio delle fonti impiegate nel Battaglia. Gli esempi citati nelle voci rappresentano, come è noto, il cuore del GDLI e insieme uno degli aspetti più intensamente discussi dagli studiosi. La percezione che si ha e si è avuta di questo aspetto del dizionario, tuttavia, non sempre corrisponde ai dati effettivi.
Posta la mole del corpus delle allegazioni alle voci, infatti, soltanto la visione complessiva resa possibile dall'interrogazione digitale ne consente una valutazione oggettiva.},
  KEYWORDS = {Lessicografia italiana, Storia della lingua italiana},
  PAGES = {351--386},
  URL = {https://accademiadellacrusca.it/it/riviste/articoli/slei-xxxix-2022/7599},
  VOLUME = {XXXIX},
  PUBLISHER = {Le Lettere (Firenze, Italia)},
  ISSN = {0392-5218},
  JOURNAL = {Studi di lessicografia italiana},
}
@ARTICLE{BRUNATO_2022_ARTICLE_BDV_464954,
  AUTHOR = {Brunato, D. and Dell'Orletta, F. and Venturi, G.},
  TITLE = {Linguistically-Based Comparison of Different Approaches to Building Corpora for Text Simplification: A Case Study on Italian},
  YEAR = {2022},
  ABSTRACT = {In this paper, we present an overview of existing parallel corpora for Automatic Text Simplification (ATS) in different languages focusing on the approach adopted for their construction. We make the main distinction between manual and (semi)-automatic approaches in order to investigate in which respect complex and simple texts vary and whether and how the observed modifications may depend on the underlying approach. To this end, we perform a two-level comparison on Italian corpora, since this is the only language, with the exception of English, for which there are large parallel resources derived through the two approaches considered. The first level of comparison accounts for the main types of sentence transformations occurring in the simplification process, the second one examines the results of a linguistic profiling analysis based on Natural Language Processing techniques and carried out on the original and the simple version of the same texts.
For both levels of analysis, we chose to focus our discussion mostly on sentence transformations and linguistic characteristics that pertain to the morpho-syntactic and syntactic structure of the sentence.},
  KEYWORDS = {linguistic complexity, Italian language, corpus construction, text simplification, aligned corpora},
  PAGES = {1--19},
  URL = {https://www.frontiersin.org/articles/10.3389/fpsyg.2022.707630/full},
  VOLUME = {13},
  DOI = {10.3389/fpsyg.2022.707630},
  PUBLISHER = {Frontiers Research Foundation (Switzerland)},
  ISSN = {1664-1078},
  JOURNAL = {Frontiers in Psychology},
}
@ARTICLE{BRUNATO_2022_ARTICLE_BMD_474123,
  AUTHOR = {Brunato, D. and Mattei, A. and Dell'Orletta, F.},
  TITLE = {Analisi della scrittura giovanile da una prospettiva linguistico-computazionale: il caso di studio della Fanfiction},
  YEAR = {2022},
  ABSTRACT = {This paper presents a study aimed at characterizing the linguistic style of an emerging literary genre of the web, particularly appreciated by teens and young adults: fanfiction. By relying on Natural Language Processing approaches, and in particular on the methodology of linguistic profiling applied to a novel corpus of Italian fanfiction stories inspired by the fantasy saga "Harry Potter", we investigate the relationship between linguistic style and 'success', measured in terms of number of reviews obtained by the readers.
We show that it is possible to detect a set of features, among a wide set of linguistic ones modeling lexical, morpho-syntactic and syntactic phenomena, which help more in discriminating between 'successful' and 'unsuccessful' fanfics.}, KEYWORDS = {Trattamento Automatico del Linguaggio, stilometria computazionale, linguistic profiling, corpora, fanfiction}, PAGES = {171-189}, URL = {https://publications.cnr.it/doc/474123}, VOLUME = {2021/3}, PUBLISHER = {Bulzoni (Roma, Italia)}, ISSN = {0033-9725}, JOURNAL = {Rassegna Italiana di Linguistica Applicata (Testo stamp.)}, } @ARTICLE{BRUNATO_2022_ARTICLE_BV_472409, AUTHOR = {Brunato, D. and Venturi, G.}, TITLE = {Why is this language complex? Cherry-pick the optimal set of features in multilingual treebanks}, YEAR = {2022}, ABSTRACT = {This paper investigates linguistic complexity across natural languages from a corpus-based perspective and relies on the assumptions of linguistic profiling as a methodological framework. We focus in particular on the domain of syntactic complexity and analyze the distribution of a set of features taken as proxies of complexity phenomena at the sentence level, which were extracted from 63 treebanks annotated according to the Universal Dependencies formalism. This dataset guarantees that the features considered are modeling the same linguistic phenomena in different treebanks, allowing reliable comparison among languages. 
We show that our approach is able to identify tendencies of structural proximity between languages not necessarily in line with typologically-supported classification, thus shedding light on new corpus-based findings.}, KEYWORDS = {Linguistic Complexity, Linguistic Profiling, Universal Dependencies, Syntactic Domain}, PAGES = {1-14}, URL = {https://www.degruyter.com/document/doi/10.1515/lingvan-2021-0017/html}, DOI = {10.1515/lingvan-2021-0017}, PUBLISHER = {De Gruyter Mouton (Berlin; New York NY, Germania)}, ISSN = {2199-174X}, JOURNAL = {Linguistics vanguard}, } @ARTICLE{DELFANTE_2022_ARTICLE_D_464869, AUTHOR = {Del Fante, D.}, TITLE = {Review: A Corpus-Based Analysis of Ideological Bias: Migration in the British Press}, YEAR = {2022}, KEYWORDS = {Migration Studies, Newspaper Discourse, Corpus Linguistics, Corpus Approaches to Discourse Analysis}, PAGES = {137-139}, URL = {https://journals.sagepub.com/doi/10.1177/14614456211073219a}, VOLUME = {24}, DOI = {10.1177/14614456211073219a}, PUBLISHER = {SAGE (London, Regno Unito)}, ISSN = {1461-4456}, JOURNAL = {Discourse studies (Print)}, } @ARTICLE{DELFANTE_2022_ARTICLE_D_470092, AUTHOR = {Del Fante, D.}, TITLE = {Metaphors and pandemics: Spanish Flu and Coronavirus in US newspapers. A case-study}, YEAR = {2022}, ABSTRACT = {The international outbreak of Coronavirus has challenged the stability of our contemporary societies. However, this is not the first time that humanity is facing a global pandemic. The 1918 Spanish Flu pandemic led to one of the most lethal pandemics. Metaphors play a fundamental role in influencing how we think and talk about health and illness. With an understanding of how the Coronavirus and the Spanish Flu are metaphorically represented in newspaper discourse, it would be easier to shed light on the linguistic process through which metaphors work and to understand to what extent socio-historical-cultural conditions may affect the actualisation of a metaphor. 
This paper shows that metaphors are consistently present in both time contexts and Coronavirus and Spanish Flu are similarly metaphorically represented. This might suggest the existence of a rhetoric of pandemics which goes beyond the specific socio-cultural and political context: a response to a threat as a pandemic is deeply related with human nature}, KEYWORDS = {conceptual metaphor, corpus assisted discourse studies, health communication, corpus linguistics}, PAGES = {143-184}, URL = {https://www.metaphorik.de/sites/www.metaphorik.de/files/journal-pdf/32-2022_6_del-fante_0.pdf}, VOLUME = {32}, PUBLISHER = {Metaphorik. de c/o D. Osthus c/o Universität Bonn, Romanisches Seminar (Bonn, Germania)}, ISSN = {1618-2006}, JOURNAL = {Metaphorik. de (Internet)}, } @ARTICLE{DELFANTE_2022_ARTICLE_DD_463185, AUTHOR = {Del Fante, D. and Di Nunzio, G. M.}, TITLE = {OCR Correction for Corpus-assisted Discourse Studies: A Case Study of Old Newspapers}, YEAR = {2022}, ABSTRACT = {The use of OCR software to convert printed characters to digital text is a fundamental tool within diachronic approaches to Corpus-assisted discourse Studies. However, OCR software is not totally accurate, and the resulting error rate may compromise the qualitative analysis of the studies. This paper proposes a mixed qualitative-quantitative approach to OCR error detection and correction in order to develop a methodology for enhancing the quality of historical corpora. We applied the developed methodology to two case studies on newspapers of the beginning of the 20th century for the linguistic analysis of the metaphors representing migration and pandemics. The outcome of this project consists in a set of rules which are, eventually, valid for different contexts and applicable to different corpora and which can be reproduced and reused. 
The proposed procedure, in terms of computational readability, is aimed at making more readable and searchable the vast array of historical text corpora which are, at the moment, only partially usable given the high error rate introduced by an OCR software.}, KEYWORDS = {Corpus-assisted Discourse Studies, OCR detection, OCR correction, OCR post-processing, Text Mining}, PAGES = {99-124}, URL = {https://umanisticadigitale.unibo.it/article/view/13689}, VOLUME = {11}, DOI = {10.6092/issn.2532-8816/13689}, ISSN = {2532-8816}, JOURNAL = {Umanistica Digitale}, } @ARTICLE{DELGRATTA_2022_ARTICLE_DDZBB_472290, AUTHOR = {Del Gratta, R. and Del Grosso, A. M. and Zenzaro, S. and Boschetti, F. and Bambaci, L.}, TITLE = {La Filologia come sistema dinamico}, YEAR = {2022}, ABSTRACT = {Introduciamo un approccio formale all'evoluzione del contenuto informativo veicolato da documenti umanistici, con particolare attenzione alla prospettiva filologica e alle problematiche tipiche ad essa connesse (studio della tradizione, confronto tra testimoni, selezione e scelta delle lezioni, edizione di un testo, etc). Proponiamo un modello matematico in grado di formalizzare diversi fenomeni complessi in vari ambiti di ricerca quali la Linguistica Computazionale, la Filologia Digitale e l'Ingegneria del Software, soprattutto quando questi vengono applicati all'analisi di documenti e testi di interesse storico-letterario.}, KEYWORDS = {computational philology, formal philology, digital humanities}, PAGES = {1-20}, URL = {https://umanisticadigitale.unibo.it/article/view/13684}, VOLUME = {13}, DOI = {10.6092/issn.2532-8816/13684}, ISSN = {2532-8816}, JOURNAL = {Umanistica Digitale}, } @ARTICLE{FERRARI_2022_ARTICLE_FPBVV_473649, AUTHOR = {Ferrari, A. and Pirrotta, L. and Bonciani, M. and Venturi, G. 
and Vainieri, M.}, TITLE = {Higher readability of institutional websites drives the correct fruition of the abortion pathway: A cross-sectional study}, YEAR = {2022}, ABSTRACT = {In Italy, abortion services are public: therefore, health Institutions should provide clear and easily readable web-based information. We aimed to 1) assess variation in abortion services utilisation; 2) analyse the readability of institutional websites informing on induced abortion; 3) explore whether easier-to-read institutional websites influenced the correct fruition of abortion services. We identified from the 2021 administrative databases of Tuscany all women having an abortion, and-among them-women having an abortion with the certification provided by family counselling centres, following the pathway established by law. We assessed variation in total and certified abortion rates by computing the Systematic Component of Variation. We analysed the readability of the Tuscan health authorities' websites using the readability assessment tool READ-IT. We explored how institutional website readability influenced the odds of having certified abortions by running multilevel logistic models, considering health authorities as the highest-level variables. We observed high variation in the correct utilization of the abortion pathway in terms of certified abortion rates. The READ-IT scores showed that the most readable text was from the Florence Teaching Hospital website. Multilevel models revealed that higher READ-IT scores, corresponding to more difficult texts, resulted in lower odds of certified abortions. Large variation in the proper fruition of abortion pathways occurs in Tuscany, and such variation may depend on readability of institutional websites informing on induced abortion. 
Therefore, health Institutions should monitor and improve the readability of their websites to ensure proper and more equitable access to abortion.}, KEYWORDS = {abortion services, readability assessment}, PAGES = {1-13}, URL = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0277342}, VOLUME = {17}, DOI = {10.1371/journal.pone.0277342}, PUBLISHER = {Public Library of Science (San Francisco, CA, Stati Uniti d'America)}, ISSN = {1932-6203}, JOURNAL = {PloS one}, } @ARTICLE{GUADAGNINI_2022_ARTICLE_G_477583, AUTHOR = {Guadagnini, E.}, TITLE = {Alessandro Manzoni tra l'ancudine e l'incudine}, YEAR = {2022}, ABSTRACT = {Nel Fermo e Lucia compare la locuzione tra l'incudine e il martello : dopo averla eliminata nella "seconda minuta", nella Ventisettana Alessandro Manzoni reinserisce (essere) tra l'ancudine e il martello, e con questa forma l'espressione idiomatica resta nella Quarantana. Il contributo presenta alcuni elementi utili a spiegare le ragioni che possono aver portato Manzoni a preferire ancudine a incudine, prima e dopo il viaggio in Toscana.}, KEYWORDS = {Alessandro Manzoni, Toscanismo, Latinismo}, PAGES = {161-169}, URL = {https://publications.cnr.it/doc/477583}, VOLUME = {18}, PUBLISHER = {Istituti Editoriali e Poligrafici Internazionali (Pisa, Italia)}, ISSN = {1724-9074}, JOURNAL = {La Lingua italiana}, } @ARTICLE{MARZI_2022_ARTICLE_MNMMP_471441, AUTHOR = {Marzi, C. and Narzisi, A. and Milone, A. and Masi, G. and Pirrelli, V.}, TITLE = {Reading behaviors through patterns of finger-tracking in Italian children with autism spectrum disorder}, YEAR = {2022}, ABSTRACT = {The paper proposes an ecological and portable protocol for the large-scale collection of reading data in high-functioning autism spectrum disorder (ASD) children based on recording the finger movements of a subject reading a text displayed on a tablet touchscreen. 
By capitalizing on recent evidence that movements of a finger that points to a scene or text during visual exploration or reading may approximate eye fixations, we focus on recognition of written content and function words, pace of reading, and accuracy in reading comprehension. The analysis showed significant differences between typically developing and ASD children, with the latter group exhibiting greater variation in levels of reading ability, slower developmental pace in reading speed, less accurate comprehension, greater dependency on word length and word frequency, less significant prediction-based processing, as well as a monotonous, steady reading pace with reduced attention to weak punctuation. Finger-tracking patterns provides evidence that ASD readers may fail to integrate single word processing into major syntactic structures and lends support to the hypothesis of an impaired use of contextual information to predict upcoming stimuli, suggesting that difficulties in perception may arise as difficulties in prediction.}, KEYWORDS = {reading, autism, finger-tracking, developing readers, prediction-driven processing}, PAGES = {1-17}, URL = {https://www.mdpi.com/2076-3425/12/10/1316}, VOLUME = {12}, DOI = {10.3390/brainsci12101316}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2076-3425}, JOURNAL = {Brain sciences}, } @ARTICLE{MIASCHI_2022_ARTICLE_MBDV_475015, AUTHOR = {Miaschi, A. and Brunato, D. and Dell'Orletta, F.
and Venturi, G.}, TITLE = {On Robustness and Sensitivity of a Neural Language Model: A Case Study on Italian L1 Learner Errors}, YEAR = {2022}, ABSTRACT = {In this paper, we propose a comprehensive linguistic study aimed at assessing the implicit behavior of one of the most prominent Neural Language Models (NLM) based on Transformer architectures, BERT (Devlin et al., 2019), when dealing with a particular source of noisy data, namely essays written by L1 Italian learners containing a variety of errors targeting grammar, orthography and lexicon. Differently from previous works, we focus on the pre-training stage and we devise two complementary evaluation tasks aimed at assessing the impact of errors on sentence-level inner representations in terms of semantic robustness and linguistic sensitivity. While the first evaluation perspective is meant to probe the model's ability to encode the semantic similarity between sentences also in the presence of errors, the second type of probing task evaluates the influence of errors on BERT's implicit knowledge of a set of raw and morpho-syntactic properties of a sentence. Our experiments show that BERT's ability to compute sentence similarity and to correctly encode multi-leveled linguistic information of a sentence are differently modulated by the category of errors and that the error hierarchies in terms of robustness and sensitivity change across layer-wise representations.}, KEYWORDS = {nlp, interpretability, transformers, learner errors}, PAGES = {426-438}, URL = {https://doi.org/10.1109/TASLP.2022.3226333}, DOI = {10.1109/TASLP.2022.3226333}, PUBLISHER = {[Institute of Electrical and Electronics Engineers] ([Piscataway NJ], Stati Uniti d'America)}, ISSN = {2329-9304}, JOURNAL = {IEEE/ACM transactions on audio, speech, and language processing (Online)}, } @ARTICLE{MIASCHI_2022_ARTICLE_MSBDV_469733, AUTHOR = {Miaschi, A. and Sarti, G. and Brunato, D. and Dell'Orletta, F. 
and Venturi, G.}, TITLE = {Probing Linguistic Knowledge in Italian Neural Language Models across Language Varieties}, YEAR = {2022}, ABSTRACT = {In this paper, we present an in-depth investigation of the linguistic knowledge encoded by the transformer models currently available for the Italian language. In particular, we investigate how the complexity of two different architectures of probing models affects the performance of the Transformers in encoding a wide spectrum of linguistic features. Moreover, we explore how this implicit knowledge varies according to different textual genres and language varieties.}, KEYWORDS = {nlp, transformer models, interpretability}, PAGES = {25-44}, URL = {http://www.aaccademia.it/ita/scheda-libro?aaref=1518}, DOI = {10.4000/ijcol.965}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{PIRROTTA_2022_ARTICLE_PGTBVD_472171, AUTHOR = {Pirrotta, L. and Guidotti, E. and Tramontani, C. and Bignardelli, E. and Venturi, G. and De Rosis, S.}, TITLE = {COVID-19 vaccinations: An overview of the Italian national health system's online communication from a citizen perspective}, YEAR = {2022}, ABSTRACT = {COVID-19 vaccine hesitancy is still widespread. During the pandemic, the internet has been the preferred channel for health-related information, especially for less-educated citizens who tend to be the most hesitant about vaccination. A well-structured web communication strategy could help both to overcome vaccine hesitancy and to ensure equity in healthcare service access. This study investigated how the various regional and local health authorities in Italy used their institutional websites to inform users about COVID-19 vaccinations between March and April 2021. We browsed 129 institutional websites, checking the availability, quality and quantity, actionability and readability of information using a literature-based common grid. 
Descriptive statistics and statistical tests were performed. The online public dissemination of COVID-19 vaccination information in Italy was fragmented, both across and within regions. The side effects of vaccinations were often not reported on the websites, thus missing an opportunity to enhance vaccination uptake. More focus should also be placed on readability, since readability indexes showed that they were difficult to understand. Our research revealed that several actions could be implemented to enhance online communication on COVID-19 vaccination. For instance, simplifying texts can make them more understandable and the information reported actionable.}, KEYWORDS = {Vaccination Communication, Readability Assessment, Online Information, Covid-19}, PAGES = {970-979}, URL = {https://www.sciencedirect.com/science/article/pii/S0168851022002184}, VOLUME = {10}, DOI = {10.1016/j.healthpol.2022.08.001}, PUBLISHER = {Elsevier (Amsterdam, Paesi Bassi)}, ISSN = {0168-8510}, JOURNAL = {Health policy (Amst. Print)}, } @ARTICLE{SAPONARO_2022_ARTICLE_SGS_484519, AUTHOR = {Saponaro, D. and Giovannetti, E. and Sciolette, F.}, TITLE = {From Religious Sources to Computational Resources: Approach and Case Study on Hebrew Terms and Concepts}, YEAR = {2022}, ABSTRACT = {This paper introduces a methodology for the creation of a digital representation of a religious case study integrating textual, linguistic, and conceptual entities. The description of a holistic model of text, to be used as the backbone of the computational resource that needs to be built, is provided.
The proposed case study, focusing on the semantic field of "face" in Jewish religion, is first introduced from the point of view of the scholar and then translated, with the support of digital tools, into the relative computational representation.}, KEYWORDS = {religious studies, Hebrew terminology, ontology}, PAGES = {21}, URL = {https://publications.cnr.it/doc/484519}, VOLUME = {XXVII(2022)}, PUBLISHER = {Giuntina (Firenze, Italia)}, ISSN = {2282-4499}, JOURNAL = {Materia giudaica Print}, } @ARTICLE{SICILIANO_2022_ARTICLE_SD_472277, AUTHOR = {Siciliano, A. and Del Grosso, A. M.}, TITLE = {Dalla Stampa al Digitale: Un Modello di Codifica per l'Edizione delle Postille di Giorgio Bassani}, YEAR = {2022}, ABSTRACT = {Le postille annotate da Giorgio Bassani sui suoi libri rappresentano una via d'accesso privilegiata alla sua officina, consentendo di ricostruire la genesi delle opere e del profilo intellettuale di un importante scrittore del Novecento. In sede di edizione pongono però numerose problematiche, di classificazione, rappresentazione e sistematizzazione dei dati, di fronte a cui lo strumento digitale si rivela oltremodo vantaggioso. Il presente contributo riflette sul tema, presentando il prototipo di edizione digitale costruito sul caso di studio delle annotazioni alla Scuola dell'uomo di Guido Calogero (Firenze, Sansoni, 1939) e ragionando sulle importanti potenzialità del markup XML-TEI nel trattamento e nella rappresentazione di un oggetto testuale complesso, strutturato e a volte sfuggente qual è la postilla.}, KEYWORDS = {Digital Humanities, Digital Philology, Postille, Giorgio Bassani, Computational Philology}, PAGES = {1-26}, URL = {https://umanisticadigitale.unibo.it/article/view/13688}, VOLUME = {13}, DOI = {10.6092/issn.2532-8816/13688}, ISSN = {2532-8816}, JOURNAL = {Umanistica Digitale}, } @BOOK{VENTURI_2022_BOOK_VCD_464964, AUTHOR = {Venturi, G. and Cimino, A. and Dell'Orletta, F.}, TITLE = {La fede dichiarata.
Un'analisi linguistico-computazionale}, YEAR = {2022}, ABSTRACT = {Il volume indaga l'apporto di tecnologie basate sul Natural Language Processing (NLP) all'analisi di un corpus di trascrizioni di 164 interviste orali raccolte durante la ricerca 2017 sulla "Religiosità in Italia". Gli autori illustrano metodologie e strumenti che permettono di trasformare l'informazione implicitamente contenuta nelle interviste in informazione esplicitamente strutturata. Il risultato finale di questo processo interpretativo spazia dall'acquisizione di conoscenze lessicali e terminologiche complesse alla loro organizzazione in strutture proto-concettuali, fino ad arrivare alla qualificazione dell'atteggiamento con il quale l'intervistato si esprime. Il lettore viene accompagnato a scoprire quale sia il valore aggiunto delle analisi basate su NLP e quali nuovi orizzonti di ricerca siano aperti da queste analisi.}, KEYWORDS = {Knowledge Extraction, Knowledge Organization}, PAGES = {1-181}, URL = {https://publications.cnr.it/doc/464964}, PUBLISHER = {Franco Angeli Editore (Milano, ITA)}, ISBN = {978-88-351-2146-6}, } @INCOLLECTION{CALAMAI_2022_INCOLLECTION_CPPCSM_467178, AUTHOR = {Calamai, S. and Piccardi, D. and Pretto, N. and Candeo, G. and Stamuli, M. F. and Monachini, M.}, TITLE = {Not Just Paper: Enhancement of Archive Cultural Heritage}, YEAR = {2022}, ABSTRACT = {Oral archives and digital technologies have gone hand-in-hand for a very long time. Both sides benefit from this interdisciplinary junction: technology enhances the preservation and diffusion of oral materials, while exploiting them to develop cutting-edge tools for their treatment. This chapter deals with an Italian instantiation of this mutual relationship: the Archivio Vi.Vo. project. Offering innovative solutions concerning metadata, audio restoration, description, and access, Archivio Vi.Vo. aims to build an online platform to host the oral archives from Tuscany. 
The project is powered by CLARIN-IT, which guarantees its compliance with standards and offers resources for data access and discoverability. Archivio Vi.Vo. has not been built from scratch: it is instead a cross-fertilization of previous initiatives and research projects (e.g., the Gra.fo project). Moreover, the chapter presents the related, contemporary work of a multidisciplinary group striving to synthesize a Vademecum for future generations of oral archive researchers. Lastly, a brief list of tentative ideas for future developments of the Archivio Vi.Vo. platform will be presented.}, KEYWORDS = {digital oral archives, research infrastructures, models for digital preservation}, URL = {https://www.degruyter.com/document/isbn/9783110767377/html}, VOLUME = {1}, DOI = {10.1515/9783110767377-025}, PUBLISHER = {Walter De Gruyter \& Co (Berlin, DEU)}, ISBN = {9783110767377}, BOOKTITLE = {CLARIN: The Infrastructure for Language Resources}, EDITOR = {Fišer, D. and Witt, A.}, } @INCOLLECTION{CREPALDI_2022_INCOLLECTION_CFMNPT_471258, AUTHOR = {Crepaldi, D. and Ferro, M. and Marzi, C. and Nadalini, A. and Pirrelli, V. and Taxitari, L.}, TITLE = {Finger movements and eye movements during adults' silent and oral reading}, YEAR = {2022}, ABSTRACT = {Using a common tablet and a web application, we can record the finger movements of a reader that is concurrently reading and finger-pointing a text displayed on the tablet touchscreen. In a preliminary analysis of "finger-tracking" data of early-graders we showed that finger movements can replicate established reading effects observed in more controlled settings. Here, we analyse and discuss reading evidence collected by (i) tracking the finger movements of adults reading a short essay displayed on a tablet touchscreen, and (ii) tracking the eye movements of adults reading a comparable text displayed on the screen of a computer. Texts in the two conditions were controlled for linguistic complexity and page layout.
In addition, we tested adults' comprehension in both silent and oral reading, by asking them multiple-choice questions after reading each text. We show and discuss the reading evidence that the two (optical and tactile) protocols provide, and to what extent they show comparable effects. We conclude with some remarks on the importance of ecology and portability of protocols for large-scale collection of naturalistic reading data.}, KEYWORDS = {Reading, finger-tracking, digital technology}, PAGES = {443-471}, URL = {https://link.springer.com/book/9783030998905}, VOLUME = {23}, PUBLISHER = {Springer (Dordrecht, NLD)}, ISBN = {978-3-030-99890-5}, BOOKTITLE = {Developing language and literacy-Studies in Honor of Dorit Diskin Ravid}, EDITOR = {Levie, R. and Bar On, A. and Ashkenazi, O. and Dattner, E. and Brandes, G.}, } @INCOLLECTION{DEJONG_2022_INCOLLECTION_DM_472304, AUTHOR = {De Jong, F. and Monachini, M.}, TITLE = {Introduction. Selected papers from the CLARIN Annual Conference 2021}, YEAR = {2022}, ABSTRACT = {CLARIN, the Common Language Resources and Technology Infrastructure, is a virtual platform that is accessible for everyone interested in language. CLARIN offers access to language resources, technology, and knowledge, and enables cross-country collaboration among academia, industry, policy-makers, cultural institutions, and the general public. Researchers, students, and citizens are offered access to digital language resources and technology services to deploy, connect, analyse and sustain such resources. 
In line with the Open Science agenda, CLARIN enables scholars from the Social Sciences and Humanities (SSH) and beyond to engage in and contribute to cutting-edge, data-driven research based on language data in a range of formats and modalities.}, KEYWORDS = {Language Resource Infrastructure}, PAGES = {i-v}, URL = {https://publications.cnr.it/doc/472304}, VOLUME = {189}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, BOOKTITLE = {Selected Papers from the CLARIN Annual Conference 2021}, EDITOR = {Monachini, M. and Eskevich, M.}, } @INCOLLECTION{DEJONG_2022_INCOLLECTION_DVFVFW_472288, AUTHOR = {De Jong, F. and Van Uytvanck, D. and Frontini, F. and Van Den Bosch, A. and Fišer, D. and Witt, A.}, TITLE = {Language Matters. The European Research Infrastructure CLARIN, Today and Tomorrow}, YEAR = {2022}, ABSTRACT = {CLARIN stands for "Common Language Resources and Technology Infrastructure". In 2012 CLARIN ERIC was established as a legal entity with the mission to create and maintain a digital infrastructure to support the sharing, use, and sustainability of language data (in written, spoken, or multimodal form) available through repositories from all over Europe, in support of research in the humanities and social sciences and beyond. Since 2016 CLARIN has had the status of Landmark research infrastructure and currently it provides easy and sustainable access to digital language data and also offers advanced tools to discover, explore, exploit, annotate, analyse, or combine such datasets, wherever they are located. This is enabled through a networked federation of centres: language data repositories, service centres, and knowledge centres with single sign-on access for all members of the academic community in all participating countries. In addition, CLARIN offers open access facilities for other interested communities of use, both inside and outside of academia.
Tools and data from different centres are interoperable, so that data collections can be combined and tools from different sources can be chained to perform operations at different levels of complexity. The strategic agenda adopted by CLARIN and the activities undertaken are rooted in a strong commitment to the Open Science paradigm and the FAIR data principles. This also enables CLARIN to express its added value for the European Research Area and to act as a key driver of innovation and contributor to the increasing number of industry programmes running on data-driven processes and the digitalization of society at large.}, KEYWORDS = {research infrastructure, language resources, language technology, open science, service interoperability, innovation, SSH}, PAGES = {31-58}, URL = {https://www.degruyter.com/document/doi/10.1515/9783110767377-002/html}, VOLUME = {1}, DOI = {10.1515/9783110767377-002}, PUBLISHER = {Walter De Gruyter Inc (Boston/Berlin/Munich, USA)}, ISBN = {978-3-11-076737-7}, BOOKTITLE = {CLARIN: The Infrastructure for Language Resources}, EDITOR = {Fišer, D. and Witt, A.}, } @INCOLLECTION{DELFANTE_2022_INCOLLECTION_DFMQ_469112, AUTHOR = {Del Fante, D. and Frontini, F. and Monachini, M. and Quochi, V.}, TITLE = {Italian Language Resources. From CLARIN-IT to the VLO and Back: Sketching a Methodology for Monitoring LRs Visibility}, YEAR = {2022}, ABSTRACT = {This paper sketches a user-oriented, qualitative methodology for both (i) monitoring the existence and availability of language resources relevant for a given CLARIN national community and language and (ii) assessing the offering potential of CLARIN, in terms of Language Resources provided to national consortia. From the user perspective, the methodology has been applied to investigate the visibility of language resources available for Italian within the CLARIN central services, in particular the Virtual Language Observatory. 
As a proof-of-concept, the methodology has been tested on the resources available through the CLARIN-IT data centres, but, ideally, it could be applied by any national data centre aiming to assess the existence of LRs in CLARIN for any given languages and check their accessibility for the interested users. It is thus argued that such an assessment might be a useful instrument in the hands of national coordinators and centre managers for (i) bringing to the fore both strengths and critical issues about their data providing community and (ii) for planning targeted actions to improve and increase both visibility and accessibility of their LRs.}, KEYWORDS = {Virtual Language Observatory, CLARIN-IT, CLARIN-ERIC, Qualitative Assessment Methodology, User Involvement}, PAGES = {10-22}, URL = {https://ecp.ep.liu.se/index.php/clarin/article/view/413/371}, VOLUME = {189}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, BOOKTITLE = {Selected Papers from the CLARIN Annual Conference 2021}, EDITOR = {Monachini, M. and Eskevich, M.}, } @INCOLLECTION{MARZI_2022_INCOLLECTION_MP_464598, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {Psycholinguistic Research on Inflectional Morphology in the Romance Languages}, YEAR = {2022}, ABSTRACT = {Over the past decades, psycholinguistic aspects of word processing have made a considerable impact on views of language theory and language architecture. In the quest for the principles governing the ways human speakers perceive, store, access, and produce words, inflection issues have provided a challenging realm of scientific inquiry, and a battlefield for radically opposing views. It is somewhat ironic that some of the most influential cognitive models of inflection have long been based on evidence from an inflectionally impoverished language like English, where the notions of inflectional regularity, (de)composability, predictability, phonological complexity, and default productivity appear to be mutually implied.
An analysis of more "complex" inflection systems such as those of Romance languages shows that this mutual implication is not a universal property of inflection, but a contingency of poorly contrastive, nearly isolating inflection systems. Far from presenting minor faults in a solid, theoretical edifice, Romance evidence appears to call into question the subdivision of labor between rules and exceptions, the on-line processing vs. long-term memory dichotomy, and the distinction between morphological processes and lexical representations. A dynamic, learning-based view of inflection is more compatible with this data, whereby morphological structure is an emergent property of the ways inflected forms are processed and stored, grounded in universal principles of lexical self-organization and their neuro-functional correlates.}, KEYWORDS = {Romance language morphology, paradigms, inflectional classes, lexical self-organisation, frequency effects, priming, discriminative learning, lexical blocking, long-term and short-term memory}, PAGES = {1-44}, URL = {https://oxfordre.com/linguistics/view/10.1093/acrefore/9780199384655.001.0001/acrefore-9780199384655-e-709}, DOI = {10.1093/acrefore/9780199384655.013.709}, PUBLISHER = {Oxford University Press (Oxford, GBR)}, ISBN = {9780199384655}, BOOKTITLE = {Oxford Encyclopedia of Romance Linguistics}, EDITOR = {Loporcaro, M.}, } @INCOLLECTION{SCIOLETTE_2022_INCOLLECTION_SG_470011, AUTHOR = {Sciolette, F. and Giovannetti, E.}, TITLE = {Un modello per domarli tutti: verso una rappresentazione del testo come esplicitazione di documento, lingua e contenuto}, YEAR = {2022}, ABSTRACT = {The aim of this research is to describe the first steps towards the theoretical elaboration of a holistic model to represent textual information. The focus of the model is the definition of "text", with its different dimensions, as a "diasystem". 
The set of elements, organized into distinct but strictly interconnected systems, wherein each element has an effect on the whole diasystem, is described in a model structured in the following components: graphic, linguistic, documental, discursive, and conceptual. In this work, the first attempts in the modeling of text will be shown through two case studies: the Babylonian Talmud and the DiTMAO (Dictionary of Old Occitan medico-botanical terminology).}, KEYWORDS = {textual model, holistic model, diasystem, model of text}, PAGES = {145-157}, URL = {http://www.aitla.it/images/pdf/StudiAItLA14/009_AItLA14_ScioletteGiovannetti.pdf}, VOLUME = {14}, PUBLISHER = {Officinaventuno (Milano, ITA)}, ISBN = {978-88-97657-51-4}, BOOKTITLE = {Fare linguistica applicata con le digital humanities}, EDITOR = {Saturno, J. and Spreafico, L.}, } @INCOLLECTION{VAGIONAKIS_2022_INCOLLECTION_VDBBDMM_472291, AUTHOR = {Vagionakis, I. and Del Gratta, R. and Boschetti, F. and Baroni, P. and Del Grosso, A. M. and Mancinelli, T. and Monachini, M.}, TITLE = {'Cretan Institutional Inscriptions' Meets CLARIN-IT}, YEAR = {2022}, ABSTRACT = {This paper presents 'Cretan Institutional Inscriptions', a resource in the domain of Digital Epigraphy developed at the Ca' Foscari University of Venice and supported by CLARIN-IT as part of its actions addressed to initiatives, projects and events in the field of Social Sciences and Humanities. The paper begins with a brief outline of the project within which the resource was created and then goes into a more in-depth description of the main methodologies used to develop the resource (EpiDoc and EFES) and of their benefits. The paper then focuses on the cooperation of the project with the Venice Centre of Digital and Public Humanities and the Italian node of CLARIN, also illustrating the dockerization process applied to the resource hosted on the CLARIN-IT servers. Some desiderata for future developments are outlined as well. 
The paper ends with some remarks about the widening of CLARIN horizons towards Digital Epigraphy and on the role of its K-Centres in this respect.}, KEYWORDS = {Digital Epigraphy, Digital Classics, Ancient Greek, Crete, Institutions, Text Encoding Initiative, TEI, EpiDoc, EpiDoc Front-End Services, EFES, Virtual Language Observatory, Dockerization, ILC4CLARIN, CLARIN-IT, CLARIN}, PAGES = {139-150}, URL = {https://ecp.ep.liu.se/index.php/clarin/article/view/424/382}, VOLUME = {189}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, BOOKTITLE = {Selected Papers from the CLARIN Annual Conference 2021}, EDITOR = {Monachini, M. and Eskevich, M.}, } @EDITORIAL{MELERO_2022_EDITORIAL_MSS_472132, AUTHOR = {Melero, M. and Sakriani, S. and Soria, C.}, TITLE = {Proceedings of The 1st Annual Meeting of the ELRA/ISCA Special Interest Group on Under-Resourced Languages (SIGUL2022)}, YEAR = {2022}, ABSTRACT = {Proceedings of the SIGUL 2022 workshop.}, KEYWORDS = {conference proceedings, less-resourced languages, language resources, NLP}, URL = {https://aclanthology.org/events/lrec-2022/#2022-sigul-1}, ISBN = {979-10-95546-91-7}, } @EDITORIAL{MONACHINI_2022_EDITORIAL_ME_472302, AUTHOR = {Monachini, M. and Eskevich, M.}, TITLE = {Selected Papers from the CLARIN Annual Conference 2021}, YEAR = {2022}, ABSTRACT = {This volume presents the highlights of the 10th CLARIN Annual Conference 2021. The conference was held on 27th--29th September 2021 and because of the COVID-19 pandemic, for the second year in a row a virtual format had to be adopted. CLARIN, the Common Language Resources and Technology Infrastructure, is a virtual platform that is accessible for everyone interested in language. CLARIN offers access to language resources, technology, and knowledge, and enables cross-country collaboration among academia, industry, policy-makers, cultural institutions, and the general public. 
Researchers, students, and citizens are offered access to digital language resources and technology services to deploy, connect, analyse and sustain such resources. In line with the Open Science agenda, CLARIN enables scholars from the Social Sciences and Humanities (SSH) and beyond to engage in and contribute to cutting-edge, data-driven research based on language data in a range of formats and modalities.}, KEYWORDS = {Language Resource Infrastructure}, PAGES = {1-212}, URL = {https://publications.cnr.it/doc/472302}, VOLUME = {189}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, } @INPROCEEDINGS{AGNOLONI_2022_INPROCEEDINGS_ABFMMQRV_472294, AUTHOR = {Agnoloni, T. and Bartolini, R. and Frontini, F. and Montemagni, S. and Marchetti, C. and Quochi, V. and Ruisi, M. and Venturi, G.}, TITLE = {Making Italian Parliamentary Records Machine-Actionable: the Construction of the ParlaMint-IT corpus}, YEAR = {2022}, ABSTRACT = {This paper describes the process of acquisition, cleaning, interpretation, coding and linguistic annotation of a collection of parliamentary debates from the Senate of the Italian Republic covering the COVID-19 pandemic emergency period and a former period for reference and comparison according to the CLARIN ParlaMint prescriptions. The corpus contains 1199 sessions and 79,373 speeches for a total of about 31 million words, and was encoded according to the ParlaCLARIN TEI XML format. It includes extensive metadata about the speakers, sessions, political parties and parliamentary groups. As required by the ParlaMint initiative, the corpus was also linguistically annotated for sentences, tokens, POS tags, lemmas and dependency syntax according to the universal dependencies guidelines. Named entity annotation and classification is also included. All linguistic annotation was performed automatically using state-of-the-art NLP technology with no manual revision. 
The Italian dataset is freely available as part of the larger ParlaMint 2.1 corpus deposited and archived in CLARIN repository together with all other national corpora. It is also available for direct analysis and inspection via various CLARIN services and has already been used both for research and educational purposes.}, KEYWORDS = {parliamentary debates, CLARIN ParlaMint, corpus creation, corpus annotation}, PAGES = {117-124}, URL = {https://aclanthology.org/2022.parlaclarin-1.17/}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, CONFERENCE_NAME = {Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference}, CONFERENCE_PLACE = {Marseille, France}, CONFERENCE_DATE = {20/06/2022}, } @INPROCEEDINGS{BOSCHETTI_2022_INPROCEEDINGS_BBDDGNZ_472289, AUTHOR = {Boschetti, F. and Burgassi, C. and Del Gratta, R. and Del Grosso, A. M. and Guadagnini, E. and Nahli, O. and Zenzaro, S.}, TITLE = {Il Laboratorio di Filologia Collaborativa e Cooperativa (CoPhiLab) del CNR-ILC: dati, strumenti, servizi e infrastrutture}, YEAR = {2022}, ABSTRACT = {Questo contributo illustra le attività e le risorse del Laboratorio di Filologia Collaborativa e Cooperativa (CoPhiLab) dell'Istituto di Linguistica Computazionale "A. Zampolli" del Consiglio Nazionale delle Ricerche (CNR-ILC), con particolare attenzione all'uso delle infrastrutture di ricerca nazionali e internazionali.}, KEYWORDS = {Filologia Computazionale, Modelli Formali, Lingua Araba, Domain-Specific Languages, Ingegneria del Software}, PAGES = {45-50}, URL = {https://www.eventi.garr.it/it/conf22}, DOI = {10.26314/GARR-Conf22-proceedings}, PUBLISHER = {Associazione Consortium GARR (Roma, ITA)}, ISBN = {978-88-946629-1-7}, CONFERENCE_NAME = {CondiVisioni. La rete come strumento per costruire il futuro}, CONFERENCE_PLACE = {Palermo}, CONFERENCE_DATE = {18/05/2022-20/05/2022}, BOOKTITLE = {CONDIVISIONI La rete come strumento per costruire il futuro}, EDITOR = {Mieli, M. 
and Volpe, C.}, } @INPROCEEDINGS{CACIOLI_2022_INPROCEEDINGS_CCDMDZ_472285, AUTHOR = {Cacioli, G. and Cerretini, G. and Di Pietro, C. and Maenza, S. and Del Turco, R. R. and Zenzaro, S.}, TITLE = {There and back again: what to expect in the next EVT version}, YEAR = {2022}, ABSTRACT = {Developing software as complex as EVT requires a significant amount of time and resources. As a result, the release frequency of new versions has never been particularly high, especially for major versions. The first release of EVT 1 dates back to 2014, in 2016 we published the first alpha version of EVT 2, after which more complete versions followed: a beta1 in 2017, and a beta2 in 2020. Not only did it take three years between the two beta versions of EVT 2, but we still don't see the light for a 1.0 version. What has happened in the meantime? How is it possible that a stable version for EVT 1 was achieved relatively quickly, and development slowed down significantly thereafter? This talk aims at updating the Italian DH community about the current status of EVT and its future prospects with regard to the next version (EVT 3).}, KEYWORDS = {XML-TEI, filologia d'autore, mark-up, varianti, Saba}, PAGES = {212-217}, URL = {http://amsacta.unibo.it/6848/1/Proceedings_AIUCD2022.pdf#page=222}, DOI = {10.6092/unibo/amsacta/6848}, ISBN = {9788894253566}, CONFERENCE_NAME = {AIUCD 2022. Culture digitali. Intersezioni: filosofia, arti, media}, CONFERENCE_PLACE = {Lecce}, CONFERENCE_DATE = {1-3/06/2022}, BOOKTITLE = {AIUCD 2022-Proceedings. Culture digitali. Intersezioni: filosofia, arti, media}, EDITOR = {Ciracì, F. and Miglietta, G. and Gatto, C.}, } @INPROCEEDINGS{CASELLI_2022_INPROCEEDINGS_CDD_472144, AUTHOR = {Caselli, T. and Dini, I. and Dell'Orletta, F.}, TITLE = {How About Time? 
Probing a Multilingual Language Model for Temporal Relations}, YEAR = {2022}, ABSTRACT = {This paper presents a comprehensive set of probing experiments using a multilingual language model, XLM-R, for temporal relation classification between events in four languages. Results show an advantage of contextualized embeddings over static ones and a detrimental role of sentence level embeddings. While obtaining competitive results against state-of-the-art systems, our probes indicate a lack of suitable encoded information to properly address this task.}, KEYWORDS = {Natural Language Processing, Neural Language Models, Temporal Relation Classification}, URL = {https://aclanthology.org/2022.coling-1.283/}, CONFERENCE_NAME = {International Conference on Computational Linguistics (COLING)}, CONFERENCE_PLACE = {Gyeongju, Republic of Korea}, CONFERENCE_DATE = {12-17 ottobre 2022}, BOOKTITLE = {Proceedings of the 29th International Conference on Computational Linguistics, COLING 2022}, } @INPROCEEDINGS{COLOMBO_2022_INPROCEEDINGS_CG_463100, AUTHOR = {Colombo, M. and Giovannetti, E.}, TITLE = {La Visualizzazione Grafica di Sensi e Relazioni Semantiche di un Lessico Computazionale della Lingua Italiana}, YEAR = {2022}, ABSTRACT = {La visualizzazione, intesa come ausilio alla consultazione e alla ricerca, può giocare un ruolo fondamentale nella fruizione di dati linguistici e di conoscenza. Attraverso la rappresentazione di una risorsa lessicale o di una ontologia sotto forma di grafo, ad esempio, uno studioso ha la possibilità di farsi un'idea immediata delle relazioni che intercorrono tra i vari elementi di un lessico (di lingua o specialistico) o dei concetti che formalizzano un determinato dominio. In questo contributo presentiamo un'applicazione web based per la visualizzazione grafica della componente semantica di un lessico computazionale dell'italiano. Il grafo è stato sviluppato con Cytoscape.js, una libreria Javascript per la creazione di grafi. 
L'interfaccia al grafo, sviluppata come applicazione Angular, offre un primo insieme di funzionalità di interazione per la navigazione e la manipolazione della risorsa lessico-semantica.}, KEYWORDS = {human-computer interaction, lessico computazionale, grafi, Visualizzazione grafica di risorse linguistiche, rappresentazione della conoscenza}, PAGES = {155-160}, URL = {http://amsacta.unibo.it/6848/1/Proceedings_AIUCD2022.pdf}, DOI = {10.6092/unibo/amsacta/6848}, ISBN = {9788894253566}, CONFERENCE_NAME = {AIUCD 2022}, CONFERENCE_PLACE = {Lecce}, CONFERENCE_DATE = {1-3/06/2022}, BOOKTITLE = {AIUCD 2022-Culture digitali. Intersezioni: filosofia, arti, media. Proceedings della 11a conferenza nazionale}, } @INPROCEEDINGS{DELFANTE_2022_INPROCEEDINGS_DFMQ_468964, AUTHOR = {Del Fante, D. and Frontini, F. and Monachini, M. and Quochi, V.}, TITLE = {CLARIN-IT: An Overview on the Italian Clarin Consortium After Six Years of Activity}, YEAR = {2022}, ABSTRACT = {This paper offers an overview of the Italian CLARIN consortium after six years since its establishment. The members, the centres and the repositories and the most important collections are described. Lastly, in order to showcase the visibility and the accessibility of Language Resources provided by CLARIN-IT from a user-perspective, we show how Italian resources are findable within CLARIN ERIC}, KEYWORDS = {Language Resources, Data Repositories and Archives, Research Infrastructures, CLARIN}, PAGES = {8}, URL = {http://ceur-ws.org/Vol-3160/short21.pdf}, PUBLISHER = {CEUR-WS.org (Aachen, DEU)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Italian Research Conference on Digital Libraries}, CONFERENCE_PLACE = {Università degli Studi di Padova}, CONFERENCE_DATE = {24/02/2022}, BOOKTITLE = {Proceedings of the 18th Italian Research Conference on Digital Libraries}, EDITOR = {Di Nunzio, G. M. and Portelli, B. and Redavid, D. and Silvello, G.}, } @INPROCEEDINGS{GAMBA_2022_INPROCEEDINGS_GFBM_472292, AUTHOR = {Gamba, F. and Frontini, F. 
and Broeder, D. and Monachini, M.}, TITLE = {Language Technologies for the Creation of Multilingual Terminologies. Lessons Learned from the SSHOC Project}, YEAR = {2022}, ABSTRACT = {This paper is framed in the context of the SSHOC project and aims at exploring how Language Technologies can help in promoting and facilitating multilingualism in the Social Sciences and Humanities (SSH). Although most SSH researchers produce culturally and societally relevant work in their local languages, metadata and vocabularies used in the SSH domain to describe and index research data are currently mostly in English. We thus investigate Natural Language Processing and Machine Translation approaches in view of providing resources and tools to foster multilingual access and discovery to SSH content across different languages. As case studies, we create and deliver as freely, openly available data a set of multilingual metadata concepts and an automatically extracted multilingual Data Stewardship terminology. The two case studies allow as well to evaluate performances of state-of-the-art tools and to derive a set of recommendations as to how best apply them. Although not adapted to the specific domain, the employed tools prove to be a valid asset to translation tasks. Nonetheless, validation of results by domain experts proficient in the language is an unavoidable phase of the whole workflow.}, KEYWORDS = {Multilingual terminologies, data curation, language resource infrastructures}, PAGES = {154-163}, URL = {https://aclanthology.org/2022.lrec-1.17}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, CONFERENCE_NAME = {13th Conference on Language Resources and Evaluation (LREC 2022)}, CONFERENCE_PLACE = {Marseille, France}, CONFERENCE_DATE = {22/06/2022-24/06/2022}, } @INPROCEEDINGS{HIRSCH_2022_INPROCEEDINGS_HFDD_469567, AUTHOR = {Hirsch, F. and Frontini, F. and Didirková, I. 
and Drengubiak, J.}, TITLE = {Esthétique de la voix dans les livres audio en langue française}, YEAR = {2022}, ABSTRACT = {Aesthetics of voice in French-language audio books. This research aims at studying listeners' preferences in audiobooks' voices. Samples of 8 male and 7 female voices were extracted from different audiobooks and analyzed. A survey has been carried out to obtain 69 listeners' points of view by answering questions on vocal features. Results show that the participants' choices depend on the literary genre. Indeed, male voices are preferred for science-fiction novels and female voices for juvenile literature and contemporary novels. Nevertheless, other literary genres that were tested do not match with a specific voice. On the other hand, essays are expected to be read with a slower speech rate, whereas listeners prefer faster speech rates in erotic novels.}, KEYWORDS = {audiobooks, voice esthetics, speech}, URL = {https://doi.org/10.1051/shsconf/202213808004}, VOLUME = {138}, DOI = {10.1051/shsconf/202213808004}, CONFERENCE_NAME = {8e Congrès Mondial de Linguistique Française}, CONFERENCE_PLACE = {Université d'Orléans, France}, CONFERENCE_DATE = {04-08/07/2022}, BOOKTITLE = {SHS Web of Conferences}, } @INPROCEEDINGS{MARCHI_2022_INPROCEEDINGS_MCDG_463120, AUTHOR = {Marchi, S. and Colombo, M. and Dattilo, D. and Giovannetti, E.}, TITLE = {Un esperimento di visualizzazione grafica della terminologia del Talmud babilonese}, YEAR = {2022}, ABSTRACT = {L'impiego di tecnologie di information visualization nel settore delle digital humanities può aprire nuove frontiere di ricerca. Le informazioni veicolate attraverso modalità grafiche, infatti, possono apparire agli studiosi più immediatamente comprensibili e le interfacce grafiche realizzate fornire inediti paradigmi di studio e di manipolazione dei dati analizzati. 
Il caso d'uso sperimentale illustrato in questo contributo è stato concepito per fornire allo studioso una modalità visiva, immediata, per l'analisi comparativa del contenuto terminologico di un corpus testuale.}, KEYWORDS = {Visualizzazione grafica di risorse testuali, terminologia, linguistica computazionale, tf-idf, grafi}, PAGES = {239-241}, URL = {http://amsacta.unibo.it/6848/1/Proceedings_AIUCD2022.pdf}, DOI = {10.6092/unibo/amsacta/6848}, ISBN = {9788894253566}, CONFERENCE_NAME = {AIUCD 2022}, CONFERENCE_PLACE = {Lecce}, CONFERENCE_DATE = {1-3/06/2022}, BOOKTITLE = {AIUCD 2022-Culture digitali. Intersezioni: filosofia, arti, media. Proceedings della 11a conferenza nazionale}, } @INPROCEEDINGS{MERENDI_2022_INPROCEEDINGS_MDV_472145, AUTHOR = {Merendi, F. and Dell'Orletta, F. and Venturi, G.}, TITLE = {On the Nature of BERT: Correlating Fine-Tuning and Linguistic Competence}, YEAR = {2022}, ABSTRACT = {Several studies in the literature on the interpretation of Neural Language Models (NLM) focus on the linguistic generalization abilities of pre-trained models. However, little attention is paid to how the linguistic knowledge of the models changes during the fine-tuning steps. In this paper, we contribute to this line of research by showing to what extent a wide range of linguistic phenomena are forgotten across 50 epochs of fine-tuning, and how the preserved linguistic knowledge is correlated with the resolution of the fine-tuning task. To this end, we considered a quite understudied task where linguistic information plays the main role, i.e. the prediction of the evolution of written language competence of native language learners. In addition, we investigate whether it is possible to predict the fine-tuned NLM accuracy across the 50 epochs solely relying on the assessed linguistic competence. 
Our results are encouraging and show a high relationship between the model's linguistic competence and its ability to solve a linguistically-based downstream task.}, KEYWORDS = {Natural Language Processing, Neural Language Models, Linguistic Generalization Abilities}, URL = {https://aclanthology.org/2022.coling-1.275}, CONFERENCE_NAME = {International Conference on Computational Linguistics (COLING)}, CONFERENCE_PLACE = {Gyeongju, Republic of Korea}, CONFERENCE_DATE = {12-17 ottobre 2022}, } @INPROCEEDINGS{MIASCHI_2022_INPROCEEDINGS_MRD_469732, AUTHOR = {Miaschi, A. and Ravelli, A. A. and Dell'Orletta, F.}, TITLE = {Punctuation Restoration in Spoken Italian Transcripts with Transformers}, YEAR = {2022}, ABSTRACT = {In this paper, we propose an evaluation of a Transformer-based punctuation restoration model for the Italian language. Experimenting with a BERT-base model, we perform several fine-tuning with different training data and sizes and tested them in an in- and cross-domain scenario. Moreover, we conducted an error analysis of the main weaknesses of the model related to specific punctuation marks. Finally, we test our system both quantitatively and qualitatively, by offering a typical task-oriented and a perception-based acceptability evaluation.}, KEYWORDS = {nlp, transformer models, punctuation restoration}, PAGES = {245-260}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85135083576\&origin=inward}, VOLUME = {13196 LNAI}, DOI = {10.1007/978-3-031-08421-8_17}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, CONFERENCE_NAME = {AIxIA 2021-Advances in Artificial Intelligence}, CONFERENCE_DATE = {1-3/12/2021}, BOOKTITLE = {Lecture notes in computer science}, } @INPROCEEDINGS{PAPUCCI_2022_INPROCEEDINGS_PDMD_474890, AUTHOR = {Papucci, M. and De Nigris, C. and Miaschi, A. 
and Dell'Orletta, F.}, TITLE = {Evaluating Text-To-Text Framework for Topic and Style Classification of Italian texts}, YEAR = {2022}, ABSTRACT = {In this paper, we propose an extensive evaluation of the first text-to-text Italian Neural Language Model (NLM), IT5 [1], on a classification scenario. In particular, we test the performance of IT5 on several tasks involving both the classification of the topic and the style of a set of Italian posts. We assess the model in two different configurations, single- and multi-task classification, and we compare it with a more traditional NLM based on the Transformer architecture (i.e. BERT). Moreover, we test its performance in a few-shot learning scenario. We also perform a qualitative investigation on the impact of label representations in modeling the classification of the IT5 model. Results show that IT5 could achieve good results, although generally lower than the BERT model. Nevertheless, we observe a significant performance improvement of the Text-to-text model in a multi-task classification scenario. Finally, we found that altering the representation of the labels mainly impacts the classification of the topic.}, KEYWORDS = {bert, style classification, t5, text-to-text, topic classification, transformers}, PAGES = {56-70}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85143252156\&origin=inward}, VOLUME = {3287}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Sixth Workshop on Natural Language for Artificial Intelligence, NL4AI 2022}, CONFERENCE_DATE = {30/11/2022}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{PELLINO_2022_INPROCEEDINGS_PSDS_465656, AUTHOR = {Pellino, S. and Sichera, P. and Del Grosso, A. M. 
and Spampinato, D.}, TITLE = {Dalla codifica alla fruizione: l'edizione digitale Bellini Digital Correspondence}, YEAR = {2022}, ABSTRACT = {L'articolo illustra le attività svolte per la realizzazione dell'edizione scientifica digitale delle lettere autografe belliniane (Bellini Digital Correspondence). Il progetto si inserisce in un contesto didattico finalizzato alla rappresentazione e fruizione della corrispondenza del maestro catanese. In seno alle attività è stato sviluppato un tool di gestione delle regioni d'interesse e la personalizzazione del software EVT.}, KEYWORDS = {Digital scholarly edition, correspondence, software design, XSLT, Vincenzo Bellini}, PAGES = {163-168}, URL = {http://amsacta.unibo.it/6848/1/Proceedings_AIUCD2022.pdf}, DOI = {10.6092/unibo/amsacta/6848}, PUBLISHER = {Alma Mater Studiorum-Università di Bologna (Bologna, ITA)}, ISBN = {9788894253566}, CONFERENCE_NAME = {XI Conferenza annuale AIUCD. Culture digitali. Intersezioni: filosofia, arti, media}, CONFERENCE_PLACE = {Lecce}, CONFERENCE_DATE = {1-3/6/2022}, BOOKTITLE = {Proceedings della XI conferenza nazionale AIUCD-Culture digitali. Intersezioni: filosofia, arti, media}, EDITOR = {Ciracì, F. and Miglietta, G. and Gatto, C.}, } @INPROCEEDINGS{QUOCHI_2022_INPROCEEDINGS_QBKMMPRTZ_472419, AUTHOR = {Quochi, V. and Bellandi, A. and Khan, F. and Mallia, M. and Murano, F. and Piccini, S. and Rigobianco, L. and Tommasi, A. and Zavattari, C.}, TITLE = {From Inscriptions to Lexica and Back: A Platform for Editing and Linking the Languages of Ancient Italy}, YEAR = {2022}, ABSTRACT = {Available language technology is hardly applicable to scarcely attested ancient languages, yet their digital semantic representation, though challenging, is an asset for the purpose of sharing and preserving existing cultural knowledge. In the context of a project on the languages and cultures of ancient Italy, we took up this challenge. 
This paper thus describes the development of a user friendly web platform, EpiLexO, for the creation and editing of an integrated system of language resources for ancient fragmentary languages centered on the lexicon, in compliance with current digital humanities and Linked Open Data principles. EpiLexO allows for the editing of lexica with all relevant cross-references: for their linking to their testimonies, as well as to bibliographic information and other (external) resources and common vocabularies. The focus of the current implementation is on the languages of ancient Italy, in particular Oscan, Faliscan, Celtic and Venetic; however, the technological solutions are designed to be general enough to be potentially applicable to different contexts and scenarios.}, KEYWORDS = {Digital Epigraphy, Restsprachen, Lexicon Editing and Linking, tools for DH}, PAGES = {59-67}, URL = {https://aclanthology.org/2022.lt4hala-1.0/}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, ISBN = {979-10-95546-78-8}, CONFERENCE_NAME = {Second Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA 2022)}, CONFERENCE_PLACE = {Marseille, France}, CONFERENCE_DATE = {25/06/2022}, BOOKTITLE = {Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA 2022)}, EDITOR = {Sprugnoli, R. and Passarotti, M.}, } @INPROCEEDINGS{QUOCHI_2022_INPROCEEDINGS_QBMTZ_472422, AUTHOR = {Quochi, V. and Bellandi, A. and Mallia, M. and Tommasi, A. and Zavattari, C.}, TITLE = {Supporting Ancient Historical Linguistics and Cultural Studies with EpiLexO}, YEAR = {2022}, ABSTRACT = {This contribution presents a system of independent software components meant to support the creation of ecosystems of interrelated language data (i.e. 
lexica linked to textual testimonies, concepts, metadata, bibliographic references, and other external lexical resources) according to the current state-of-the-art representational models for the semantic web. The system is implemented as a set of autonomous servers exposing Restful APIs that in principle can serve different frontend applications and use cases. In this work they serve the EpiLexO GUI application designed and geared to support scholars of ancient languages of fragmentary attestation in their studies. The development of both the back-ends and the front-end is still work in progress, but a first version is ready for use.}, KEYWORDS = {tools for DH, ancient languages, restsprachen, lexicon editor, corpus management, lexicon-text linking}, PAGES = {39-43}, URL = {https://office.clarin.eu/v/CE-2022-2118-CLARIN2022_ConferenceProceedings.pdf}, ISSN = {2773-2177}, CONFERENCE_NAME = {CLARIN Annual Conference 2022}, CONFERENCE_PLACE = {Prague, Czechia}, CONFERENCE_DATE = {10-12/10/2022}, BOOKTITLE = {CLARIN Annual Conference Proceedings 2022}, EDITOR = {Erjavec, T. and Eskevich, M.}, } @INPROCEEDINGS{ZAMPARELLI_2022_INPROCEEDINGS_ZCBCDHV_470081, AUTHOR = {Zamparelli, R. and Chowdhury, S. A. and Brunato, D. and Chesi, C. and Dell'Orletta, F. and Hasan, A. and Venturi, G.}, TITLE = {SemEval-2022 Task 3: PreTENS-Evaluating Neural Networks on Presuppositional Semantic Knowledge}, YEAR = {2022}, ABSTRACT = {We report the results of the SemEval 2022 Task 3, PreTENS, on evaluating the acceptability of simple sentences containing constructions whose two arguments are presupposed to be or not to be in an ordered taxonomic relation. The task featured two sub-tasks articulated as: (i) binary prediction task and (ii) regression task, predicting the acceptability in a continuous scale. The sentences were artificially generated in three languages (English, Italian and French). 
21 systems, with 8 system papers were submitted for the task, all based on various types of fine-tuned transformer systems, often with ensemble methods and various data augmentation techniques. The best systems reached an F1-macro score of 94.49 (sub-task1) and a Spearman correlation coefficient of 0.80 (sub-task2), with interesting variations in specific constructions and/or languages.}, KEYWORDS = {Neural Networks, Presuppositional Knowledge, Evaluation}, PAGES = {228-238}, URL = {https://aclanthology.org/2022.semeval-1.29.pdf}, CONFERENCE_NAME = {16th International Workshop on Semantic Evaluation (SemEval-2022)}, CONFERENCE_PLACE = {Seattle}, CONFERENCE_DATE = {14-15/07/2022}, } @INPROCEEDINGS{ZENZARO_2022_INPROCEEDINGS_ZDBR_472278, AUTHOR = {Zenzaro, S. and Del Grosso, A. M. and Boschetti, F. and Ranocchia, G.}, TITLE = {Verso la definizione di criteri per valutare soluzioni di scholarly editing digitale: il caso d'uso GreekSchools}, YEAR = {2022}, ABSTRACT = {Il contributo propone alcuni criteri di valutazione per l'analisi degli approcci all'editing testuale. Dopo averli presentati brevemente, si mostra come sono stati applicati alle attività finora svolte nel contesto del progetto ERC-885222 GreekSchools per quanto riguarda la progettazione e lo sviluppo di una piattaforma web collaborativa che fa uso di linguaggi specifici di dominio (DSL) finalizzata allo studio scientifico e alla pubblicazione di testi papiracei mediante edizioni critiche digitali.}, KEYWORDS = {DSL linguaggi specifici di dominio, filologia computazionale, ingegneria del software, papirologia digitale, Domain Driven Design}, PAGES = {20-25}, URL = {http://amsacta.unibo.it/6848/1/Proceedings_AIUCD2022.pdf#page=30}, DOI = {10.6092/unibo/amsacta/6848}, ISBN = {9788894253566}, CONFERENCE_NAME = {AIUCD 2022. Culture digitali. Intersezioni: filosofia, arti, media}, CONFERENCE_PLACE = {Lecce}, CONFERENCE_DATE = {1/06/2022-3/06/2022}, BOOKTITLE = {AIUCD 2022-Proceedings. Culture digitali. 
Intersezioni: filosofia, arti, media}, EDITOR = {Ciracì, F. and Miglietta, G. and Gatto, C.}, } @INPROCEEDINGS{BOSCHETTI_2022_INPROCEEDINGS_BDGZ_484475, AUTHOR = {Boschetti, F. and Del Grosso, A. M. and Guadagnini, E. and Zenzaro, S.}, TITLE = {L'annotazione del testo}, YEAR = {2022}, ABSTRACT = {Il contributo illustra le potenzialità offerte dall'uso dei Linguaggi Specifici di Dominio (DSL) per la produzione di note, commenti, apparati e testi scientifici. Il modello Euporia e l'approccio DSL-Based DSE sono richiamati come possibile alternativa alla codifica mediante XML/TEI per la produzione di edizioni scientifiche digitali.}, KEYWORDS = {Digital Humanities, Filologia Digitale, Domain Specific Languages, Euporia, DSL-based DSE}, URL = {https://publications.cnr.it/doc/484475}, CONFERENCE_NAME = {Filologia Digitale e Testi Italiani Antichi. Verso un sistema integrato di ricerca}, CONFERENCE_PLACE = {Pisa, Scuola Normale Superiore}, CONFERENCE_DATE = {27-28/06/2022}, } @INPROCEEDINGS{DELGROSSO_2022_INPROCEEDINGS_D_484398, AUTHOR = {Del Grosso, A. M.}, TITLE = {Forme di elaborazione per il Codice Pelavicino Digitale. Usare, Riusare ed Estendere l'Edizione}, YEAR = {2022}, ABSTRACT = {Il contributo illustra alcune attività condotte in seno al progetto di edizione digitale denominato "Codice Pelavicino". In particolare la presentazione si focalizza sulla rappresentazione dei termini notevoli, sulla presentazione del testo in formato PDF e sulla specificità della codifica come componente di un sistema FAIR.}, KEYWORDS = {Codice Pelavicino, Digital Humanities, Filologia Digitale, Edition Visualization Technology, XML/TEI}, URL = {https://pelavicino.labcd.unipi.it/}, CONFERENCE_NAME = {L'edizione digitale del Codice Pelavicino-Presentazione del lavoro completo}, CONFERENCE_PLACE = {Sarzana}, CONFERENCE_DATE = {26/03/2022}, } @INPROCEEDINGS{DELGROSSO_2022_INPROCEEDINGS_DZB_484339, AUTHOR = {Del Grosso, A. M. and Zenzaro, S. 
and Boschetti, F.}, TITLE = {CNR-ILC team e il progetto ERC 885222-GreekSchools}, YEAR = {2022}, ABSTRACT = {Presentazione del team di lavoro e delle attività svolte in seno al progetto ERC 885222-GreekSchools durante il convegno di Papirologia Ercolanese organizzato dal 5 al 9 settembre 2022 presso il Castello Pasquini di Castiglioncello (Livorno).}, KEYWORDS = {Digital Humanities, Digital Papyrology, Digital Scholarly Edition, ERC, GreekSchools, DSL-based DSE}, URL = {https://greekschools.eu}, CONFERENCE_NAME = {Convegno di Papirologia Ercolanese}, CONFERENCE_PLACE = {Castiglioncello}, CONFERENCE_DATE = {5-9/09/2022}, } @INPROCEEDINGS{MARINETTI_2022_INPROCEEDINGS_MMQBBDPRSZMM_479194, AUTHOR = {Marinetti, A. and Murano, F. and Quochi, V. and Ballerini, M. and Boschetti, F. and Del Grosso, A. M. and Piccini, S. and Rigobianco, L. and Solinas, P. and Zinzi, M. and Mallia, M. and Middei, E.}, TITLE = {Challenges in Encoding Fragmentary Attested Languages}, YEAR = {2022}, ABSTRACT = {The ItAnt project investigates the languages of ancient Italy, whose only attestation consists in epigraphic evidence, focusing on Venetic, Oscan, Faliscan and Celtic languages. For this purpose, the project combines the traditional method proper to historical linguistics with the setting up of digital technologies, developing computational tools specifically designed to create a digital set of interrelated resources.}, KEYWORDS = {digital epigraphy, eLexicography, Linguistic Linked Open Data, Text Encoding}, URL = {https://ciegl2022.sciencesconf.org/resource/page/id/30}, CONFERENCE_NAME = {XVI Congresso Internazionale di Epigrafia greca e latina}, CONFERENCE_PLACE = {Bordeaux}, CONFERENCE_DATE = {29 agosto-settembre 2022}, } @INPROCEEDINGS{MARZI_2022_INPROCEEDINGS_MNFMMVPTP_471602, AUTHOR = {Marzi, C. and Narzisi, A. and Ferro, M. and Masi, G. and Milone, A. and Viglione, V. and Pelagatti, S. and Tomassini, I. 
and Pirrelli, V.}, TITLE = {Patterns of finger-tracking in Italian early readers with Autism Spectrum Disorder}, YEAR = {2022}, ABSTRACT = {Background: Of late, the synergistic interaction of eye and hand movements in the exploration of a visual scene displayed on a computer touchscreen was shown to provide a congruent signature of the "attention maps" of subjects with autism spectrum disorders (ASD). A familiar context where this visual and tactile interaction is exploited is when children use the finger of their dominant hand to point the letters of written words as they are reading, particularly at early stages of their literacy development. In the present work, a dedicated app running on a common tablet is used to capture and analyse the finger-tracking behaviour of children with ASD while they are reading few episodes of a connected text on the tablet touchscreen. The reader's voice is also recorded through the tablet built-in microphone. The sliding movements of the finger across the tablet touchscreen are discretized into a series of densely distributed "touch events", which are then mapped onto the text lines in much the same way eye fixations are projected onto a sequence of words using an eye-tracker. Reading texts are linguistically annotated, to control for levels of reading difficulty, and finger-tracking times are associated with linguistic glosses. Objectives: Investigate patterns of finger-tracking as a potential non biological marker for identification of children with ASD . Methods: A preliminary analysis is offered of evidence of the finger-tracking behaviour of 20 Italian children with high functioning ASD, aged 7-11 years, while they are engaged in reading. A grade-matched control group of children with typical development was included. 
Patterns of finger-tracking are assessed in connection with three complementary aspects of reading behaviour: (1) word recognition, (2) pace of reading of multi-word intonation units, and (3) text comprehension, controlled by asking children a few multiple-choice questions on text content after each reading session. Results: Considerable variation in levels of reading ability was observed in the ASD sample, with a few children showing clear evidence of impaired reading comprehension. However, fluent readers with ASD exhibit the same correlation between accurate decoding (assessed by measuring per-word reading speed) and high levels of reading comprehension found in controls. Likewise, decoding rates were found to significantly increase with increasing grade levels, following the typical developmental pattern observed in controls. On a less local level of linguistic analysis, the reading pace of ASD readers fails to be modulated according to major syntactic structures, punctuation marks and direct speech turns, an effect concomitant with a flat prosodic intonation of oral reading. Conclusions: Preliminary findings confirm the heterogeneous nature of reading skills in children with ASD, showing that the use of a tablet screen as a tactile interface for visual perception analysis can offer a robust experimental protocol for large-scale, multimodal collection of naturalistic data for extensive assessment of readers with ASD.}, KEYWORDS = {reading, autism, finger-tracking, developing readers, prediction-driven processing}, PAGES = {192-192}, URL = {https://cdn.ymaws.com/www.autism-insar.org/resource/resmgr/files/insar_2022/2022_Abstract_Book.pdf}, VOLUME = {2022}, CONFERENCE_NAME = {INSAR}, CONFERENCE_PLACE = {Austin, Texas}, CONFERENCE_DATE = {11-14/05/2022}, BOOKTITLE = {2022 annual meeting abstract book}, } @INPROCEEDINGS{MARZI_2022_INPROCEEDINGS_MP_471259, AUTHOR = {Marzi, C. 
and Pirrelli, V.}, TITLE = {An information-theoretic analysis of the inflectional regular-irregular gradient for optimal processing units}, YEAR = {2022}, ABSTRACT = {Prediction-driven word processing defines the human ability to anticipate upcoming input words in recognition. From this perspective, input word forms need to be processed as quickly and efficiently as possible. Under the reasonable assumption that spoken words are memorized and processed as word trees (e.g. Marslen-Wilson's "cohorts"), the larger the size of the cohort of an input word at a certain point in time (and the later its uniqueness point), the harder and slower to process the word is. Regularly and irregularly inflected verb forms have different stem family sizes and different uniqueness points. Using a Recurrent Neural Network (RNN) as a computational model of the human lexical processor, we explore here how their distributional and structural properties may affect (optimal) processing strategies.}, KEYWORDS = {Morphological inflection, prediction-driven processing, discriminability, non-linearity, learnability}, PAGES = {50-51}, URL = {http://www.nytud.hu/imm20/abstracts/main.pdf}, CONFERENCE_NAME = {20th International Morphology Meeting-(Dedicated to the memory of Ferenc Kiefer)}, CONFERENCE_PLACE = {Budapest}, CONFERENCE_DATE = {01-04/09/2022}, } @INPROCEEDINGS{RANOCCHIA_2022_INPROCEEDINGS_RPVPFVAMRRCPPZBDE_472284, AUTHOR = {Ranocchia, G. and Puglia, E. and Vassallo, C. and Pernigotti, C. and Fleischer, K. and Verhasselt, G. and Alessandrelli, M. and Miliani, C. and Romano, F. P. and Rosi, F. and Caliri, C. and Pavone, D. P. and Preisler, Z. and Zenzaro, S. and Boschetti, F. and Del Grosso, A. M. and Enea, A.}, TITLE = {The Greek philosophical schools according to Europe's earliest history of philosophy. 
Towards a new pioneering critical edition of Philodemus' Arrangement of the Philosophers}, YEAR = {2022}, ABSTRACT = {Our knowledge about Greek philosophical schools is mostly second-hand and based on Diogenes Laërtius' Lives of Eminent Philosophers (3rd century AD) and Philodemus' Arrangement of the Philosophers (75-50 BC), a treatise in several books which represents the earliest 'history of philosophy' to have reached us directly from antiquity. From this work exclusively preserved by the Herculaneum papyri we may derive a virtually systematic account of the history of Greek philosophical schools, which is unique in its kind.}, KEYWORDS = {papyrology, digital papyrology, digital humanities, digital cultural heritage}, URL = {https://publications.cnr.it/doc/472284}, CONFERENCE_NAME = {XXXth International Congress of Papyrology}, CONFERENCE_PLACE = {Paris}, CONFERENCE_DATE = {25/07/2022-30/07/2022}, } @INPROCEEDINGS{VENUTI_2022_INPROCEEDINGS_VD_484399, AUTHOR = {Venuti, M. and Del Grosso, A. M.}, TITLE = {La Galassia Musisque Deoque}, YEAR = {2022}, ABSTRACT = {Il contributo illustra il lavoro di riorganizzazione della galassia MQDQ in un sistema distribuito e collaborativo, facendo uso di Domain Specific Languages accanto alla modalità tradizionale di codifica mediante XML/TEI.}, KEYWORDS = {Digital Humanities, Filologia Digitale, MQDQ, Filologia Collaborativa, Domain Specific Languages}, URL = {https://publications.cnr.it/doc/484399}, CONFERENCE_NAME = {Maratona DH}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {13/10/2022}, } @TECHREPORT{ALBANESI_2022_TECHREPORT_ABCGMPS_470012, AUTHOR = {Albanesi, D. and Bellandi, A. and Colombo, M. and Giovannetti, E. and Marchi, S. and Papini, M. 
and Sciolette, F.}, TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-Rapporto tecnico 21}, YEAR = {2022}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto del Progetto Traduzione Talmud Babilonese nel periodo novembre 2021 - aprile 2022. Le principali attività tecniche svolte sul sistema Traduco sono state la risoluzione di bug e l'implementazione di nuove funzionalità richieste. Parallelamente, è stata realizzata una applicazione per la consultazione del corpus biblico e sono proseguite le attività volte alla visualizzazione di risorse lessicali tramite grafi e per l'analisi, l'allineamento di testi, l'aggiornamento e la conversione del lessico computazionale PSC da utilizzarsi per la consultazione avanzata, su base morfologica e semantica, del testo talmudico tradotto in italiano.}, KEYWORDS = {Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Lessici elettronici, rappresentazione della conoscenza, Linguistica Computazionale, traduzione di testi religiosi}, PAGES = {23}, URL = {https://publications.cnr.it/doc/470012}, } @TECHREPORT{ALBANESI_2022_TECHREPORT_ACGMPS_475381, AUTHOR = {Albanesi, D. and Colombo, M. and Giovannetti, E. and Marchi, S. and Papini, M. and Sciolette, F.}, TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-Rapporto tecnico 22}, YEAR = {2022}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto del Progetto Traduzione Talmud Babilonese nel periodo maggio 2022 - dicembre 2022. Le principali attività tecniche svolte sul sistema Traduco sono state la risoluzione di bug e l'implementazione di nuove funzionalità richieste. 
Parallelamente, è proseguito il lavoro di ricerca e sviluppo su tre fronti: i) la nuova versione di Traduco, ii) la visualizzazione grafica di risorse lessicali, e iii) la ricerca full-text sul testo del Talmud tradotto in italiano.}, KEYWORDS = {Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Lessici elettronici, rappresentazione della conoscenza, Linguistica Computazionale, traduzione di testi religiosi, ricerca full-text}, PAGES = {40}, URL = {https://publications.cnr.it/doc/475381}, } @TECHREPORT{ALBANESI_2022_TECHREPORT_AGP_470013, AUTHOR = {Albanesi, D. and Giovannetti, E. and Papini, M.}, TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-rapporto integrativo 4}, YEAR = {2022}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto delle attività di progetto previste dalla convenzione integrativa stipulata tra PTTB e ILC-CNR in data 10/07/2018 e condotte nel periodo febbraio 2021 - gennaio 2022.}, KEYWORDS = {Linguistica Computazionale, Traduzione di Testi Religiosi, Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Sviluppo front-end}, PAGES = {15}, URL = {https://publications.cnr.it/doc/470013}, } @TECHREPORT{CARDILLO_2022_TECHREPORT_C_484278, AUTHOR = {Cardillo, F. A.}, TITLE = {DEEPHEALTH-D6.2 Validation of DeepHealth platforms and use cases}, YEAR = {2022}, ABSTRACT = {Final report on the validation of DeepHealth platforms and use cases (WP6, T6.2)}, KEYWORDS = {Artificial Intelligence, HPC, Machine Learning}, URL = {https://publications.cnr.it/doc/484278}, } @TECHREPORT{DIDONATO_2022_TECHREPORT_DEPKVVKTTLPTCB_474551, AUTHOR = {Di Donato, F. and Eskevich, M. and Provost, L. and Kraker, P. and Van Uytvanck, D. and Vignoli, M. and König, A. and Thiel, C. and Tetteh Ocansey, J. and Lombardo, T. and Pohle, S. and Tóth Czifra, E. and Chen, Y. and Blotière, E.}, TITLE = {D6. 
5 Report on Open Science within the EOSC}, YEAR = {2022}, ABSTRACT = {The report Open Science within the EOSC collects the findings of T6.3 EOSC guidelines training and advocacy on Open Science, whose main goal is to produce, adapt and reuse general and specific guidelines to implement Open Science practices in disciplinary contexts, as well as to provide training to the SSH community on Open Science practices, tools and EOSC related content. In T6.3 we delivered 12 training sessions and all training materials are published in open access1. In addition, we designed, developed and implemented an open workflow for the definition and management of the Open Science Training series, through the definition of a TRIPLE Training Toolkit, which reproduces the guidelines that have been designed and implemented to produce FAIR-by design training events. This set of documents constitutes section 1 of the present deliverable. Moreover, section 2 of the report presents four use-cases which address Open Science-related issues from different perspectives. The first subsection (CNR) sheds light on pathways for interdisciplinary collaboration and managing networking challenges. The second subsection (OKMAPS) addresses the need for improved discoverability of resources across research publications, research data and research projects. The third (CLARIN) shows how the connection between data, tools and publications can be implemented and made public. A model is explored to enrich the metadata about language resource data and tools from the CLARIN Resource Families with related publications, making use of the GoTriple platform. This cross-connects the CLARIN Virtual Language Observatory, the SSHOC Open Marketplace and the GoTriple platform. The last subsection (CESSDA) examines the balance between making data more accessible and aligning with legal restrictions. 
It explains how data is aligned internally to make them easily harvestable, and how, in turn, enrichment features from EOSC (OpenAIRE) and GoTriple systems will be used to improve data at their original sources.}, KEYWORDS = {open science, training}, URL = {https://zenodo.org/record/7360227#.Y42-zezMJfU}, } @TECHREPORT{MARTELLI_2022_TECHREPORT_MMCNVUFQKKLDTTCSKIDGM_472421, AUTHOR = {Martelli, F. and Maru, M. and Campagnano, C. and Navigli, R. and Velardi, P. and Ureña Ruiz, R. and Frontini, F. and Quochi, V. and Kallas, J. and Koppel, K. and Langemets, M. and De Does, J. and Tempelaars, R. and Tiberius, C. and Costa, R. and Salgado, A. and Krek, S. and Čibej, J. and Dobrovoljc, K. and Gantar, P. and Munda, T.}, TITLE = {D3.8 Lexical-semantic analytics for NLP}, YEAR = {2022}, ABSTRACT = {The present document illustrates the work carried out in task 3.3 (work package 3) focused on lexical-semantic analytics for Natural Language Processing (NLP). This task aims at computing analytics for lexical-semantic information such as words, senses and domains in the available resources, investigating their role in NLP applications. Specifically, this task concentrates on three research directions, namely i) sense clustering, in which grouping senses based on their semantic similarity improves the performance of NLP tasks such as Word Sense Disambiguation (WSD), ii) domain labeling of text, in which the lexicographic resources made available by the ELEXIS project for research purposes allow better performances to be achieved, and finally iii) analysing the diachronic distribution of senses, for which a software package is made available. In this deliverable, we illustrate the research activities aimed at achieving the aforementioned goals and put forward suggestions for future works. Importantly, we stress the crucial role played by high-quality lexical-semantic resources when investigating such linguistic aspects and their impact on NLP applications. 
To this end, as an additional contribution, we address the paucity of manually-annotated data in the lexical-semantic research field and introduce the ELEXIS parallel sense-annotated dataset, a novel entirely manually-curated resource available in 10 European languages and featuring 5 annotation layers.}, KEYWORDS = {research infrastructures, lexicography, lexical resources, word-sense disambiguation, WSD, sense-annotated language data, multilinguality}, PAGES = {67}, URL = {https://elex.is/wp-content/uploads/ELEXIS_D3_8_Lexical-Semantic_Analytics_for_NLP_final_report.pdf}, } @TECHREPORT{TASOVAC_2022_TECHREPORT_TTBBBCUFHHJKKKKMMMMMQRRSSVWWZ_463877, AUTHOR = {Tasovac, T. and Tiberius, C. and Bamberg, C. and Bellandi, A. and Burch, T. and Costa, R. and Ďurčo, M. and Frontini, F. and Hennemann, J. and Heylen, K. and Jakubíček, M. and Khan, F. and Klee, A. and Kosem, I. and Kovář, V. and Matuška, O. and McCrae, J. and Monachini, M. and Mörth, K. and Munda, T. and Quochi, V. and Repar, A. and Roche, C. and Salgado, A. and Sievers, H. and Váradi, T. and Weyand, S. and Woldrich, A. and Zhanial, S.}, TITLE = {D5.3 Overview of Online Tutorials and Instruction Manuals}, YEAR = {2022}, ABSTRACT = {The ELEXIS Curriculum is an integrated set of training materials which contextualizes ELEXIS tools and services inside a broader, systematic pedagogic narrative. This means that the goal of the ELEXIS Curriculum is not simply to inform users about the functionalities of particular tools and services developed within the project, but to show how such tools and services are a) embedded in both lexicographic theory and practice; and b) representative of and contributing to the development of digital skills among lexicographers. The scope and rationale of the curriculum are described in more detail in the Deliverable D5.2 Guidelines for Producing ELEXIS Tutorials and Instruction Manuals. 
The goal of this deliverable, as stated in the project DOW, is to provide "a clear, structured overview of tutorials and instruction manuals developed within the project."}, KEYWORDS = {ELEXIS, lexicography, training materials}, PAGES = {31}, URL = {https://elex.is/wp-content/uploads/ELEXIS_D5_3_Overview-of-Online-Tutorials-and-Instruction-Manuals.pdf}, } @MISC{BARONI_2022_MISC_B_484305, AUTHOR = {Baroni, P.}, TITLE = {SIGUL Web Site}, YEAR = {2022}, ABSTRACT = {Sito Web di SIGUL - Gruppo di Interesse Speciale sulle Lingue con Risorse Insufficienti}, KEYWORDS = {under-resourced languages, digital diversity, digital survival}, URL = {https://www.sigul.eu}, } @MISC{BOSCHETTI_2022_MISC_BD_484483, AUTHOR = {Boschetti, F. and Del Grosso, A. M.}, TITLE = {Una bussola per navigare nei mari digitali: il ruolo di un K-Centre CLARIN per le esigenze degli studiosi}, YEAR = {2022}, ABSTRACT = {The contribution will give an overview about the structure of the Common Language Research Infrastructure (CLARIN) and its initiatives devoted to the DH, both at the European and at the national level as well as the activities of a new CLARIN Knowledge Centre: the DiPText-KC. Furthermore, it will introduce the practices that we suggest for a workflow to digit(al)ize texts and how to host projects in ILC4CLARIN through the assistance of the DiPText-KC.}, KEYWORDS = {Digital Humanities, CLARIN, DiPText-KC, CoPhiLab, Digital Philology}, URL = {https://publications.cnr.it/doc/484483}, } @MISC{BOSCHETTI_2022_MISC_BDMTP_484468, AUTHOR = {Boschetti, F. and Del Grosso, A. M. and Macchiarelli, A. and Tanozzi, F. and Puliero, J.}, TITLE = {The Remains of the Text-Natural Language Processing}, YEAR = {2022}, ABSTRACT = {The seminar presents some digital technologies for the study of texts of literary nature mediated by the use of computational tools. 
Specifically, the following topics are addressed: a) Coronelli's Epitome Cosmografica; b) Aratus' Phaenomena and the art of composing acrostics; c) Latin poets between astronomy and mythology; d) Lemmatisation and Semi-automated linguistic analysis of Latin texts; e) Musisque Deoque: a powerful instrument to study intertextuality.}, KEYWORDS = {Digital Humanities, Digital Philology, Summer School, MQDQ, NLP}, URL = {https://www.unive.it/data/33113/2/60398}, } @MISC{DELGRATTA_2022_MISC_D_471685, AUTHOR = {Del Gratta, R.}, TITLE = {Installing and configuring CLARIN-DSPACE on UBUNTU 22.04 LTS}, YEAR = {2022}, ABSTRACT = {Il Manuale tecnico descrive le varie procedure per installare il software CLARIN-DSPACE presso l'Istituto di Linguistica Computazionale}, KEYWORDS = {CLARIN, CLARIN-DSPACE, Repository, Installation Guide}, PAGES = {1-67}, URL = {https://publications.cnr.it/doc/471685}, } @MISC{DIDONATO_2022_MISC_DPLVPTCB_465244, AUTHOR = {Di Donato, F. and Provost, L. and Lombardo, T. and Vignoli, M. and Pohle, S. and Tóth Czifra, E. and Chen, Y. and Blotière, E.}, TITLE = {TRIPLE Training Toolkit (0.1)}, YEAR = {2022}, ABSTRACT = {The TRIPLE Training Toolkit is part of the work performed by Work Package 6 (WP6) under Task 6.3 in the TRIPLE Project (Transforming Research through Linked Interdisciplinary Exploration). The project is funded by the European Commission, under Grant Agreement No. 863420 and will run for 42 months starting from October 2019. The TRIPLE Open Science Training Series focuses on the design and delivery of competence-oriented training to address the specific and general needs of the research community on Open Science topics and on the EOSC. The experiment enabled a reflection on the current challenges to make FAIR-by-design training resources and how to overcome them. The following files are deposited in Zenodo to serve as a reference for those wishing to reproduce this experiment within their own institution or for their own training activities. 
Please note that the training series are still ongoing and as such the present document and the files listed below will be followed by updated versions by the end of the project (2023).}, KEYWORDS = {open science, training, TRIPLE}, URL = {https://doi.org/10.5281/zenodo.6256198}, } @MISC{FRONTINI_2022_MISC_FBQMMZUW_463506, AUTHOR = {Frontini, F. and Bellandi, A. and Quochi, V. and Monachini, M. and Mörth, K. and Zhanial, S. and Ďurčo, M. and Woldrich, A.}, TITLE = {CLARIN Tools and Resources for Lexicographic Work}, YEAR = {2022}, ABSTRACT = {This course introduces lexicographers to the CLARIN Research Infrastructure and highlights language resources and tools useful for lexicographic practices. The course consists of two parts. In Part 1, you will learn about CLARIN, its technical and knowledge infrastructure, and about how to deposit and find lexical resources in CLARIN. In Part 2, you will become acquainted with CLARIN tools that can be used to create lexical resources.}, KEYWORDS = {CLARIN, lexicography}, URL = {https://elexis.humanistika.org/id/UnwYPq70Dewbn7XDEjsMM}, } @MISC{MARTELLI_2022_MISC_MNKKGKNPOLKKDUSLVGLQMFTTCSIM_472295, AUTHOR = {Martelli, F. and Navigli, R. and Krek, S. and Kallas, J. and Gantar, P. and Koeva, S. and Nimb, S. and Pedersen, B. S. and Olsen, S. and Langemets, M. and Koppel, K. and Üksik, T. and Dobrovoljc, K. and Ureña Ruiz, R. and Sancho Sánchez, J. and Lipp, V. and Váradi, T. and Győrffy, A. and László, S. and Quochi, V. and Monachini, M. and Frontini, F. and Tiberius, C. and Tempelaars, R. and Costa, R. and Salgado, A. and Čibej, J. and Munda, T.}, TITLE = {Parallel sense-annotated corpus ELEXIS-WSD 1. 0}, YEAR = {2022}, ABSTRACT = {ELEXIS-WSD is a parallel sense-annotated corpus in which content words (nouns, adjectives, verbs, and adverbs) have been assigned senses. Version 1.0 contains sentences for 10 languages: Bulgarian, Danish, English, Spanish, Estonian, Hungarian, Italian, Dutch, Portuguese, and Slovene. 
The corpus was compiled by automatically extracting a set of sentences from WikiMatrix (Schwenk et al., 2019), a large open-access collection of parallel sentences derived from Wikipedia, using an automatic approach based on multilingual sentence embeddings. The sentences were manually validated according to specific formal, lexical and semantic criteria (e.g. by removing incorrect punctuation, morphological errors, notes in square brackets and etymological information typically provided in Wikipedia pages). To obtain a satisfying semantic coverage, we filtered out sentences with less than 5 words and less than 2 polysemous words. Subsequently, in order to obtain datasets in the other nine target languages, for each selected sentence in English, the corresponding WikiMatrix translation into each of the other languages was retrieved. If no translation was available, the English sentence was translated manually. The resulting corpus is comprised of 2,024 sentences for each language.}, KEYWORDS = {Word Sense Disambiguation, corpus parallelo, disambiguazione automatica del senso, annotazione semantica multilingue}, URL = {http://hdl.handle.net/11356/1674}, } @MISC{QUOCHI_2022_MISC_QB_463856, AUTHOR = {Quochi, V. and Bellandi, A.}, TITLE = {LexO editor: the basics-video tutorial}, YEAR = {2022}, ABSTRACT = {Video tutorial sull'uso di LexO, un editor di lessici secondo il modello Ontolex-lemon. Il tutorial è parte dell'ELEXIS training programme disponibile sulla piattaforma DARIAH-teach.}, KEYWORDS = {lexicon editor, video tutorial, training material, lexO, online web application}, URL = {https://www.youtube.com/watch?v=9KE0laMaTAs\&list=PLoD829qNERpYKq8JRkY4EIGgZCdi0QHOd}, } @MISC{SCIOLETTE_2022_MISC_SFGEMS_484511, AUTHOR = {Sciolette, Flavia and Giovannetti, Emiliano and Marchi, Simone}, TITLE = {LexicO}, YEAR = {2022}, ABSTRACT = {LexicO is a resource deriving from Parole-Simple-Clips (http://hdl.handle.net/20.500.11752/ILC-88). 
This resource contains all four levels of linguistic information represented in PSC (phonology, morphology, syntax, and semantics) which have been automatically analysed to find redundant, erroneous and missing data. The process of updating that led to the current version of LexicO starting from PSC included: i) the removal of all sure redundant entries (i.e. duplicates) belonging to all four linguistic levels; ii) the creation of tables dedicated to candidate redundants, detected by considering specific similarities amongst entries; iii) the correction of missing semantic and syntax-semantics interface relations amongst the entries of the lexicon.}, KEYWORDS = {computational lexicon, Lexical Database, semantics, morphology, syntax, phonology}, URL = {https://dspace-clarin-it.ilc.cnr.it/repository/xmlui/handle/20.500.11752/ILC-977}, } @MISC{ZENZARO_2022_MISC_ZBD_472305, AUTHOR = {Zenzaro, S. and Boschetti, F. and Del Grosso, A. M.}, TITLE = {Relazione sugli avanzamenti della piattaforma per il progetto GreekSchools}, YEAR = {2022}, ABSTRACT = {Relazione sugli avanzamenti della piattaforma per il progetto GreekSchools}, KEYWORDS = {GreekSchools, Digital Humanities, Digital papyrology}, URL = {https://publications.cnr.it/doc/472305}, } @MISC{ZENZARO_2022_MISC_ZDB_484324, AUTHOR = {Zenzaro, S. and Del Grosso, A. M. and Boschetti, F.}, TITLE = {CophiEditor \& Viewer}, YEAR = {2022}, ABSTRACT = {Piattaforma di Filologia Digitale, Computazionale, Collaborativa e Cooperativa in sviluppo presso CNR-ILC nel contesto del progetto ERC Advanced Grant 885222-GreekSchools "The Greek Philosophical Schools according to Europe's earliest 'history of philosophy'. Towards a new pioneering critical edition of Philodemus' Arrangement of the Philosophers". 
La piattaforma implementa il modello di Edizioni Scientifiche Digitali DSL-based DSE.}, KEYWORDS = {Digital Humanities, Filologia Digitale, Filologia Computazionale, Filologia Collaborativa, Edizione Scientifica Digitale, GreekSchools, DSL-based DSE}, URL = {https://cophi.github.io/gs-data-service-api/#/}, } @ARTICLE{BACCO_2021_ARTICLE_BCDM_472153, AUTHOR = {Bacco, L. and Cimino, A. and Dell'Orletta, F. and Merone, M.}, TITLE = {Explainable sentiment analysis: A hierarchical transformer-based extractive summarization approach}, YEAR = {2021}, ABSTRACT = {In recent years, the explainable artificial intelligence (XAI) paradigm is gaining wide research interest. The natural language processing (NLP) community is also approaching the shift of paradigm: building a suite of models that provide an explanation of the decision on some main task, without affecting the performances. It is not an easy job for sure, especially when very poorly interpretable models are involved, like the almost ubiquitous (at least in the NLP literature of the last years) transformers. Here, we propose two different transformer-based methodologies exploiting the inner hierarchy of the documents to perform a sentiment analysis task while extracting the most important (with regards to the model decision) sentences to build a summary as the explanation of the output. For the first architecture, we placed two transformers in cascade and leveraged the attention weights of the second one to build the summary. For the other architecture, we employed a single transformer to classify the single sentences in the document and then combine the probability scores of each to perform the classification and then build the summary. We compared the two methodologies by using the IMDB dataset, both in terms of classification and explainability performances. To assess the explainability part, we propose two kinds of metrics, based on benchmarking the models' summaries with human annotations. 
We recruited four independent operators to annotate a few documents retrieved from the original dataset. Furthermore, we conducted an ablation study to highlight how implementing some strategies leads to important improvements on the explainability performance of the cascade transformers model.}, KEYWORDS = {Natural Language Processing, Sentiment Analysis, Explainable AI}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85114289346\&origin=inward}, VOLUME = {10}, DOI = {10.3390/electronics10182195}, PUBLISHER = {MDPI (Basel)}, ISSN = {2079-9292}, JOURNAL = {Electronics (Basel)}, } @ARTICLE{BOSCHETTI_2021_ARTICLE_BM_472309, AUTHOR = {Boschetti, F. and Mugelli, G.}, TITLE = {Il metodo Euporia per creare nuovi archivi digitali sulla tragedia greca}, YEAR = {2021}, ABSTRACT = {This article illustrates Euporia, i.e. a method for annotating literary texts based on Domain-Specific Languages. The annotation systems developed using this method are modeled on the needs of the users, their specific habits of studying and annotating texts, and the aims of their research. The two case studies discussed in this contribution show the application of the method in the context of two projects focused on the texts of Greek tragedy: on the one hand, a didactic project based on Aeschylus' Persians, on the other hand, a research project in the field of anthropology of the ancient world, based on the entire corpus of the Greek tragedy. 
The discussion aims to illustrate how text annotation through DSL can have both a didactic function and a scientific interest.}, KEYWORDS = {Domain-Specific Languages, textual annotation, Ancient Greek Tragedy}, PAGES = {83-113}, URL = {https://ojs.cimedoc.uniba.it/index.php/fc/article/view/1381}, VOLUME = {7}, DOI = {10.15162/2465-0951/1381}, PUBLISHER = {Centro Interuniversitario di Ricerca di Studi sulla Tradizione CIRST (Bari, Italia)}, ISSN = {2465-0951}, JOURNAL = {FuturoClassico FCl}, } @ARTICLE{CARDILLO_2021_ARTICLE_CS_458770, AUTHOR = {Cardillo, F. A. and Straccia, U.}, TITLE = {Fuzzy OWL-Boost: learning fuzzy concept inclusions via real-valued boosting}, YEAR = {2021}, ABSTRACT = {OWL ontologies are nowadays a quite popular way to describe structured knowledge in terms of classes, relations among classes and class instances. In this paper, given an OWL ontology and a target class T, we address the problem of learning fuzzy concept inclusion axioms that describe sufficient conditions for being an individual instance of T (and to which degree). To do so, we present FUZZY OWL-BOOST that relies on the Real AdaBoost boosting algorithm adapted to the (fuzzy) OWL case. We illustrate its effectiveness by means of an experimentation with several ontologies.}, KEYWORDS = {OWL Ontology, Machine Learning, Fuzzy Logic, Boosting}, PAGES = {164-186}, URL = {https://www.sciencedirect.com/science/article/abs/pii/S0165011421002426}, VOLUME = {438}, DOI = {10.1016/j.fss.2021.07.002}, PUBLISHER = {North-Holland (Amsterdam, Paesi Bassi)}, ISSN = {0165-0114}, JOURNAL = {Fuzzy sets and systems}, } @ARTICLE{DELGRATTA_2021_ARTICLE_DBBS_461498, AUTHOR = {Del Gratta, R. and Boschetti, F. and Bambaci, L. and Sarnari, F.}, TITLE = {Document analysis and Textual philology: A Formal Perspective}, YEAR = {2021}, ABSTRACT = {We introduce a formal approach to document and text analysis. 
The method proposed herein results in a mathematical model/framework which can formalize different challenges in research fields such as computational linguistics, digital philology, and software engineering, principally if applied to document and text analysis. We examine texts and documents from an evolutionary perspective, where both corruption and correction are involved. We describe document evolution via fibre bundles formalism. We also provide other examples to demonstrate the capabilities of the model.},
  KEYWORDS = {Formal model, document analysis, evolutionary approach, fibre bundles},
  PAGES = {5--15},
  URL = {https://www.innove.org/ijist/index.php/ijist/article/view/192},
  VOLUME = {5},
  PUBLISHER = {[El Mohajir Mohammed] ([S. l. ], Marocco)},
  ISSN = {2550-5114},
  JOURNAL = {International Journal of Information Science and Technology},
}
@ARTICLE{DELGRATTA_2021_ARTICLE_DGPC_451726,
  AUTHOR = {Del Gratta, R. and Goggi, S. and Pardelli, G. and Calzolari, N.},
  TITLE = {The LRE Map: what does it tell us about the last decade of our field?},
  YEAR = {2021},
  ABSTRACT = {The LRE Map of Language Resources was introduced at LREC 2010. Its intended purpose was: "to shed light on the vast amount of resources that represent the background of the research presented at LREC" (Calzolari et al. in: Calzolari et al. (eds) Proceedings of the seventh international conference on language resources and evaluation (LREC'10). European Language Resources Association (ELRA), Valletta, 2010). It also aimed at a change of culture in the field, actively engaging each researcher both in the documentation task about resources and in sharing resources. When we started to use it regularly also in other conferences, it became clear that it was an innovative instrument able to provide a picture of the field and its evolution as reflected by the creation and use of Language Resources. After 9 years we revisit the Map, considerably extending the data analysed in an LREC 2018 paper. The LRE Map data analysed here have been provided by the authors of 21 conferences during the phase of submission of papers, and contain information about 9405 resources. We analyse the LRE Map data from many different viewpoints and the paper reports on the global picture, along the many Map dimensions, on different trends emerging from a diachronic perspective and finally on some comparisons between five editions of the two major conferences present in the Map: LREC and COLING.},
  KEYWORDS = {LR infrastructure, metadata, LR documentation},
  PAGES = {259--283},
  URL = {https://link.springer.com/article/10.1007/s10579-020-09520-6},
  VOLUME = {55},
  DOI = {10.1007/s10579-020-09520-6},
  PUBLISHER = {Springer (Dordrecht, Paesi Bassi)},
  ISSN = {1574-020X},
  JOURNAL = {Language resources and evaluation (Print)},
}
@ARTICLE{DELGROSSO_2021_ARTICLE_DFMTN_458287,
  AUTHOR = {Del Grosso, A. M. and Fihri, D. F. and Mohajir, M. E. and Tonazzini, A. and Nahli, O.},
  TITLE = {Challenges in the digital analysis of historical laminated manuscripts},
  YEAR = {2021},
  ABSTRACT = {In this paper, we analyze and discuss the characteristics of a system for the effective digital preservation and fruition of historical manuscripts degraded by the process of lamination. The most significant degradation caused by lamination is that the parchment or paper support loses its flatness, and usually presents ripples and warnings. This, together with the affixed translucent varnish, dramatically impair the digital acquisition process, so that light reflections in the more disparate directions affect the digital images.
A digital system to contrast this irreversible and progressive degradation and to enable an effective access to the fragile asset should provide a number of functionalities: specialized digitization, able to avoid reflections as much as possible; image enhancement, devised to correct the residual degradations and enhance the text for an easier legibility; semi-automatic transcription of the virtually restored pages; and, finally, scholarly encoding and linguistic analysis, which should adapt existing tools to the specificity of the primary source (writing system and language). As a case study, we will make reference to the "Poem in Rajaz on medicine", written by Abubacer in the XII century, and conserved in the Al Quaraouiyine Library located in Fez, Morocco. The feasibility study for the realization of such a system is of general utility, in that it can provide guidelines for the digitization, the enhancement and the text encoding of the many laminated manuscripts conserved in other historical archives. On the other hand, from the cultural heritage point of view, the experimentation on the "Poem in Rajaz on medicine" could foster the systematic philological and ontological study of a unique piece of our documental heritage: the longest poem of medieval Islamic medical literature.},
  KEYWORDS = {Cultural Heritage Digital Safeguard, Historical Manuscript Digitization, Document Image Processing, Linguistic Analysis, Ontological Analysis},
  PAGES = {34--43},
  URL = {https://innove.org/ijist/index.php/ijist/article/view/190},
  VOLUME = {5},
  DOI = {10.57675/IMIST.PRSM/ijist-v5i1.190},
  PUBLISHER = {[El Mohajir Mohammed] ([S. l. ], Marocco)},
  ISSN = {2550-5114},
  JOURNAL = {International Journal of Information Science and Technology},
}
@ARTICLE{GIOVANNETTI_2021_ARTICLE_GABDDM_457778,
  AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Dattilo, D. and Del Grosso, A. M. and Marchi, S.},
  TITLE = {An ontology of masters of the Babylonian Talmud},
  YEAR = {2021},
  ABSTRACT = {The purpose of this research is to build an ontology of the masters appearing in the Babylonian Talmud (BT). The ontology built so far has been shared as a Linked Open Data and it will be linked to existing vocabularies. This work has been developed in the context of the Babylonian Talmud Translation Project, where more than eighty Talmudists are working together, since 2012, at the translation (comprehensive of explicative notes and glossaries) of the Talmud into Italian. The construction of the resource has involved the application of tools leveraging on computational linguistics approaches. The ontology, already describing more than 500 masters, constitutes the first portion of a more comprehensive Talmudic Knowledge Base where the text itself, the terminology, the entities, and the concepts constituting the BT will be formalized and linked to each other.},
  KEYWORDS = {ontology, babylonian talmud, terminology, word alignment, linked open data, semantic web, knowledge representation},
  PAGES = {725--737},
  URL = {https://academic.oup.com/dsh/article-abstract/37/3/725/6410110},
  VOLUME = {37},
  DOI = {10.1093/llc/fqab043},
  PUBLISHER = {Oxford University Press (Oxford, UK, Regno Unito)},
  ISSN = {2055-7671},
  JOURNAL = {Digital Scholarship in the Humanities},
}
@ARTICLE{GUADAGNINI_2021_ARTICLE_G_449622,
  AUTHOR = {Guadagnini, E.},
  TITLE = {3.5. Paolo e Francesca per bambini, Paolo e Francesca per adulti: censure, revisioni, riletture},
  YEAR = {2021},
  ABSTRACT = {Selezione e commento di alcune riletture della vicenda di Paolo e Francesca (Dante, Inf. 5).},
  KEYWORDS = {Dante Alighieri, riscritture, fumetto, letteratura per l'infanzia},
  URL = {http://www.arabeschi.it/35-paolo-e-francesca-per-bambini-adulti-censure-revisioni-riletture/},
  VOLUME = {17},
  PUBLISHER = {s. n (Catania; Pisa, Italia)},
  ISSN = {2282-0876},
  JOURNAL = {Arabeschi},
}
@ARTICLE{GUADAGNINI_2021_ARTICLE_G_461428,
  AUTHOR = {Guadagnini, E.},
  TITLE = {Hélène Miesse, Un laboratorio di carte. Il linguaggio della politica nel «carteggio» di Francesco Guicciardini},
  YEAR = {2021},
  KEYWORDS = {Guicciardini, lessico, politica},
  PAGES = {206--212},
  URL = {https://publications.cnr.it/doc/461428},
  VOLUME = {85},
  ISSN = {0035-1458},
  JOURNAL = {Revue de linguistique romane},
}
@ARTICLE{MARZI_2021_ARTICLE_MGSV_447049,
  AUTHOR = {Marzi, C. and Greco, A. and Scilingo, E. P. and Vanello, N.},
  TITLE = {Towards a model of arousal change after affective word pronunciation based on electrodermal activity and speech analysis},
  YEAR = {2021},
  ABSTRACT = {In this paper, we explore the possibility of building a model of subject arousal by exploiting the acquisition and the analysis of speech and electrodermal activity (EDA). Several issues have to be addressed to reach this goal as the estimation of the relationship between arousal and behavioral measures and the reliability of EDA signal during speech production. To accomplish this task, we will investigate the relation among EDA, speech activity and subject arousal, during isolated affective word pronunciation. Our results show that significant information on subject arousal can be obtained by analyzing EDA during the processing of out-of-context words with an emotional content in a reading aloud task. Based on a sample of eighteen Italian participants, we observed a significant relation between EDA features and self-reported arousal scores. Quantitative models relating EDA and speech-derived features are proposed and discussed. We found that increasing values of tonic and phasic components of EDA signals correspond to increasing self-assessed arousal scores; Mel-frequency cepstral analysis of speech was also shown to carry relevant information about subject arousal, with a significant inverse relation to self-assessed scores.
Our results suggest how the analysis of concurrent acquisition of EDA and speech features may offer a valid approach for the prediction of subject arousal during speech production, as well as a method for validating self-assessment ratings themselves.},
  KEYWORDS = {speech, electrodermal activity, statistical models, arousal, word pronunciation},
  PAGES = {1--8},
  URL = {http://www.elsevier.com/locate/bspc},
  VOLUME = {67},
  DOI = {10.1016/j.bspc.2021.102517},
  PUBLISHER = {Elsevier (Oxford, Regno Unito)},
  ISSN = {1746-8094},
  JOURNAL = {Biomedical signal processing and control (Print)},
}
@ARTICLE{MIASCHI_2021_ARTICLE_MBD_454570,
  AUTHOR = {Miaschi, A. and Brunato, D. and Dell'Orletta, F.},
  TITLE = {A NLP-based stylometric approach for tracking the evolution of L1 written language competence},
  YEAR = {2021},
  ABSTRACT = {In this study we present a Natural Language Processing (NLP)-based stylometric approach for tracking the evolution of written language competence in Italian L1 learners. The approach relies on a wide set of linguistically motivated features capturing stylistic aspects of a text, which were extracted from students' essays contained in CItA (Corpus Italiano di Apprendenti L1), the first longitudinal corpus of texts written by Italian L1 learners enrolled in the first and second year of lower secondary school. We address the problem of modeling written language development as a supervised classification task consisting in predicting the chronological order of essays written by the same student at different temporal spans. The promising results obtained in several classification scenarios allow us to conclude that it is possible to automatically model the highly relevant changes affecting written language evolution across time, as well as identifying which features are more predictive of this process. In the last part of the article, we focus the attention on the possible influence of background variables on language learning and we present preliminary results of a pilot study aiming at understanding how the observed developmental patterns are affected by information related to the school environment of the student.},
  KEYWORDS = {stylometry, computational linguistics, language competence},
  PAGES = {71--105},
  URL = {https://www.jowr.org/abstracts/vol13_1/Miaschi_et_al_2021_13_1_abstract.html},
  VOLUME = {13},
  DOI = {10.17239/jowr-2021.13.01.03},
  PUBLISHER = {Universiteit Antwerpen (Antwerpen, Belgio)},
  ISSN = {2030-1006},
  JOURNAL = {Journal of Writing Research},
}
@ARTICLE{MONACHINI_2021_ARTICLE_MSCPB_466817,
  AUTHOR = {Monachini, M. and Stamuli, M. F. and Calamai, S. and Pretto, N. and Bianchi, S.},
  TITLE = {The Grey-side of Audio Archives},
  YEAR = {2021},
  ABSTRACT = {Archives often include documents that can hardly be considered publications or grey literature as such, yet they maintain their documentary value and play a role of primary sources for the specialists. These documents, indeed, can help archivists to reveal the sedimentation process of the archive itself and to preserve the authentic context of the documentary production. They also appear to be very useful for the community of researchers and scholars. This happens more frequently with oral archives which include 'non-conventional sources', thus bringing together audio documents, fieldworks notes, correspondence, slipcases, analogic compact cassettes or open reels. At the cross-road of two disciplines, Archival Science and Grey Literature, this paper aims to argue the applicability of the concept of grey literature to this wide range of documentary materials, by showing the experience of Archivio Vi.Vo, a regional project aiming at building a model for archiving, preserving, managing and disseminating audio documents.},
  KEYWORDS = {oral archives, infrastructures},
  PAGES = {34--37},
  URL = {https://publications.cnr.it/doc/466817},
  VOLUME = {22},
  PUBLISHER = {TransAtlantic (Amsterdam, Paesi Bassi)},
  ISSN = {1386-2316},
  JOURNAL = {The GL-conference series. Conference proceedings},
}
@ARTICLE{MUGELLI_2021_ARTICLE_MBBDKT_461550,
  AUTHOR = {Mugelli, G. and Boschetti, F. and Bellandi, A. and Del Gratta, R. and Khan, A. F. and Taddei, A.},
  TITLE = {Annotating ritual in ancient Greek tragedy: A bottom-up approach in action},
  YEAR = {2021},
  ABSTRACT = {EuporiaRAGT is one of the pilot projects that adopt the Euporia system as a digital support to an historico-anthropological research on the form and function of rituals in the texts of ancient Greek tragedy. This paper describes the bottom-up approach adopted in the project: During the annotation stage, performed with a Domain Specific Language designed with a user-centred approach, the domain expert can annotate ritual and religious phenomena, with the possibility of registering different textual and interpretive variants; the design of a search engine, in a second phase of the work, allows the database to be tested and reorganized.
Finally, the construction of an ontology allows to structure the tags, in order to perform complex queries on the database.},
  KEYWORDS = {digital philology, dsl, ancient Greek},
  PAGES = {17},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85117031943\&origin=inward},
  VOLUME = {15},
  PUBLISHER = {Alliance of Digital Humanities Organizations ([Providence, RI?], Stati Uniti d'America)},
  ISSN = {1938-4122},
  JOURNAL = {Digital humanities quarterly},
}
@ARTICLE{NAHLI_2021_ARTICLE_ND_463930,
  AUTHOR = {Nahli, O. and Del Grosso, A. M.},
  TITLE = {Structuring Arabic lexical and morphological resources using TEI: theory and practice},
  YEAR = {2021},
  ABSTRACT = {An Arabic word can be described according to its lexical and morphological information. The lexical information, conveyed by the root, consists of both semantic meaning and syntactic properties (e.g. parts of speech). The morphological information, encoded by patterns, is useful to group the words having similar syntactic, inflectional and semantic behaviour. Lexical analysis and morphological analysis have been separately described since the very first studies of the Arabic language. Although several scholarly works have illustrated Arabic lexicon models that encode semantic meanings, a systematic description of word patterns is still strongly lacking. In this work, we have implemented an exhaustive resource consisting of two levels: lexical and morphological. The lexical level collects information extracted from the dictionary al=qāmūs al=muḥīṭ. The morphological level describes pattern formalization, which allows to enrich word descriptions with additional semantic, morphosyntactic and inflectional information. To build our digital resource, taking into account primary source, lexical requirements, and reusability, we followed the guidelines provided by the Text Encoding Initiative (abbreviated as TEI). In particular, we adopted the TEI module for the encoding of digital dictionaries and lexicons to formally represent the medieval al=qāmūs al=muḥīṭ dictionary. Given the complexity of describing the morphological information present in the patterns, we also used the TEI module devoted to encoding feature structures. Consequently, we are building an exhaustive resource formed by the lexical and the morphological blocks. These two components are distinct but complementary resources where the lexical data are connected to morphological information. In addition, the morphological resource can be used as a stand-alone tool that allows the morphological analyzers to capture aspects of meaning that cannot be identified by current systems.},
  KEYWORDS = {classical Arabic dictionary, digital lexicography, al=qāmūs al=muḥīṭ},
  PAGES = {3--14},
  URL = {https://innove.org/ijist/index.php/ijist/article/view/191},
  VOLUME = {5},
  PUBLISHER = {[El Mohajir Mohammed] ([S. l. ], Marocco)},
  ISSN = {2550-5114},
  JOURNAL = {International Journal of Information Science and Technology},
}
@ARTICLE{NAHLI_2021_ARTICLE_NSBB_463923,
  AUTHOR = {Nahli, O. and Sanna, A. and Bandini, M. and Boschetti, F.},
  TITLE = {Commerce Numérique: traffic signals for the crossroads between cultures},
  YEAR = {2021},
  ABSTRACT = {Commerce is a literary French journal founded by Princess Margherita Caetani, involving three prestigious collaborators: Paul Valéry, Léon-Paul Fargue, Valéry Larbaud. It is composed by 29 volumes published between 1924 and 1932. Each volume collects different literary material of various well-known and unknown writers as poems or novels, translating some of the most important authors like Joyce, T.S. Eliot, Pirandello, Ungaretti, Saint-John Perse, Rilke, Hofmannsthal.
Considering the historical, literary, and cultural importance of the Commerce journal, our project "Commerce numérique" aims to digitize and to make the journal contents freely available online to both the general public and the research community. This article presents how the journal was encoded. Also, we give importance to the coding of poems present in Commerce. Indeed, some poems are original in another language and they are accompanied by their French translation. Other poems are a French-translated form without original text. In order to fully and accurately express the phenomena and their structures, we have adopted some aspects of the TEI framework, which we will explain in detail. Particular attention was paid to the French translation of a Moroccan Arabic poem from the 13th century. On the one hand, the original Arabic poetry is interesting because it presents some aspects of the Moroccan dialect and some aspects of the oral text. On the other hand, the study and the encoding of Arabic poetry in parallel to its translation highlight some important structural differences between Arabic poetry and Western poetry.},
  KEYWORDS = {Commerce Journal, OCR, TEI encoding, literary journal, digital resources, Arabic poetry},
  PAGES = {36--45},
  URL = {https://innove.org/ijist/index.php/ijist/article/view/193},
  VOLUME = {5},
  PUBLISHER = {[El Mohajir Mohammed] ([S. l. ], Marocco)},
  ISSN = {2550-5114},
  JOURNAL = {International Journal of Information Science and Technology},
}
@ARTICLE{PANCKHURST_2021_ARTICLE_PF_455049,
  AUTHOR = {Panckhurst, R. and Frontini, F.},
  TITLE = {An Internationally Fair Mediated Digital Discourse Corpus: Improving Knowledge on Reuse},
  YEAR = {2021},
  ABSTRACT = {In this paper, the authors present a French Mediated Digital Discourse corpus, (88milSMS http://88milsms.huma-num.fr https://hdl.handle.net/11403/comere/cmr-88milsms). Efforts were undertaken over the years to ensure its publication according to the best practices and standards of the community, thus guaranteeing compliance with FAIR principles and CLARIN recommendations with pertinent scientific and pedagogical reuse. Since knowledge on how resources are reused is sometimes difficult to obtain, ways of improving this are also envisaged.},
  KEYWORDS = {Reuse, FAIR, SMS, corpus},
  PAGES = {185--193},
  URL = {https://ecp.ep.liu.se/index.php/clarin/article/view/20},
  VOLUME = {180},
  DOI = {10.3384/ecp18020},
  PUBLISHER = {Linköping University Electronic Press (Linköping, Svezia)},
  ISSN = {1650-3740},
  JOURNAL = {Linköping electronic conference proceedings (Online)},
}
@ARTICLE{PROIETTI_2021_ARTICLE_PY_454043,
  AUTHOR = {Proietti, C. and Yuste Ginel, A.},
  TITLE = {Dynamic epistemic logics for abstract argumentation},
  YEAR = {2021},
  ABSTRACT = {This paper introduces a multi-agent dynamic epistemic logic for abstract argumentation. Its main motivation is to build a general framework for modelling the dynamics of a debate, which entails reasoning about goals, beliefs, as well as policies of communication and information update by the participants. After locating our proposal and introducing the relevant tools from abstract argumentation, we proceed to build a three-tiered logical approach. At the first level, we use the language of propositional logic to encode states of a multi-agent debate. This language allows to specify which arguments any agent is aware of, as well as their subjective justification status. We then extend our language and semantics to that of epistemic logic, in order to model individuals' beliefs about the state of the debate, which includes uncertainty about the information available to others. As a third step, we introduce a framework of dynamic epistemic logic and its semantics, which is essentially based on so-called event models with factual change.
We provide completeness results for a number of systems and show how existing formalisms for argumentation dynamics and unquantified uncertainty can be reduced to their semantics. The resulting framework allows reasoning about subtle epistemic and argumentative updates--such as the effects of different levels of trust in a source--and more in general about the epistemic dimensions of strategic communication.},
  KEYWORDS = {Abstract argumentation, Dynamic epistemic logic, Awareness logics, Multi-agent argumentation frameworks, Persuasion, Strategic Argumentation},
  PAGES = {1--60},
  URL = {https://link.springer.com/article/10.1007/s11229-021-03178-5#citeas},
  DOI = {10.1007/s11229-021-03178-5},
  PUBLISHER = {Kluwer (Dordrecht, Paesi Bassi)},
  ISSN = {1573-0964},
  JOURNAL = {Synthese (Dordr., Online)},
}
@ARTICLE{SALVATI_2021_ARTICLE_SR_468981,
  AUTHOR = {Salvati, L. and Russo, I.},
  TITLE = {Indicatori di complessità nel parlato degli insegnanti di italiano L2: un'analisi quantitativa},
  YEAR = {2021},
  ABSTRACT = {Sul parlato dell'insegnante di L2 (identificato anche come teacher talk) esiste una vasta letteratura, in particolare per quanto riguarda l'insegnamento dell'inglese come lingua seconda. Agli approcci di carattere più teorico-normativo si accompagnano, negli ultimi trenta anni, approcci che prendono spunto dalle tecniche di analisi della linguistica dei corpora e che si focalizzano su casi di studio specifici, andando a verificare su dati concreti le ipotesi formulate dalla teoria. L'ipotesi di base di questo ambito di ricerca è che gli insegnanti compiono - in maniera non sempre consapevole e pianificata - degli adeguamenti nel loro modo di parlare di fronte ad una classe di apprendenti. Tali adeguamenti riguardano più livelli linguistici e variano di intensità a seconda del livello complessivo di competenza degli apprendenti. Nel presente lavoro ci proponiamo di analizzare quantitativamente la complessità del parlato di insegnanti madrelingua di italiano L2 raccolto e trascritto durante lezioni appartenenti a due livelli del Quadro comune europeo di riferimento per le lingue: apprendimento, insegnamento, valutazione (QCER, Council of Europe, 2002 [2001]), A1 e B1. Una parte delle trascrizioni riguarda lezioni svolte in classe (corpus ParInIt, Parlato di Insegnanti di Italiano), nella quale vi è compresenza fisica tra insegnante e apprendenti, un secondo corpus è invece composto da lezioni somministrate online in maniera asincrona, tramite un canale YouTube (corpus Oneworlditaliano). Proponiamo una classificazione degli adeguamenti rispetto alla quale l'analisi quantitativa della complessità degli indicatori linguistici verificherà se è possibile distinguere sia tra il livello A1 e il livello B1 nel corpus raccolto in presenza e nel corpus relativo alle lezioni on line. L'obiettivo finale è comprendere se un'analisi quantitativa dei dati possa aiutare ad individuare gli adeguamenti e le modifiche linguistiche attuate dai docenti per favorire una maggiore comprensibilità dell'input da parte degli apprendenti.},
  KEYWORDS = {teacher talk, corpus linguistics, complessità lessicale},
  PAGES = {122--132},
  URL = {https://riviste.unimi.it/index.php/promoitals/article/view/17132/15088},
  VOLUME = {13},
  PUBLISHER = {Università degli Studi di Milano (Milano, Italia)},
  ISSN = {2037-3597},
  JOURNAL = {Italiano linguadue},
}
@ARTICLE{SALVATORI_2021_ARTICLE_SBD_472280,
  AUTHOR = {Salvatori, E. and Boschetti, F. and Del Grosso, A. M.},
  TITLE = {A un anno da AIUCD2021},
  YEAR = {2021},
  KEYWORDS = {Digital Humanities, AIUCD},
  PAGES = {1--4},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85127630558\&origin=inward},
  VOLUME = {2021},
  DOI = {10.6092/issn.2532-8816/14209},
  ISSN = {2532-8816},
  JOURNAL = {Umanistica Digitale},
}
@BOOK{MARZI_2021_BOOK_M_461758,
  AUTHOR = {Marzi, C.},
  TITLE = {Modelling the morphological lexicon-A computational approach to mono- and bilingual learning and processing of verb inflection},
  YEAR = {2021},
  ABSTRACT = {This work aims at defining an explanatory model of the morphological lexicon as a dynamic system of word learning and processing in both mono- and bilingual contexts. The main focus is on exploring some relevant aspects of the paradigmatic organisation of the mental lexicon in language learning, based on a dynamic analysis of mono- and bilingual contexts. The proposed interdisciplinary approach to lexical acquisition combines theoretically-motivated accounts, psycho-cognitive evidence and methodologies, and machine learning technologies. In particular, I will take into account those basic psychological and cognitive mechanisms that are considered as crucial in language acquisition: (i) the ability to perceive recurrent morphological structures (invariances) in varying temporal contexts, (ii) the capability to access/activate time series of symbols in the short term memory and to selectively integrate them with long term memory expectations, (iii) the attitude towards building novel forms through analogical extension of intra- and inter-paradigmatic relations (generalisation). This investigation is pursued through a computational model based on a recurrent Self-Organising Map, with Hebbian connections defined over a temporal layer (Temporal Self-Organising Map, TSOM), providing a principled algorithmic account of effects of lexical acquisition, processing and access.
The computational simulation of a biologically inspired neural architecture of the mental lexicon offers the possibility to reproduce a wide range of conditions of mono- and bi-lingual input exposure, and to illustrate the dynamic of word acquisition and the emergence of morphological organisation. The proposed model provides an adaptive multifactorial account of morphology acquisition affected by a variety of input factors, such as word frequency distributions, paradigm regularity and wordlikeness, whereby lexical perception and organisation are grounded in memory-based processing strategies. In addition, it suggests a processing-based notion of morpheme, as a by-product of processing dynamics, with paradigms emerging as specialised surface relations between inflected forms.},
  KEYWORDS = {morphology, learning and processing, monolingual and bilingual acquisition, verb inflection, artificial neural networks, temporal self-organising maps},
  PAGES = {5--171},
  URL = {https://publications.cnr.it/doc/461758},
  VOLUME = {1095.82},
  PUBLISHER = {Franco Angeli (Milano, ITA)},
  ISBN = {978-88-351-3548-7},
}
@INCOLLECTION{BOSCHETTI_2021_INCOLLECTION_BDS_461545,
  AUTHOR = {Boschetti, F. and Del Grosso, A. M. and Spinazzè, L.},
  TITLE = {La galassia Musisque Deoque: storia e prospettive},
  YEAR = {2021},
  ABSTRACT = {The resources in support of Latin scholars created under the scientific direction of Paolo Mastandrea are numerous; the undertaking of Musisque Deoque, which aims at equipping the entire corpus of ancient Latin poetry with "significant variants", is the most emblematic effort, open to further developments. Looking at the general history of these projects, we try to trace the future path of the "Musisque Deoque galaxy" within Open Science.},
  KEYWORDS = {Classical philology, Latin poetry, Intertextuality, Open data, FAIR principles},
  PAGES = {405--419},
  URL = {https://edizionicafoscari.unive.it/media/pdf/books/978-88-6969-558-2/978-88-6969-558-2-ch-26.pdf},
  VOLUME = {32},
  DOI = {10.30687/978-88-6969-557-5/026},
  PUBLISHER = {Edizioni Ca' Foscari (Venezia, ITA)},
  ISBN = {978-88-6969-557-5},
  BOOKTITLE = {Paulo maiora canamus-Raccolta di studi per Paolo Mastandrea},
  EDITOR = {Venuti, M. and Manca, M.},
}
@INCOLLECTION{CAPPA_2021_INCOLLECTION_CFG_461297,
  AUTHOR = {Cappa, C. and Ferro, M. and Giulivi, S.},
  TITLE = {Valutare l'efficienza di lettura in classe, fra "ecologia" e tecnologie},
  YEAR = {2021},
  ABSTRACT = {La sperimentazione AEREST ha consentito la creazione di un protocollo in grado di offrire una valutazione accurata e dettagliata delle abilità di lettura e comprensione del testo. Lo strumento si è rivelato semplice da utilizzare per gli insegnanti, ed è stato accolto con curiosità e interesse dagli allievi, certamente attratti dal supporto utilizzato per la somministrazione (il tablet), ma anche dai testi, che sono stati scelti e adattati con particolare cura. L'analisi dei dati ha consentito di identificare una considerevole varietà di profili di lettori, per i quali sarà possibile progettare percorsi di potenziamento mirati. Come già accennato, si è potuta constatare l'efficacia dello strumento nell'identificazione di allievi le cui difficoltà (pur evidenti agli occhi degli insegnanti) non vengono rilevate dai test comunemente utilizzati per la valutazione, ma la cui lettura non può essere considerata 'efficiente'. Costituiscono esempi in questo senso gli allievi che decodificano in modo accurato e veloce, con buone prestazioni nella comprensione all'ascolto, ma che manifestano difficoltà nella comprensione di un testo in lettura silente, poiché in questa attività devono integrare la decodifica con l'accesso al significato. Un ulteriore esempio è costituito dagli allievi che ottengono buoni risultati in tutti i test, impiegando però un tempo eccessivamente lungo per svolgerli. Nella prospettiva qui adottata, anche per questi allievi è necessario individuare strategie di supporto volte a evitare che le attività scolastiche, in particolare i compiti a casa, occupino una parte troppo ampia del tempo dell'allievo, togliendo spazio al gioco, allo svago, agli interessi personali e alla socializzazione. Questi ultimi sono aspetti che, come sottolinea la Carta internazionale dei diritti dei bambini (1959), rivestono un'importanza cruciale per il processo di crescita e il benessere generale di ciascuno. Oltre alle difficoltà, il protocollo AEREST consente di mettere in evidenza le prestazioni eccellenti, grazie alla struttura dei test e alle caratteristiche dei testi e delle domande che li accompagnano. Capire a fondo come "funzionano" gli allievi è indispensabile per poterli sostenere al meglio negli apprendimenti, indipendentemente dalla presenza o meno di un'"etichetta" diagnostica. Gli insegnanti hanno in questo senso una grande responsabilità, e uno strumento come AEREST, grazie anche all'implementazione su piattaforma tecnologica, può aiutarli in quella che forse è la loro principale sfida quotidiana: fare in modo che le difficoltà scolastiche non siano vissute come barriere all'apprendimento, al successo scolastico, alle opportunità professionali, alla realizzazione personale, ma come soglie da superare e da trasformare in trampolini di lancio.},
  KEYWORDS = {efficienza di lettura, decodifica, comprensione, scuola primaria},
  PAGES = {49--69},
  URL = {https://buponline.com/prodotto/disturbi-specifici-dellapprendimento-e-insegnamento-linguistico/},
  VOLUME = {3},
  PUBLISHER = {Bononia University Press (Bologna, ITA)},
  ISBN = {978-88-6923-829-1},
  BOOKTITLE = {Didattica dell'italiano},
  EDITOR = {Garulli, V. and Pasetti, L. and Viale, M.},
}
@INCOLLECTION{CUTUGNO_2021_INCOLLECTION_CFCM_443221,
  AUTHOR = {Cutugno, P. and Ferretti, M. and Chiarella, D. and Marconi, L.},
  TITLE = {A Linguistic preliminary study about noise perception},
  YEAR = {2021},
  ABSTRACT = {The activities of the project "TRIPLO: TRasporti e collegamenti Innovativi e sostenibili tra Porti e piattaforme Logistiche", financed with the resources of the Interregional Operational Programme Italy-France Maritime 2014-2020, have as specific objective to improve the sustainability of commercial ports and related logistic platforms, contributing to the reduction of noise pollution [1][2]. Some activities in the project are aimed at assessing the acoustic impact on the population exposed to noise generated by back port activities, in relation to individual perception.
In environmental surveys, only technical investigations can objectively describe a phenomenon [3], but at the same time they do not guarantee its universality with respect to perception; a sound can be considered both as a physical phenomenon, and therefore measurable through objective parameters, and as a phenomenon linked to sound perception, of a subjective nature and consequent to the psycho-physical-emotional state of the subject. These two characteristics are strictly interdependent, so it is insufficient to limit to examine them separately. The population constitutes a precious source of information in the evaluation of the quality of the space in which they live or work, suggesting the relationships between environment, comfort and productivity. It is therefore important to use subjective investigation tools through which the opinion of the population can become a valid support to traditional methods of analysis and improve the overall evaluation.}, KEYWORDS = {noise perception, questionnaire, applied linguistics}, PAGES = {57-61}, URL = {https://publications.cnr.it/doc/443221}, ISBN = {978-959-7174-40-0}, BOOKTITLE = {Contribuciones a la Lingüística y a la Comunicación Social. Tributo a Vitelio Ruiz Hernández}, } @INCOLLECTION{DOLOWYRYBINSKA_2021_INCOLLECTION_DS_443475, AUTHOR = {Dolowy Rybinska, N. and Soria, C.}, TITLE = {Surveying the ethnolinguistic vitality of two contested languages. 
The case of Kashubian and Piedmontese}, YEAR = {2021}, ABSTRACT = {In this chapter we present the results of a Polish-Italian research project aimed at evaluating and comparing the vitality of two contested languages: Kashubian in Poland and Piedmontese in Italy.}, KEYWORDS = {ethnolinguistic vitality, contested languages}, PAGES = {125-142}, URL = {https://publications.cnr.it/doc/443475}, DOI = {10.1075/wlp.8}, PUBLISHER = {John Benjamins (Amsterdam, NLD)}, ISBN = {9789027208040}, } @INCOLLECTION{GUADAGNINI_2021_INCOLLECTION_G_441969, AUTHOR = {Guadagnini, E.}, TITLE = {Scripta}, YEAR = {2021}, ABSTRACT = {Il concetto di "scripta" negli studi linguistici romanzi.}, KEYWORDS = {linguistica italiana, linguistica romanza}, PAGES = {125-151}, URL = {http://www.carocci.it/index.php?option=com_content\&view=article\&id=98}, PUBLISHER = {Carocci (Roma, ITA)}, ISBN = {9788829004294}, BOOKTITLE = {Storia dell'italiano scritto, vol. VI, Supporti, forme, pratiche di scrittura}, EDITOR = {Antonelli, G. and Motolese, M. and Tomasin, L.}, } @INCOLLECTION{GUADAGNINI_2021_INCOLLECTION_G_461426, AUTHOR = {Guadagnini, E.}, TITLE = {Marco Tullio Cicerone (attr.), Rettorica nova}, YEAR = {2021}, ABSTRACT = {Scheda relativa alla "Rettorica nova", presente nella lista dei libri posseduti da Leonardo.}, KEYWORDS = {Leonardo, Cicerone, Retorica}, PAGES = {187-189}, URL = {https://bibliotecadileonardo.museogalileo.it/index.php/esplora/scheda/La%20biblioteca/61572}, PUBLISHER = {Giunti (Firenze, ITA)}, ISBN = {9788809897786}, BOOKTITLE = {La biblioteca di Leonardo}, EDITOR = {Vecce, C.}, } @INCOLLECTION{ONIGA_2021_INCOLLECTION_OCPGBPCCGMFCFTA_463942, AUTHOR = {Oniga, D. and Cantalupo, B. and Perlo, D. and Grangetto, M. and Bolelli, F. and Pollastri, F. and Cancilla, M. and Canalini, L. and Grana, C. and Muñoz, C. and Franco, A. and Cardillo, A. and Florea, M. and Tartaglione, E. 
and Aldinucci, M.}, TITLE = {Applications of AI and HPC in the Health Domain}, YEAR = {2021}, ABSTRACT = {This chapter presents the applications of artificial intelligence (AI) and high-computing performance (HPC) in the health domain, illustrated by the description of five of the use cases that are developed in the DeepHealth project. In the context of the European Commission supporting the use of AI and HPC in the health sector, DeepHealth Project is helping health experts process large quantities of images, putting at their disposal DeepLearning and computer vision techniques, combined in the DeepHealth toolkit and HPC infrastructures. The DeepHealth toolkit is tested and validated through 15 use cases, each of them representing a biomedical application. The most promising use cases are described in the chapter, which concludes with the value proposition and the benefits that DeepHealth toolkit offers to future end users.}, KEYWORDS = {artificial intelligence, high performance computing}, PAGES = {217-240}, URL = {https://www.taylorfrancis.com/chapters/edit/10.1201/9781003176664-11/applications-ai-hpc-health-domain-dana-oniga-barbara-cantalupo-enzo-tartaglione-daniele-perlo-marco-grangetto-marco-aldinucci-federico-bolelli-federico-pollastri-michele-cancilla-laura-canalini-costantino-grana-cristina-mu%C3%B1oz-alcalde-franco-alberto-cardillo-monica-florea}, DOI = {10.1201/9781003176664}, PUBLISHER = {CRC Press-Taylor \& Francis Group (London, GBR)}, ISBN = {9781032009841}, } @EDITORIAL{BRANDO_2021_EDITORIAL_BFMRM_453809, AUTHOR = {Brando, C. and Frontini, F. and Moreau, D. and Roche, M. and Masson, É.}, TITLE = {Humanités numériques spatialisées}, YEAR = {2021}, ABSTRACT = {This special issue provides an introduction to the contributions presented in this thematic issue dedicated to the spatial humanities. 
Three main themes are addressed: (1) the processing of spatial information in textual corpora resulting from work in the human and social sciences, mainly in literary studies; (2) problems of acquisition, spatialisation and dissemination of geographical data of the past and from cultural heritage, thus, here, more connected with research in history; (3) spatial information and its processing and uses in archaeology. For each of these topics, we present the founding initiatives with historiographical elements, a brief status quaestionis and a synthesis of the contributions.}, KEYWORDS = {spatial digital humanities, archaeology, history, history of the digital humanities, geographic information system, cartography, spatial analysis, textual analysis}, URL = {https://journals.openedition.org/revuehn/689}, VOLUME = {3}, } @EDITORIAL{BURGASSI_2021_EDITORIAL_BGV_472160, AUTHOR = {Burgassi, C. and Guadagnini, E. and Vaccaro, G.}, TITLE = {Migrazioni linguistiche e trasmissioni culturali in Italia (secoli XIII-XV)}, YEAR = {2021}, ABSTRACT = {Il terzo volume della collana Plurilinguismo e Migrazioni si concentra sui temi della traduzione, un'istanza storica fondamentale per l'area italoromanza antica che può essere intesa sia nel senso ristretto di riproposizione puntuale di un testo in una lingua diversa da quella originale sia, in senso lato, come operazione che importa nella lingua locale un contenuto originariamente espresso in altra lingua. 
Migrazioni linguistiche e trasmissioni culturali in Italia (secoli XIII-XV) adotta la traduzione, entro i due poli di senso ora definiti, come punto di vista privilegiato per lo studio della lingua e della cultura italiana del Medioevo: i nove contributi raccolti affrontano temi e problemi relativi a traduzioni dal latino e dal francese, rispetto a testi composti originariamente dall'Antichità all'epoca coeva, caratterizzati da tradizioni "passive" o "attive" (secondo la terminologia classica).}, KEYWORDS = {Traduzione, Volgarizzamenti, Eredità classica}, URL = {https://www.cnr.it/sites/default/files/public/media/attivita/editoria/collana_plurimi/PLURIMI_3_2021.pdf}, VOLUME = {3}, DOI = {10.36173/PLURIMI-2021-3}, PUBLISHER = {CNR EDIZIONI (ROMA, ITA)}, ISBN = {9788880804888}, } @EDITORIAL{MONACHINI_2021_EDITORIAL_ME_472301, AUTHOR = {Monachini, M. and Eskevich, M.}, TITLE = {CLARIN Annual Conference Proceedings}, YEAR = {2021}, ABSTRACT = {CLARIN2021 is organised for the wider Humanities and Social Sciences communities in order to exchange ideas and experiences within the CLARIN infrastructure. This includes the design, construction and operation of the CLARIN infrastructure, the data, tools and services that it contains or for which there is a need, its actual use by researchers, its relation to other infrastructures and projects, and the CLARIN Knowledge Sharing Infrastructure.}, KEYWORDS = {Language Resource Infrastructure}, PAGES = {1-178}, URL = {https://publications.cnr.it/doc/472301}, } @EDITORIAL{SALVATORI_2021_EDITORIAL_SBD_484494, AUTHOR = {Salvatori, E. and Boschetti, F. and Del Grosso, A. 
M.}, TITLE = {DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale (Selected Papers AIUCD 2021)}, YEAR = {2021}, ABSTRACT = {Curatela dei selected paper (Dossier) della conferenza AIUCD2021.}, KEYWORDS = {Digital Humanities, AIUCD2021, AIUCD, Umanistica Digitale, Informatica Umanistica}, PAGES = {1-197}, URL = {https://umanisticadigitale.unibo.it/issue/view/1033}, VOLUME = {11}, } @EDITORIAL{BRANDO_2021_EDITORIAL_BFMRM_453821, AUTHOR = {Brando, C. and Frontini, F. and Moreau, D. and Roche, M. and Masson, É.}, TITLE = {Introduction. Humanités numériques et analyses spatiales: enjeux et perspectives}, YEAR = {2021}, KEYWORDS = {spatial digital humanities, archaeology, history, history of the digital humanities, geographic information system, cartography, spatial analysis, textual analysis}, URL = {https://journals.openedition.org/revuehn/2038}, VOLUME = {3}, PUBLISHER = {Humanistica (Bruxelles, Belgio)}, ISSN = {2736-2337}, BOOKTITLE = {Humanités numériques (Online)}, } @INPROCEEDINGS{ALBERTIN_2021_INPROCEEDINGS_AMB_465394, AUTHOR = {Albertin, G. and Miaschi, A. and Brunato, D.}, TITLE = {On the role of textual connectives in sentence comprehension: A new dataset for Italian}, YEAR = {2021}, ABSTRACT = {In this paper we present a new evaluation resource for Italian aimed at assessing the role of textual connectives in the comprehension of the meaning of a sentence. The resource is arranged in two sections (acceptability assessment and cloze test), each one corresponding to a distinct challenge task conceived to test how subtle modifications involving connectives in real usage sentences influence the perceived acceptability of the sentence by native speakers and Neural Language Models (NLMs). 
Although the main focus is the presentation of the dataset, we also provide some preliminary data comparing human judgments and NLMs performance in the two tasks.}, KEYWORDS = {neural language models, textual connectives, sentence acceptability}, URL = {http://ceur-ws.org/Vol-3033/paper16.pdf}, VOLUME = {3033}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {8th Italian Conference on Computational Linguistics (CLIC-it 2021)}, CONFERENCE_PLACE = {Milano}, CONFERENCE_DATE = {26-28/01/2022}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{BRUNO_2021_INPROCEEDINGS_BGCMF_461393, AUTHOR = {Bruno, E. and Giulivi, S. and Cappa, C. and Marini, M. and Ferro, M.}, TITLE = {Evaluating the accuracy of decoding in children who read aloud}, YEAR = {2021}, ABSTRACT = {Digital tools based on automatic speech recognition (ASR) could be a useful support for teachers in assessing the reading skills of the students. We focus on the evaluation of the decoding accuracy of children with grade level ranging from the 3rd to the 6th performing a reading aloud task on a narrative text displayed on an ordinary tablet using the ReadLet platform. On the basis of previously collected data, we built a gold dataset with sentences characterised by the audio data, the original text to be read, and the text actually spoken by the child. By using the open-source Kaldi toolkit an ASR system based on the GMM-HMM model was trained on the training portion of the gold dataset. The accuracy of the ASR system was calculated as the ability to correctly decode the test audio data with respect to the annotated text, and the decoding accuracy of the children was estimated by measuring the gap between the results obtained with the annotated text and the original text. 
A consistent trend with increasing grade level was found in terms of word correctness, substitutions and insertions, while the trained model appears to be significantly able to evaluate the children decoding accuracy.}, KEYWORDS = {speech recognition, decoding accuracy, reading aloud, voice parameters, Kaldi, GMM-HMM acoustic model}, PAGES = {145-148}, URL = {https://publications.cnr.it/doc/461393}, DOI = {10.36253/978-88-5518-449-6}, PUBLISHER = {Firenze University Press (Firenze, ITA)}, ISBN = {978-88-5518-449-6}, CONFERENCE_NAME = {12th International Workshop on Models and Analysis of Vocal Emissions for Biomedical Applications (MAVEBA'21)}, CONFERENCE_PLACE = {Firenze (Italy)}, CONFERENCE_DATE = {14-16/12/2021}, BOOKTITLE = {Proceedings of the 12th International Workshop on Models and Analysis of Vocal Emissions for Biomedical Applications (MAVEBA'21)}, EDITOR = {Manfredi, C.}, } @INPROCEEDINGS{CALAMAI_2021_INPROCEEDINGS_CPSPCBM_466824, AUTHOR = {Calamai, S. and Pretto, N. and Stamuli, M. F. and Piccardi, D. and Candeo, G. and Bianchi, S. and Monachini, M.}, TITLE = {COMMUNITY-BASED SURVEY AND ORAL ARCHIVE INFRASTRUCTURE IN THE ARCHIVIO VI. VO. PROJECT}, YEAR = {2021}, ABSTRACT = {Audio and audiovisual archives are at the crossroads of different fields of knowledge, yet they require common solutions for both their long-term preservation and their description, availability, use and reuse. Archivio Vi.Vo. is an Italian project financed by the Tuscany Region, aiming to (i) explore methods for long-term preservation and secure access to oral sources and (ii) develop an infrastructure under the CLARIN-IT umbrella offering several services for scholars from different domains interested in oral sources. 
This paper describes the project's infrastructure and its methodology through a case study on the Caterina Bueno's audio archive.}, KEYWORDS = {inglese}, URL = {https://publications.cnr.it/doc/466824}, VOLUME = {180}, DOI = {10.3384/ecp180}, ISBN = {978-91-7929-609-4}, CONFERENCE_NAME = {CLARIN2020 Annual Conference}, CONFERENCE_PLACE = {virtual}, CONFERENCE_DATE = {5/10/2020-7/10/2020}, BOOKTITLE = {SELECTED PAPERS FROM THE CLARIN ANNUAL CONFERENCE 2020}, EDITOR = {Navarretta, C. and Eskevich, M.}, } @INPROCEEDINGS{DEMATTEI_2021_INPROCEEDINGS_DLDN_472158, AUTHOR = {De Mattei, L. and Lai, H. and Dell'Orletta, F. and Nissim, M.}, TITLE = {Human Perception in Natural Language Generation}, YEAR = {2021}, ABSTRACT = {We take a collection of short texts, some of which are human-written, while others are automatically generated, and ask subjects, who are unaware of the texts' source, whether they perceive them as human-produced. We use this data to fine-tune a GPT-2 model to push it to generate more human-like texts, and observe that the production of this fine-tuned model is indeed perceived as more human-like than that of the original model. Contextually, we show that our automatic evaluation strategy correlates well with human judgements. We also run a linguistic analysis to unveil the characteristics of human- vs machine-perceived language.}, KEYWORDS = {Natural Language Generation, Neural Language Models, Evaluation}, PAGES = {15-23}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85123713456\&origin=inward}, DOI = {10.18653/v1/2021.gem-1.2}, ISBN = {978-1-954085-67-1}, CONFERENCE_NAME = {First Workshop on Generation Evaluation and Metrics (GEM 2021)}, CONFERENCE_PLACE = {Online}, CONFERENCE_DATE = {05/08/2021}, BOOKTITLE = {Proceedings of the First Workshop on Generation Evaluation and Metrics (GEM 2021)}, } @INPROCEEDINGS{DELGRATTA_2021_INPROCEEDINGS_DBDB_472299, AUTHOR = {Del Gratta, R. and Boschetti, F. and Del Grosso, A. M. 
and Bambaci, L.}, TITLE = {La Filologia come sistema dinamico: qualche considerazione preliminare}, YEAR = {2021}, ABSTRACT = {In questo articolo introduciamo un approccio formale all'evoluzione dei documenti con particolare attenzione alla prospettiva filologica e alle problematiche tipiche connesse. Proponiamo un modello/framework matematico in grado di formalizzare diversi fenomeni complessi in vari ambiti di ricerca quali la Linguistica Computazionale, la Filologia Digitale e l'Ingegneria del Software, in particolare quando questa viene applicata all'analisi di documenti e testi di interesse storico-letterario.}, KEYWORDS = {approccio evoluzionistico, modello formale, analisi documentale e testuale, sistema dinamico, filologia computazionale}, PAGES = {484-490}, URL = {http://amsacta.unibo.it/6712/1/AIUCD2021_BOA-versione3A.pdf#page=500}, VOLUME = {aiucd2021}, DOI = {10.6092/unibo/amsacta/6712}, ISBN = {9788894253559}, CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale-DHs for society: e-quality, participation, rights and values in the Digital Age}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {19/01/2021-22/01/2021}, BOOKTITLE = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale-DHs for society: e-quality, participation, rights and values in the Digital Age. Book of extended abstracts of the 10th national conference}, } @INPROCEEDINGS{DIDONATO_2021_INPROCEEDINGS_DDMP_461475, AUTHOR = {Di Donato, F. and Dumouchel, S. and Monachini, M. and Pohle, S.}, TITLE = {The discovery platform GOTRIPLE: An EOSC service for social sciences and humanities research}, YEAR = {2021}, ABSTRACT = {In this paper we present TRIPLE - Transforming Research through Innovative Practices for Linked Interdisciplinary Exploration - an on-going project funded as part of the European Horizon 2020 programme INFRAEOSC-02-2019 "Prototyping new innovative services" (2019-2023). 
The project's main objective is to develop a multilingual and multicultural discovery solution for the social sciences and humanities (SSH), which will provide a single access point that allows users to explore, find, access and reuse materials such as literature, data, projects and researcher profiles at European scale. The paper first provides an overview of TRIPLE's main goals and impacts. It then describes the methodology adopted for the design and development of the project platform, GOTRIPLE. Finally, it contextualises the project within the European research landscape, and more specifically in the European Open Science Cloud (EOSC) ecosystem. In the conclusion, some current challenges and open issues are presented.}, KEYWORDS = {EOSC, Open Science, scholarly communication, discoverability, OPERAS, TRIPLE}, PAGES = {31-38}, URL = {http://amsacta.unibo.it/6712/1/AIUCD2021_BOA-versione3A.pdf}, DOI = {10.6092/unibo/amsacta/6712}, CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale}, CONFERENCE_PLACE = {on-line}, CONFERENCE_DATE = {19-22/01/2021}, BOOKTITLE = {AIUCD 2021-Book of Extended Abstracts. p. 624}, EDITOR = {Boschetti, F. and Del Grosso, A. M. and Salvatori, E.}, } @INPROCEEDINGS{FERRETTI_2021_INPROCEEDINGS_FCCMDCM_456166, AUTHOR = {Ferretti, M. and Chiarella, D. and Cutugno, P. and Marconi, L. and Di Feo, G. and Cerniglia, A. and Magrini, A.}, TITLE = {A linguistic and psychoacoustic study for questionnaire analysis: first results}, YEAR = {2021}, ABSTRACT = {This research is inspired by the consideration that language represents a significant dimension of perception and description of the soundscape. A language is a strategic tool that can consistently describe the qualities of physical acoustic environments, introducing an efficient metric to describe the sound perception by people. 
In this paper, we explore the possibility of using sentiment analysis for extracting the emotional impact of noise from lists of adjectives that describe sounds. To address this, an investigation campaign was conducted to identify the Italian adjectives that best describe the noise sources associated with port and retro-port infrastructures. 402 Italian university students responded to a listening experiment. They were asked to associate adjectives with audios. The association of these adjectives with the sources of sound formed the basis for sentiment analysis. Psychoacoustic parameters were calculated following measurements and processed for each of the sounds administered to look for possible correlations between the different perceptual aspects. The first results show how industrial or traffic sounds are as expected associated with negative sentiment whereas natural sounds evoke positive emotions. The proposed sentiment analysis, in connection with the psychoacoustic investigations developed, provides a framework for future research in the investigation of sound perception.}, KEYWORDS = {Sound perception, Sentiment Analysis, Psychoacoustics, Acoustics, Synesthesia}, PAGES = {8}, URL = {https://publications.cnr.it/doc/456166}, ISBN = {978-83-7880-799-5}, CONFERENCE_NAME = {27th International Congress on Sound and Vibration}, CONFERENCE_PLACE = {Praga}, CONFERENCE_DATE = {11-16/07/2021}, BOOKTITLE = {Proceedings of the 27th International Congress on Sound and Vibration}, } @INPROCEEDINGS{GIOVANNETTI_2021_INPROCEEDINGS_GABMPS_463795, AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Marchi, S. and Papini, M. and Sciolette, F.}, TITLE = {The role of a computational lexicon for query expansion in full-text search}, YEAR = {2021}, ABSTRACT = {This work describes the first experiments conducted with a computational lexicon of Italian in a context of query expansion for full-text search. 
An application, composed of a graphical user interface and backend services to access the lexicon and the database containing the corpus to be queried, was developed. The text was morphologically analysed to improve the precision of the search process. Some examples of queries are given to show the potential of a text search approach supported by a complex and stratified lexical resource.}, KEYWORDS = {full-text search, computational lexicon, query expansion}, PAGES = {162-168}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85121247840\&origin=inward}, VOLUME = {3033}, DOI = {10.4000/books.aaccademia.10417}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {9791280136947}, CONFERENCE_NAME = {CLiC-it 2021 Italian Conference on Computational Linguistics 2021}, CONFERENCE_PLACE = {Milan, Italy}, CONFERENCE_DATE = {January 26-28, 2022}, BOOKTITLE = {Proceedings of the Eighth Italian Conference on Computational Linguistics (Clic-it 2021)}, EDITOR = {Fersini, E. and Passarotti, M. and Patti, V.}, } @INPROCEEDINGS{IAVARONE_2021_INPROCEEDINGS_IMBGSVDG_472155, AUTHOR = {Iavarone, B. and Morelli, M. S. and Brunato, D. and Ghiasi, S. and Scilingo, E. P. and Vanello, N. and Dell'Orletta, F. and Greco, A.}, TITLE = {Analyzing the Interaction between the Reader's Voice and the Linguistic Structure of the Text: a Preliminary Study}, YEAR = {2021}, ABSTRACT = {In this study, we present a preliminary analysis of the relationship between the linguistic profile of a text and the voice properties of the reader aiming to improve the speech-based emotion recognition systems. To this aim, we recorded the speech signals from a group of 32 healthy volunteers reading aloud neutral and affective texts and used the BioVoice toolbox to compute some of the main speech features. The selected texts were analyzed to quantify their lexical, morpho-syntactic, and syntactic content. 
Correlation and Support Vector Regressor analyses between linguistic and speech features have shown a significant modulation of some voice acoustic properties performed by the linguistic structure of the text. Particularly, a significant effect was shown on some specific speech features often used for the assessment of human emotional state (e.g., F0). This suggests that the lexical, morpho-syntactic, and syntactic properties could play an important role in the emotional dynamics of a person.}, KEYWORDS = {Natural Language Processing, Speech analysis, linguistic profile}, URL = {https://publications.cnr.it/doc/472155}, DOI = {10.36253/978-88-5518-449-6}, ISBN = {978-88-5518-448-9}, CONFERENCE_NAME = {12th INTERNATIONAL WORKSHOP "MODELS AND ANALYSIS OF VOCAL EMISSIONS FOR BIOMEDICAL APPLICATIONS"}, CONFERENCE_PLACE = {Firenze, Italia}, CONFERENCE_DATE = {14-16/12/2021}, BOOKTITLE = {Proceedings of 12th INTERNATIONAL WORKSHOP "MODELS AND ANALYSIS OF VOCAL EMISSIONS FOR BIOMEDICAL APPLICATIONS"}, } @INPROCEEDINGS{KELLI_2021_INPROCEEDINGS_KLKVLBMHDVTV_462357, AUTHOR = {Kelli, A. and Lindén, K. and Kamocki, P. and Vider, K. and Labropoulou, P. and Birštonas, R. and Mantrov, V. and Hannesschläger, V. and Del Gratta, R. and Värv, A. and Tavits, G. and Vutt, A.}, TITLE = {The Interplay of Legal Regimes of Personal Data, Intellectual Property and Freedom of Expression in Language Research}, YEAR = {2021}, ABSTRACT = {Sometimes legal scholars get relevant but baffling questions from laypersons like: "The reference to a work is personal data, so does the GDPR actually require me to anonymise it? Or, as my voice data is personal data, does the GDPR automatically give me access to a speech recognizer using my voice sample? Or, can I say anything about myself without the GDPR requiring the web host to anonymise or remove the post? What can I say about others like politicians? And, what can researchers say about patients in a research report?" 
Based on these questions, the authors address the interaction of intellectual property and data protection law in the context of data minimisation and attribution rights, access rights, trade secret protection, and freedom of expression.}, KEYWORDS = {Legal aspects, research data}, PAGES = {154-159}, URL = {https://office.clarin.eu/v/CE-2021-1923-CLARIN2021_ConferenceProceedings.pdf}, CONFERENCE_NAME = {CLARIN Annual Conference 2021}, CONFERENCE_DATE = {27-29/09/2021}, EDITOR = {Monachini, M. and Eskevich, M.}, } @INPROCEEDINGS{MARTELLI_2021_INPROCEEDINGS_MNKTKGKNPOLKKDUSLVGLQMFTCSIM_461705, AUTHOR = {Martelli, F. and Navigli, R. and Krek, S. and Tiberius, C. and Kallas, J. and Gantar, P. and Koeva, S. and Nimb, S. and Pedersen, B. S. and Olsen, S. and Langemets, M. and Koppel, K. and Üksik, T. and Dobrovoljc, K. and Ureña Ruiz, R. and Sancho Sánchez, J. and Lipp, V. and Varadi, T. and Györffy, A. and László, S. and Quochi, V. and Monachini, M. and Frontini, F. and Tempelaars, R. and Costa, R. and Salgado, A. and Čibej, J. and Munda, T.}, TITLE = {Designing the ELEXIS Parallel Sense-Annotated Dataset in 10 European Languages}, YEAR = {2021}, ABSTRACT = {Over the course of the last few years, lexicography has witnessed the burgeoning of increasingly reliable automatic approaches supporting the creation of lexicographic resources such as dictionaries, lexical knowledge bases and annotated datasets. In fact, recent achievements in the field of Natural Language Processing and particularly in Word Sense Disambiguation have widely demonstrated their effectiveness not only for the creation of lexicographic resources, but also for enabling a deeper analysis of lexical-semantic data both within and across languages. Nevertheless, we argue that the potential derived from the connections between the two fields is far from exhausted. In this work, we address a serious limitation affecting both lexicography and Word Sense Disambiguation, i.e. 
the lack of high-quality sense-annotated data and describe our efforts aimed at constructing a novel entirely manually annotated parallel dataset in 10 European languages. For the purposes of the present paper, we concentrate on the annotation of morpho-syntactic features. Finally, unlike many of the currently available sense-annotated datasets, we will annotate semantically by using senses derived from high-quality lexicographic repositories.}, KEYWORDS = {Digital lexicography, Natural Language Processing, Computational Linguistics, Corpus Linguistics, Word Sense Disambiguation}, PAGES = {377-396}, URL = {https://static-curis.ku.dk/portal/files/279888836/eLex_2021_22_pp377_395.pdf}, CONFERENCE_NAME = {eLex 2021}, CONFERENCE_DATE = {05/07/2021-07/07/2021}, BOOKTITLE = {Proceedings of the eLex 2021 conference}, } @INPROCEEDINGS{MIASCHI_2021_INPROCEEDINGS_MABDV_463833, AUTHOR = {Miaschi, A. and Alzetta, C. and Brunato, D. and Dell'Orletta, F. and Venturi, G.}, TITLE = {Probing tasks under pressure}, YEAR = {2021}, ABSTRACT = {Probing tasks are frequently used to evaluate whether the representations of Neural Language Models (NLMs) encode linguistic information. However, it is still questioned if probing classification tasks really enable such investigation or they simply hint for surface patterns in the data. We present a method to investigate this question by comparing the accuracies of a set of probing tasks on gold and automatically generated control datasets. Our results suggest that probing tasks can be used as reliable diagnostic methods to investigate the linguistic information encoded in NLMs representations.}, KEYWORDS = {Neural Language Models, Linguistic probing, Treebanks}, PAGES = {1-7}, URL = {http://ceur-ws.org/Vol-3033/paper29.pdf}, VOLUME = {3033}, PUBLISHER = {M. 
Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {8th Italian Conference on Computational Linguistics (CLIC-it 2021)}, CONFERENCE_PLACE = {Milano}, CONFERENCE_DATE = {29/06-01/07/2022}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{MIASCHI_2021_INPROCEEDINGS_MBDV_454441, AUTHOR = {Miaschi, A. and Brunato, D. and Dell'Orletta, F. and Venturi, G.}, TITLE = {What Makes My Model Perplexed? A Linguistic Investigation on Neural Language Models Perplexity}, YEAR = {2021}, ABSTRACT = {This paper presents an investigation aimed at studying how the linguistic structure of a sentence affects the perplexity of two of the most popular Neural Language Models (NLMs), BERT and GPT-2. We first compare the sentence-level likelihood computed with BERT and the GPT-2's perplexity showing that the two metrics are correlated. In addition, we exploit linguistic features capturing a wide set of morpho-syntactic and syntactic phenomena showing how they contribute to predict the perplexity of the two NLMs.}, KEYWORDS = {nlp, interpretability, deep learning}, PAGES = {40-47}, URL = {https://www.aclweb.org/anthology/2021.deelio-1.5}, ISBN = {978-1-954085-30-5}, CONFERENCE_NAME = {2nd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures}, CONFERENCE_DATE = {10/06/2021}, } @INPROCEEDINGS{MIASCHI_2021_INPROCEEDINGS_MRD_469731, AUTHOR = {Miaschi, A. and Ravelli, A. A. and Dell'Orletta, F.}, TITLE = {Evaluating Transformer Models for Punctuation Restoration in Italian}, YEAR = {2021}, ABSTRACT = {In this paper, we propose an evaluation of a Transformer-based punctuation restoration model for the Italian language. Experimenting with a BERT-base model, we perform several fine-tuning with different training data and sizes and tested them in an in- and cross-domain scenario. Moreover, we offer a comparison in a multilingual setting with the same model fine-tuned on English transcriptions. 
Finally, we conclude with an error analysis of the main weaknesses of the model related to specific punctuation marks.}, KEYWORDS = {transformer models, nlp, punctuation restoration}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85121647978\&origin=inward}, VOLUME = {3015}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {5th Workshop on Natural Language for Artificial Intelligence (NL4AI 2021)}, CONFERENCE_DATE = {29/11/2021}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{PROIETTI_2021_INPROCEEDINGS_PC_463554, AUTHOR = {Proietti, C. and Chiarella, D.}, TITLE = {Measuring bi-polarization with argument graphs}, YEAR = {2021}, ABSTRACT = {Multi-agent models play a significant role in testing hypotheses about the unfolding of opinion dynamics in complex social networks. The model of the Argument Communication Theory of Bi-polarization (ACTB), developed by Maes and Flache (2013), shows that simple circulation of arguments among individuals in a group can determine strong differentiation of opinions (bi-polarization effects) even with a small degree of homophily. The ACTB model and similar ones have nevertheless one limitation: given a topic of discussion, only direct pro and con arguments for it are considered. This does not allow to account for the topology of a more complex debate, where arguments may also interact indirectly with the topic at stake. This gap can be filled by using Quantitative Bipolar Argument Frameworks (QBAF). More specifically, by applying measures of argument strength for QBAFs in order to calculate the agents' opinion. In the present paper we generalize the ACTB measure of opinion strength to acyclic bipolar graphs and compare it with other measures from the literature. 
We then present a revised version of the ACTB model, where the agents' knowledge bases are structured as subgraphs of an underlying global knowledge base (described as a QBAF). We first test that the predictions of the ACTB model are confirmed when the underlying QBAF contains only direct pro and con arguments for a topic. We then explore more complex topologies of debate with two additional batches of simulations. Our first results show that changing the topology, while keeping the same number of pro and con arguments, has no significant impact on bi-polarization dynamics.}, KEYWORDS = {bi-polarization, abstract argumentation, opinion dynamics, multi-agent modelling}, PAGES = {13}, URL = {https://publications.cnr.it/doc/463554}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {20th International Conference Italian Association for Artificial Intelligence-5th Workshop on Advances in Argumentation in Artificial Intelligence}, CONFERENCE_PLACE = {Milano}, CONFERENCE_DATE = {29/11/2021}, BOOKTITLE = {Advances in Argumentation in Artificial Intelligence 2021}, } @INPROCEEDINGS{PUCCETTI_2021_INPROCEEDINGS_PMD_454440, AUTHOR = {Puccetti, G. and Miaschi, A. and Dell'Orletta, F.}, TITLE = {How Do BERT Embeddings Organize Linguistic Knowledge?}, YEAR = {2021}, ABSTRACT = {Several studies investigated the linguistic information implicitly encoded in Neural Language Models. Most of these works focused on quantifying the amount and type of information available within their internal representations and across their layers. In line with this scenario, we proposed a different study, based on Lasso regression, aimed at understanding how the information encoded by BERT sentence-level representations is arranged within its hidden units. 
Using a suite of several probing tasks, we showed the existence of a relationship between the implicit knowledge learned by the model and the number of individual units involved in the encodings of this competence. Moreover, we found that it is possible to identify groups of hidden units more relevant for specific linguistic properties.}, KEYWORDS = {nlp, interpretability, deep learning}, PAGES = {48-57}, URL = {https://www.aclweb.org/anthology/2021.deelio-1.6}, ISBN = {978-1-954085-30-5}, CONFERENCE_NAME = {2nd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures}, CONFERENCE_DATE = {10/06/2021}, } @INPROCEEDINGS{RICCUCCI_2021_INPROCEEDINGS_RDVC_472300, AUTHOR = {Riccucci, M. and Del Grosso, A. M. and Valecchi, F. and Causarano, G.}, TITLE = {Testimoniare il Lager: l'informatica al servizio della memoria}, YEAR = {2021}, ABSTRACT = {Il contributo illustra il lavoro condotto in seno al progetto di ricerca Voci dall'inferno: le parole per dirlo, condotto e coordinato da Marina Riccucci (Università di Pisa) con il supporto del Dott. Angelo Mario Del Grosso (Università di Pisa) e della Prof.ssa Frida Valecchi. 
Nel caso specifico sarà dedicata particolare attenzione alla rappresentazione digitale XML-TEI di una testimonianza manoscritta inedita di una donna sopravvissuta alla deportazione in Lager, nonché allo sviluppo di un componente innovativo per l'elaborazione dei documenti digitali, integrato nell'applicazione web Memoriarchivio - software creato specificamente per Voci dall'inferno e che rappresenta un fondamentale strumento per archiviare e analizzare le risorse testuali oggetto d'analisi.}, KEYWORDS = {italianistica digitale, codifica del testo, testimonianze, lager, lessico dantesco, XML-TEI, Saxon-js}, PAGES = {567-572}, URL = {http://amsacta.unibo.it/6712/1/AIUCD2021_BOA-versione3A.pdf#page=583}, VOLUME = {aiucd2021}, DOI = {10.6092/unibo/amsacta/6712}, ISBN = {9788894253559}, CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale-DHs for society: e-quality, participation, rights and values in the Digital Age}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {19/01/2021-22/01/2021}, BOOKTITLE = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale-DHs for society: e-quality, participation, rights and values in the Digital Age. Book of extended abstracts of the 10th national conference}, } @INPROCEEDINGS{SARTI_2021_INPROCEEDINGS_SBD_464972, AUTHOR = {Sarti, G. and Brunato, D. and Dell'Orletta, F.}, TITLE = {That Looks Hard: Characterizing Linguistic Complexity in Humans and Language Models}, YEAR = {2021}, ABSTRACT = {This paper investigates the relationship between two complementary perspectives in the human assessment of sentence complexity and how they are modeled in a neural language model (NLM). The first perspective takes into account multiple online behavioral metrics obtained from eye-tracking recordings. The second one concerns the offline perception of complexity measured by explicit human judgments. 
Using a broad spectrum of linguistic features modeling lexical, morpho-syntactic, and syntactic properties of sentences, we perform a comprehensive analysis of linguistic phenomena associated with the two complexity viewpoints and report similarities and differences. We then show the effectiveness of linguistic features when explicitly leveraged by a regression model for predicting sentence complexity and compare its results with the ones obtained by a fine-tuned neural language model. We finally probe the NLM's linguistic competence before and after fine-tuning, highlighting how linguistic information encoded in representations changes when the model learns to predict complexity.}, KEYWORDS = {linguistic complexity, eyetracking, human evaluation}, PAGES = {48-60}, URL = {https://aclanthology.org/2021.cmcl-1.5}, DOI = {10.18653/v1/2021.cmcl-1.5}, ISBN = {978-1-954085-35-0}, CONFERENCE_NAME = {Proceedings of Workshop on Cognitive Modeling and Computational Linguistics (CMCL 2021)}, CONFERENCE_DATE = {10/06/2021}, } @INPROCEEDINGS{SASSOLINI_2021_INPROCEEDINGS_SBDGM_455303, AUTHOR = {Sassolini, E. and Biffi, M. and De Blasi, F. and Guadagnini, E. and Montemagni, S.}, TITLE = {La digitalizzazione del GDLI: un approccio linguistico per la corretta acquisizione del testo?}, YEAR = {2021}, ABSTRACT = {In questo articolo sono discussi metodi e strategie in via di elaborazione per la correzione (propedeutica alla successiva strutturazione) dei contenuti del Grande dizionario della lingua italiana (GDLI) fondato da Salvatore Battaglia, estratti da un formato digitale non standard. La presenza, in questo formato, di errori distribuiti di vario tipo ha condizionato la scelta dell'approccio all'estrazione e messo in luce tutte le difficoltà dell'operazione. 
Le sperimentazioni fatte sino a oggi portano a privilegiare una strategia di correzione multilivello, che procede scomponendo in sezioni distinte l'individuazione e la correzione degli errori, in modo da rendere gestibili interventi complessi di correzione semi-automatica, altrimenti improponibili, e consentire un loro raffinamento progressivo. Parallelamente alla definizione di regole di riconoscimento di struttura e formato, stiamo analizzando metodi e procedure in grado di migliorare la qualità dell'input e specializzare i moduli di estrazione per i singoli campi della voce a partire dal "lemma". Le finalità del lavoro sono duplici: l'estrazione e strutturazione dei contenuti e la produzione di un formato standard di rappresentazione dei dati. Si tratta di un percorso difficile perché il formato dei dati rende l'uso di strumenti reperibili in letteratura non applicabile. Solamente al termine del lavoro potremo capire se esistono le condizioni per trasformare l'approccio adottato in un protocollo di intervento replicabile.}, KEYWORDS = {dizionari digitali, risorse linguistiche, estrazione dell'informazione, correzione del testo post OCR}, PAGES = {159-166}, URL = {https://aiucd2021.labcd.unipi.it/wp-content/uploads/2021/05/AIUCD2021_BOA-versione3A.pdf}, DOI = {10.6092/unibo/amsacta/6712}, ISBN = {9788894253559}, CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {19-22/01/2021}, } @INPROCEEDINGS{TAXITARI_2021_INPROCEEDINGS_TCFMNP_441870, AUTHOR = {Taxitari, L. and Cappa, C. and Ferro, M. and Marzi, C. and Nadalini, A. 
and Pirrelli, V.}, TITLE = {Using mobile technology for reading assessment}, YEAR = {2021}, ABSTRACT = {The enormous potential of Information and Communication Technologies (ICT) for addressing critical educational issues is generally acknowledged, but its use in the assessment of the complex skills of reading and understanding a text has been very limited to date. The paper contrasts traditional reading assessment protocols with ReadLet, an ICT platform with a tablet front-end, designed to support online monitoring of silent and oral reading abilities in early graders. ReadLet makes use of cloud computing and mobile technology for large-scale data collection and allows the time alignment of the child's reading behaviour with texts tagged using Natural Language Processing (NLP) tools. Initial findings replicate established benchmarks from the psycholinguistic literature on reading in both typically and atypically developing children, making the application a new ground-breaking approach in the evaluation of reading skills. Index Terms--reading assessment, reading research, mobile technology, NLP, cloud computing, special education needs.}, KEYWORDS = {reading assessment, reading research, mobile technology, NLP, cloud computing, special education needs}, PAGES = {1-6}, URL = {http://www.ieee.ma/cist20/component/content/?id=26\&Itemid=185}, ISBN = {9781728166469}, CONFERENCE_NAME = {6th IEEE Congress on Information Science \& Technology (IEEE CIST'20)}, CONFERENCE_PLACE = {online}, CONFERENCE_DATE = {05/06/2021}, } @INPROCEEDINGS{VAGIONAKIS_2021_INPROCEEDINGS_VDBBDMM_461540, AUTHOR = {Vagionakis, I. and Del Gratta, R. and Boschetti, F. and Baroni, P. and Del Grosso, A. M. and Mancinelli, T. and Monachini, M.}, TITLE = {'Cretan Institutional Inscriptions' Meets CLARIN-IT}, YEAR = {2021}, ABSTRACT = {This paper describes a project in the domain of Digital Epigraphy named 'Cretan Institutional Inscriptions' and developed at the Ca' Foscari University of Venice. 
The project is supported by CLARIN-IT as part of the actions addressed to initiatives, projects and events in the field of Humanities and Social Sciences. The main goal is to make the project visible through CLARIN channels with the hope that it will be a forerunner for other digital epigraphy projects in CLARIN. The article illustrates also the dockerization process applied to the 'Cretan Institutional Inscriptions' project, currently hosted on the CLARIN-IT servers.}, KEYWORDS = {Digital Epigraphy, Digital Classics, Ancient Greek, Crete, Institutions, Text Encoding Initiative, TEI, EpiDoc, EpiDoc Front-End Services, EFES, Virtual Language Observatory, Dockerization, ILC4CLARIN, CLARIN-IT, CLARIN}, PAGES = {48-53}, URL = {https://office.clarin.eu/v/CE-2021-1923-CLARIN2021_ConferenceProceedings.pdf}, CONFERENCE_NAME = {CLARIN Annual Conference 2021}, CONFERENCE_PLACE = {Virtual Edition}, CONFERENCE_DATE = {27-29/09/2021}, BOOKTITLE = {Proceedings of CLARIN Annual Conference 2021 (Virtual Edition)}, EDITOR = {Monachini, M. and Eskevich, M.}, } @INPROCEEDINGS{AIOLA_2021_INPROCEEDINGS_AADL_461479, AUTHOR = {Aiola, C. and Andreini, G. and Di Donato, F. and Lombardo, T.}, TITLE = {Sharing Knowledge Digitally. The Muruca case study}, YEAR = {2021}, ABSTRACT = {In this poster we present the evolution of Muruca, a platform that has been conceived as a framework to allow Digital Humanities researchers and research teams to create, curate and share their own Digital Editions. 
The poster describes the main features of the framework, its evolution over the years in synergy with the researchers' needs and with the progress of technologies and policies, and the open challenges and opportunities of the framework in a European context.}, KEYWORDS = {open culture, digital humanities, digital libraries, digital editions, electronic objects}, PAGES = {428-431}, URL = {https://aiucd2021.labcd.unipi.it/wp-content/uploads/2021/01/27_Aiola-AIUCD2021_presentation_46.pdf}, DOI = {10.6092/unibo/amsacta/6712}, ISBN = {9788894253559}, CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale}, CONFERENCE_PLACE = {on-line}, CONFERENCE_DATE = {19-22/01/2021}, } @INPROCEEDINGS{DELGROSSO_2021_INPROCEEDINGS_D_484396, AUTHOR = {Del Grosso, A. M.}, TITLE = {How to leverage Domain-Driven Design to foster Digital Scholarly Editing and DSL}, YEAR = {2021}, ABSTRACT = {The field of digital scholarly editing is deemed to be particularly challenging from a software engineering point of view. Indeed, after decades of research activities, computational philology still lacks effective tools and efficient procedures organized in common interfaces, decoupled application services and domain-specific commodities. As a matter of fact, there is neither convergence on how to model software applications to meet philological requirements nor traditional philologists fully satisfied with the current digital solutions. By adopting the Domain Driven Design approach, we start our modeling activities by defining the problem space of our domain which, in turn, is broken into small components (called sub-domains) to progressively refine the ongoing digital models. 
In this way, together with the experts, we are able to identify the main capabilities of the field we want to model and strive to design coherent domain-specific solutions for such capabilities (bounded contexts).}, KEYWORDS = {Digital Humanities, Domain-Driven Design, Digital Philology, Euporia, Domain-Specific Languages}, URL = {https://easychair.org/cfp/EUPORIA2021}, CONFERENCE_NAME = {EUPORIA2021: Webinars in Theories and Practices of the Annotation through Domain-Specific Languages}, CONFERENCE_PLACE = {online}, CONFERENCE_DATE = {15/03/2021}, } @INPROCEEDINGS{DELGROSSO_2021_INPROCEEDINGS_D_484397, AUTHOR = {Del Grosso, A. M.}, TITLE = {La codifica testuale delle testimonianze inedite, manoscritte, audio e video}, YEAR = {2021}, ABSTRACT = {Il contributo intende illustrare gli aspetti più significativi della rappresentazione digitale delle testimonianze sia scritte sia orali, inquadrando la proposta in una prospettiva infrastrutturale e di sostenibilità delle risorse digitali prodotte.}, KEYWORDS = {Digital Humanities, Informatica Umanistica, Archivi Digitali, Voci dall'Inferno, XML/TEI}, URL = {https://www.unipi.it/index.php/archivio/event/6081-voci-dall-inferno-le-testimonianze-dei-sopravvissuti-ai-lager}, CONFERENCE_NAME = {Voci dall'Inferno. Le testimonianze dei sopravvissuti ai Lager}, CONFERENCE_PLACE = {Pisa, Palazzo "La Sapienza"}, CONFERENCE_DATE = {10/12/2021}, } @INPROCEEDINGS{DELGROSSO_2021_INPROCEEDINGS_DT_484390, AUTHOR = {Del Grosso, A. M. 
and Tessarolo, L.}, TITLE = {Lavori in corso: trasferimento di competenze per una re-ingegnerizzazione del sistema la "galassia MQDQ"}, YEAR = {2021}, ABSTRACT = {L'intervento illustra le attività in corso presso il centro VeDPH dell'Università Ca' Foscari di Venezia informate dal progetto di visiting scholar tra il dipartimento di Scienze Umane e l'Istituto di Linguistica Computazionale CNR-ILC volto alla reingegnerizzazione della tecnologia su cui poggiano i progetti della galassia Musisque Deoque (MQDQ).}, KEYWORDS = {Digital Humanities, Archivi Digitali Latini, MQDQ, XML/TEI, Filologia Collaborativa}, URL = {https://publications.cnr.it/doc/484390}, CONFERENCE_NAME = {Biblioteche digitali di testi latini in Italia. Per un progetto di archivio comune}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {28 giugno 2021}, } @INPROCEEDINGS{ESKEVICH_2021_INPROCEEDINGS_EF_455136, AUTHOR = {Eskevich, M. and Frontini, F.}, TITLE = {SSHOC'ing drama in the cloud}, YEAR = {2021}, ABSTRACT = {At LIBER 2021 Online Conference, CLARIN and SSHOC presented a webinar showcasing how SSH researchers can benefit from the resources and services offered by SSH research infrastructures in order to produce and exploit highly encoded historical textual data. After the webinar, the participants were able to successfully guide and advise SSH researchers (with a particular focus on literature studies) in their choice amongst existing resources and tools, based on their research question.}, KEYWORDS = {CLARIN, infrastrutture, scienze umane e sociali}, URL = {https://zenodo.org/record/5082522#.YOgETBMzb0s}, CONFERENCE_NAME = {LIBER annual conference}, CONFERENCE_PLACE = {virtual event}, CONFERENCE_DATE = {08/07/2021}, } @INPROCEEDINGS{FRONTINI_2021_INPROCEEDINGS_FK_443609, AUTHOR = {Frontini, F. and Khan, A. 
F.}, TITLE = {Di cosa parliamo quando parliamo di FAIR?}, YEAR = {2021}, ABSTRACT = {Nel 2016 un consorzio di scienziati afferenti a diverse istituzioni e discipline enuncia i principi FAIR; in questi quattro anni l'importanza e la portata del programma FAIR è divenuta sempre più evidente. L'adesione a tali principi nelle discipline umanistiche sembra farsi largo, ma non senza difficoltà e interrogativi. Questo lavoro propone una riflessione sulle implicazioni della proposta FAIR per la gestione dei dati scientifici, confrontandola con la sua effettiva ricezione nella comunità delle DH in Italia e in Europa.}, KEYWORDS = {Principi FAIR, Open Data, dati della ricerca, politiche della ricerca, EOSC}, PAGES = {19-24}, URL = {https://aiucd2021.labcd.unipi.it/en/book-of-abstracts-conference/}, ISBN = {9788894253559}, CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale}, CONFERENCE_DATE = {19-22/01/2021}, BOOKTITLE = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale. Raccolta degli abstract estesi della 10a conferenza nazionale, Pisa, 2021}, EDITOR = {Del Grosso, A. M. and Boschetti, F. and Salvatori, E.}, } @INPROCEEDINGS{MARINETTI_2021_INPROCEEDINGS_MMQBBDPRS_461529, AUTHOR = {Marinetti, A. and Murano, F. and Quochi, V. and Ballerini, M. and Boschetti, F. and Del Grosso, A. M. and Piccini, S. and Rigobianco, L. and Solinas, P.}, TITLE = {Languages and Cultures of Ancient Italy. Historical Linguistics and Digital Models}, YEAR = {2021}, ABSTRACT = {The abstract accompanies a poster presenting an overview of the project "Languages and cultures of Ancient Italy", which had just started. The project brings together competences from Historical Linguistics, Computational Lexicography and Digital Humanities. The main objective of the project is to investigate the cultures of ancient Italy on the basis of their linguistic documentation (7th - 1st c. B.C.) 
by means of digital tools specifically tailored for their peculiarities.}, KEYWORDS = {digital epigraphy, computational lexicons, text-lexicon linking, restsprachen, digital models, digital humanities}, PAGES = {528-532}, URL = {https://aiucd2021.labcd.unipi.it/en/book-of-abstracts-conference/}, CONFERENCE_NAME = {10th National Conference of Associazione per l'Informatica Umanistica e la Cultura Digitale}, CONFERENCE_PLACE = {Pisa (Virtuale)}, CONFERENCE_DATE = {19-22 gennaio 2021}, } @INPROCEEDINGS{MARZI_2021_INPROCEEDINGS_MTFNP_445743, AUTHOR = {Marzi, C. and Taxitari, L. and Ferro, M. and Nadalini, A. and Pirrelli, V.}, TITLE = {Valutare la lettura "in tempo reale": un esempio di integrazione tra linguistica computazionale e linguistica applicata}, YEAR = {2021}, ABSTRACT = {In anni recenti, linguistica computazionale e linguistica applicata hanno ampliato i loro rispettivi ambiti d'indagine, utilizzando l'ontologia formale della linguistica teorica e i modelli cognitivi della psicolinguistica per studiare le difficoltà che i parlanti incontrano nello svolgimento di "compiti" linguistici specifici. Nell'ambito della lettura, le tecnologie per il Trattamento Automatico del Linguaggio (TAL) si sono dimostrate capaci di classificare il livello di leggibilità di un testo, basandosi sulla distribuzione di alcuni parametri linguistici in testi pre-classificati per età dei lettori destinatari, o per grado di scolarità, o per livello di sviluppo cognitivo. Ad esempio, parole o frasi più lunghe, o parole più rare tendono a distribuirsi in testi di più difficile comprensione, o destinati a lettori più maturi. E' possibile così assegnare a un testo, o a ogni singola frase, un punteggio di leggibilità in funzione (inversa) della complessità lessicale, morfologica, sintattica o pragmatica dell'unità testuale analizzata. In Linguistica Applicata (LA) la valutazione della difficoltà di lettura ha seguito un approccio funzionale. 
Nel modello semplice di lettura, ad esempio, la capacità di leggere un testo è analizzata come il prodotto dell'interazione tra decodifica e comprensione. Attraverso l'osservazione di un campione di bambini impegnati nella lettura, è possibile valutare la loro fluenza in decodifica, gli errori di decodifica e comprensione, e l'efficacia di percorsi educativi personalizzati. La piattaforma ReadLet è stata sviluppata con l'obiettivo di integrare l'approccio classificatorio del TAL con quello funzionale della LA. Il bambino legge un breve testo visualizzato sullo schermo di un tablet, ad alta voce o in modalità silente. In entrambi i casi, al bambino viene chiesto di "tenere il segno" con il dito sullo schermo nel corso della lettura. La traccia tattile è registrata e allineata con il testo visualizzato sullo schermo mediante un algoritmo di convoluzione. Al contempo, il testo è annotato automaticamente per tratti linguistici. Alla fine della sessione di lettura silente, il bambino risponde ad alcune semplici domande sul contenuto del testo. I dati raccolti consentono di valutare le difficoltà (rallentamenti o errori) che il bambino incontra nella lettura, e di mettere in relazione "in tempo reale" queste difficoltà con aspetti linguistici specifici del testo. Un'analisi preliminare dei dati raccolti da ReadLet su oltre 400 allievi di alcune scuole elementari toscane e della Svizzera italiana, ha evidenziato il differente "passo" di lettura tra lettori con sviluppo tipico e atipico, e il peso che variabili come lunghezza, frequenza e lessicalità hanno su profili di lettura individuali e aggregati. 
La possibilità di "controllare" automaticamente la distribuzione di queste variabili nel testo e di correlarle con le difficoltà del singolo bambino consente, infine, di somministrare testi con livelli di difficoltà gradualmente crescenti, rendendo possibili percorsi personalizzati di potenziamento.}, KEYWORDS = {reading assessment, reading strategies, NLP, ICT mobile technologies}, PAGES = {5-5}, URL = {https://publications.cnr.it/doc/445743}, VOLUME = {2021}, CONFERENCE_NAME = {XXI Congresso Internazionale di AItLA}, CONFERENCE_PLACE = {Bergamo (I)}, CONFERENCE_DATE = {11-12/02/2021}, BOOKTITLE = {FARE LINGUISTICA APPLICATA CON LE DIGITAL HUMANITIES}, } @INPROCEEDINGS{ZENZARO_2021_INPROCEEDINGS_ZBD_472303, AUTHOR = {Zenzaro, S. and Boschetti, F. and Del Grosso, A. M.}, TITLE = {Domain Specific Languages on editing papyri: the GreekSchools case study}, YEAR = {2021}, ABSTRACT = {Within the ERC AdG 885222-GreekSchools we aim to manage the edit of multiple papyrologic texts: diplomatic and literary editions and the corresponding apparatuses and their translations. To endow scholars with automatic consistency and coherence of editorial choices and to support the whole editing process, we leverage Domain Specific Languages (DSLs): a formal language definition in a bounded domain. Digital text editing can be handled in multiple ways depending on the editorial purpose. We identify four possible editing approaches to digital textual scholarship: (1) word processor; (2) structured text (e.g. XML); (3) GUI-centric; (4) domain specific language (DSL). Each of them has pros and cons. In particular we analyse five dimensions: familiarity, compactness, completeness, data elaboration support, and the need of technical training. With familiarity we refer to how much the scholar can avoid shifting his established working paradigm/environment. Compactness is the ratio between quantity of information and formalisation size. 
Completeness refers to the information the content represents. The capability to extract or deduce information from the data is addressed by data elaboration support. Finally, we consider important to evaluate the amount of technical training for text editing. For example, structured texts grant completeness of information, while requiring extensive technical training. In this context only the DSL approach encompasses all these dimensions while the other approaches compromises on some of them. We propose a DSL-based editor that will support and improve the editing workflow in the context of the ERC project.}, KEYWORDS = {Digital Humanities, Digital Papyrology, GreekSchools, Computational philology}, URL = {https://www.clarin.eu/content/clarin-bazaar-2021#data-curation-using-nlp}, CONFERENCE_NAME = {CLARIN Annual Conference 2021}, CONFERENCE_DATE = {27-29/09/2021}, } @TECHREPORT{ALBANESI_2021_TECHREPORT_ABCPGS_463868, AUTHOR = {Albanesi, D. and Bellandi, A. and Colombo, M. and Papini, M. and Giovannetti, E. and Sciolette, F.}, TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-Rapporto tecnico 19}, YEAR = {2021}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto del Progetto Traduzione Talmud Babilonese nel periodo novembre 2020 - aprile 2021. Le principali attività tecniche svolte sul sistema Traduco sono state la risoluzione di bug, l'implementazione di nuove funzionalità richieste e lo sviluppo di una nuova interfaccia grafica utente. 
Le attività di ricerca sono state condotte, in continuità a quelle descritte nel rapporto precedente, nella rappresentazione del lessico, della terminologia e della conoscenza del Talmud.}, KEYWORDS = {Linguistica Computazionale, Traduzione di Testi Religiosi, Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Lessici elettronici, rappresentazione della conoscenza}, PAGES = {28}, URL = {https://publications.cnr.it/doc/463868}, } @TECHREPORT{ALBANESI_2021_TECHREPORT_ABCPGS_463870, AUTHOR = {Albanesi, D. and Bellandi, A. and Colombo, M. and Papini, M. and Giovannetti, E. and Sciolette, F.}, TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-Rapporto tecnico 20}, YEAR = {2021}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto del Progetto Traduzione Talmud Babilonese nel periodo maggio 2021 - ottobre 2021. Le principali attività tecniche svolte sul sistema Traduco sono state la risoluzione di bug e l'implementazione di nuove funzionalità richieste. Parallelamente, sono proseguite le attività volte alla visualizzazione di risorse testuali e linguistiche tramite grafi e per la conversione e l'utilizzo del lessico computazionale PSC a supporto di ricerca "full-text" sul testo talmudico italiano.}, KEYWORDS = {Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Lessici elettronici, rappresentazione della conoscenza, Linguistica Computazionale, traduzione di testi religiosi}, PAGES = {20}, URL = {https://publications.cnr.it/doc/463870}, } @TECHREPORT{ALBANESI_2021_TECHREPORT_AGPS_463871, AUTHOR = {Albanesi, D. and Giovannetti, E. and Papini, M. 
and Sciolette, F.}, TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-rapporto integrativo 3}, YEAR = {2021}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto delle attività di progetto previste dalla convenzione integrativa stipulata tra PTTB e ILC-CNR in data 10/07/2018 e condotte nel periodo febbraio 2020 - gennaio 2021.}, KEYWORDS = {Linguistica Computazionale, Traduzione di Testi Religiosi, Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Sviluppo front-end, lessici computazionali}, PAGES = {17}, URL = {https://publications.cnr.it/doc/463871}, } @TECHREPORT{DIDONATO_2021_TECHREPORT_DDLKOTCCEMDT_459083, AUTHOR = {Di Donato, F. and Dumouchel, S. and Lombardo, T. and Katsaloulis, I. and Ocansey, J. T. and Thiel, C. and Capelli, L. and Chen, Y. and Eskevich, M. and Moranville, Y. and De Santis, L. and Tóth Czifra, E.}, TITLE = {TRIPLE Deliverable: D6.2 Report on Procedure to Follow to Be Part of the EOSC Catalogue}, YEAR = {2021}, ABSTRACT = {The 6.2 Deliverable presents the procedure to onboard the future GoTriple platform into the EOSC catalogue. This deliverable is supposed to guide the TRIPLE consortium in the purpose of adding a SSH discovery platform to the EOSC catalogue but it can also guide other service providers for their own purposes, especially services that are made with different components. Part 1 of this deliverable provides an overview of the GoTriple platform and the five innovative services that are integrated into it, e.g. ScaR, MEOH App, Visualisation components, Pundit, and Head Start. As those innovative services are independent tools, the perspectives, challenges and potential solutions of their onboarding into the EOSC catalogue are discussed in detail on a case-by-case basis. Part 2 contains the core information of this deliverable. 
First, an overview of the three main steps in the onboarding process (2.1.) provides the overall context of the task. Second, the timeline of all needed steps (already taken and planned closer to the end of the project) to define the final federation that will serve as GoTriple provider is outlined (2.2). Essentially, all project partners that are committed to continue their support of the developed service after the project will be identified as GoTriple providers in the EOSC portal, while the GoTriple discovery service will be part of the OPERAS Research Infrastructure's catalogue. Third, the details of the resource profile that are mandatory and optional, are listed according to the set of fields in the EOSC portal. Part 3 summarises the two main steps in the roadmap for integration which are planned to take place in September 2021 and March 2023. In conclusion, the authors highlight the fact that the EOSC development is an ongoing process, and therefore, the current report reflects the procedures and planning steps that are valid at this point of time and fit the current requirements.}, KEYWORDS = {EOSC, GoTRIPLE}, URL = {https://zenodo.org/record/5702705#.YZYgApDMKHs}, } @TECHREPORT{FRONTINI_2021_TECHREPORT_FGM_463461, AUTHOR = {Frontini, F. and Gamba, F. and Monachini, M.}, TITLE = {D3.9 Report on Ontology and Vocabulary Collection and Publication}, YEAR = {2021}, ABSTRACT = {This deliverable pertains to SSHOC Task 3.1 which was responsible for investigating and providing resources and tools to support the multilingual aspects of the future pan-EU SSH infrastructure. Making data and services accessible and usable in SSH is very much also a matter of providing relevant translations, translation of metadata concepts, multilingual vocabularies, terminology extraction across languages, multilingual databases. 
The deliverable offers a detailed report on the gathering and translation of relevant SSH metadata, ontologies and vocabularies for the use-cases indicated in the task's topics: multilingual metadata concepts and vocabularies, the multilingual occupation ontology, with cross-country female occupational titles. In accordance with SSHOC and the EOSC FAIR recommendations and requirements, the metadata vocabularies and ontologies have been published via several different formats and facilities. Section 1. The introduction sets the landscape and describes the need of multilingual vocabularies both for classification and discovery in the context of a cloud-based infrastructure that will offer access to research data and related services adapted to the needs of the SSH community. Section 2. "Multilingual metadata" investigates the possibility to use and test Natural Language Processing (NLP) approaches and Machine Translation (MT) to make the metadata more accessible using national languages other than English. A selected case study was the recommended metadata set of the CLARIN Concept Registry (CCR): the whole set of metadata and definitions were translated into French, Greek, and Italian. The section describes the machine-translation and evaluation process, also comparing different technologies. Section 3. "Multilingual vocabularies and ontologies" introduces two other typical case-studies. The first one addresses one of the pressing needs in social sciences research. Many surveys, indeed, ask respondents to specify their occupation and the occupational ontology is used for the survey questions. For many languages the occupational titles for males and females are not identical. In section 3.1 the enrichment of the occupational ontology with lists for male and female titles, is described for many languages, namely for Dutch, German, Slovenian and French. 
The second case study focuses on the automatic extraction of terminology from texts: a list of domain-specific terms was automatically extracted from a corpus of Data Curation and Stewardship, validated by domain experts, automatically translated into multiple languages (Dutch, French, German, Greek, Italian, Slovenian) and linked to other existing terminologies. Section 4. describes the SKOS-ification and publication process of the results, together with the challenges posed by multilinguality. Section 5. offers an overview of the exploitation and sustainability of the results and how these are made available to the community. Finally the Conclusions provide some reflections on Machine Translation approaches adopted for translating the vocabularies into multiple languages, the advantages in terms of time saving and some first recommendations to the community.}, KEYWORDS = {Terminologies, Infrastructures, Social Sciences and Humanities, Data Curation, Data Stewardship, vocabularies, Translations, Metadata}, URL = {https://doi.org/10.5281/zenodo.5913485}, } @TECHREPORT{LAZZERI_2021_TECHREPORT_LTPAABBCCCCCCCDDDFFFGGGKLLMNOOPPPPRRRSSSSTTVVZ_466613, AUTHOR = {Lazzeri, E. and Tanlongo, F. and Pavone, G. and Alpi, F. and Ansuini, A. and Bertazzon, E. and Bonaccorsi, D. and Cappelluti, F. and Casati, S. and Castelli, D. and Cippitani, R. and Colcelli, V. and Costantini, A. and Cozzini, S. and Degl'Innocenti, E. and Di Donato, F. and Di Giorgio, S. and Fava, I. and Fiore, S. and Forni, M. and Galimberti, G. and Giglia, E. and Giorgetti, A. and Kurapati, S. and Landoni, M. and Lavitrano, M. and Marras, C. and Niccolucci, F. and Occioni, M. and Osmenaj, E. and Paolini, G. and Pasquale, V. and Petrillo, C. and Pugliese, R. and Ripepi, E. and Rivoira, G. and Rossi, G. and Salon, S. and Sarretta, A. and Sartori, A. and Spiga, D. and Tamagno, D. and Tammaro, A. M. and Vellico, M. and Vignocchi, M. 
and Zane, D.}, TITLE = {Competence Centre ICDI per Open Science, FAIR, ed EOSC - Mission, strategia e piano d'azione}, YEAR = {2021}, ABSTRACT = {This document presents the mission and strategy of the Italian Competence Centre on Open Science, FAIR, and EOSC. The Competence Centre is an initiative born within the Italian Computing and Data Infrastructure (ICDI), a forum created by representatives of major Italian Research Infrastructures and e-Infrastructures, with the aim of promoting synergies at the national level, and optimising the Italian participation to European and global challenges in this field, including the European Open Science Cloud (EOSC), the European Data Infrastructure (EDI) and HPC. This working paper depicts the mission and objectives of the ICDI Competence Centre, a network of experts with various skills and competences that are supporting the national stakeholders on topics related to Open Science, FAIR principles application and participation to the EOSC. The different actors and roles are described in the document as well as the activities and services offered, and the added value each stakeholder can find in the Competence Centre. The tools and services provided, in particular the concept for the portal, through which the Centre will connect to the national landscape and users, are also presented. An English translation of this document is provided in Zenodo: Lazzeri, Emma, et al. (2021). ICDI Competence Centre for Open Science, FAIR and EOSC - Mission, Strategy and Action Plan. Zenodo. https://doi.org/10.5281/zenodo.5512638}, KEYWORDS = {EOSC, Competence Centre, ICDI, Open Science, FAIR, EDI, HPC}, PAGES = {13}, URL = {https://doi.org/10.32079/ISTI-TR-2022/022}, } @TECHREPORT{LAZZERI_2021_TECHREPORT_LTPAABBCCCCCCCDDDFFFGGGKLLMNOOPPPPRRRSSSSTTVVZ_466609, AUTHOR = {Lazzeri, E. and Tanlongo, F. and Pavone, G. and Alpi, F. and Ansuini, A. and Bertazzon, E. and Bonaccorsi, D. and Cappelluti, F. and Casati, S. and Castelli, D. and Cippitani, R. 
and Colcelli, V. and Costantini, A. and Cozzini, S. and Degl'Innocenti, E. and Di Donato, F. and Di Giorgio, S. and Fava, I. and Fiore, S. and Forni, M. and Galimberti, G. and Giglia, E. and Giorgetti, A. and Kurapati, S. and Landoni, M. and Lavitrano, M. and Marras, C. and Niccolucci, F. and Occioni, M. and Osmenaj, E. and Paolini, G. and Pasquale, V. and Petrillo, C. and Pugliese, R. and Ripepi, E. and Rivoira, G. and Rossi, G. and Salon, S. and Sarretta, A. and Sartori, A. and Spiga, D. and Tamagno, D. and Tammaro, A. M. and Vellico, M. and Vignocchi, M. and Zane, D.}, TITLE = {ICDI Competence Centre for Open Science, FAIR and EOSC - Mission, strategy and action plan}, YEAR = {2021}, ABSTRACT = {This document presents the mission and strategy of the Italian Competence Centre on Open Science, FAIR, and EOSC. The Competence Centre is an initiative born within the Italian Computing and Data Infrastructure (ICDI), a forum created by representatives of major Italian Research Infrastructures and e-Infrastructures, with the aim of promoting synergies at the national level, and optimising the Italian participation to European and global challenges in this field, including the European Open Science Cloud (EOSC), the European Data Infrastructure (EDI) and HPC. This working paper depicts the mission and objectives of the ICDI Competence Centre, a network of experts with various skills and competencies that are supporting the national stakeholders on topics related to Open Science, FAIR principles application and participation to the EOSC. The different actors and roles are described in the document as well as the activities and services offered, and the added value each stakeholder can find in the Competence Centre. The tools and services provided, in particular the concept for the portal, through which the Centre will connect to the national landscape and users, are also presented. This record is the English translation of the original Italian (2021). 
Competence Centre ICDI per Open Science, FAIR, ed EOSC - Mission, Strategia e piano d'azione. Zenodo. https://doi.org/10.5281/zenodo.5071055}, KEYWORDS = {EOSC, Competence Centre, ICDI, Open Science, FAIR, EDI, HPC}, PAGES = {7}, URL = {https://doi.org/10.32079/ISTI-TR-2021/023}, } @TECHREPORT{SAYERS_2021_TECHREPORT_SSHAAABBBCECDDDDDFFGGGGGGHLLJJKKMMMMMNRPSASSSTYBCCLKRP_472131, AUTHOR = {Sayers, D. and Sousa Silva, R. and Höhn, S. and Ahmedi, L. and Allkivi Metsoja, K. and Anastasiou, D. and Beňuš, Š. and Bowker, L. and Bytyçi, E. and Catala, A. and Çepani, A. and Chacón Beltrán, R. and Dadi, S. and Dalipi, F. and Despotovic, V. and Doczekalska, A. and Drude, S. and Fort, K. and Fuchs, R. and Galinski, C. and Gobbo, F. and Gungor, T. and Guo, S. and Höckner, K. and Láncos, P. and Libal, T. and Jantunen, T. and Jones, D. and Klimova, B. and Korkmaz, E. and Maučec, M. S. and Melo, M. and Meunier, F. and Migge, B. and Mititelu, V. B. and Névéol, A. and Rossi, A. and Pareja Lora, A. and Sanchez Stockhammer, C. and Şahin, A. and Soltan, A. and Soria, C. and Shaikh, S. and Turchi, M. and Yildirim Yayilgan, S. and Bessa, M. and Cabral, L. and Coler, M. and Liebeskind, C. and Kernerman, I. and Rousi, R. and Prys, C.}, TITLE = {The Dawn of the Human-Machine Era: A forecast of new and emerging language technologies}, YEAR = {2021}, ABSTRACT = {New language technologies are coming, thanks to the huge and competing private investment fuelling rapid progress; we can either understand and foresee their effects, or be taken by surprise and spend our time trying to catch up. This report sketches out some transformative new technologies that are likely to fundamentally change our use of language. 
Some of these may feel unrealistically futuristic or far-fetched, but a central purpose of this report - and the wider LITHME network - is to illustrate that these are mostly just the logical development and maturation of technologies currently in prototype. But will everyone benefit from all these shiny new gadgets? Throughout this report we emphasise a range of groups who will be disadvantaged and issues of inequality. Important issues of security and privacy will accompany new language technologies. A further caution is to re-emphasise the current limitations of AI. Looking ahead, we see many intriguing opportunities and new capabilities, but a range of other uncertainties and inequalities. New devices will enable new ways to talk, to translate, to remember, and to learn. But advances in technology will reproduce existing inequalities among those who cannot afford these devices, among the world's smaller languages, and especially for sign language. Debates over privacy and security will flare and crackle with every new immersive gadget. We will move together into this curious new world with a mix of excitement and apprehension - reacting, debating, sharing and disagreeing as we always do. Plug in, as the human-machine era dawns.}, KEYWORDS = {language technologies, human-machine communication}, URL = {https://doi.org/10.17011/jyx/reports/20210518/1}, } @MISC{ALRAHABI_2021_MISC_ABFPJBKG_453820, AUTHOR = {Alrahabi, M. and Brando, C. and Frontini, F. and Provenier, A. and Jalabert, R. and Bordry, M. and Koskas, C. 
and Gawley, J.}, TITLE = {Guide d'annotation manuelle d'entités nommées dans des corpus littéraires}, YEAR = {2021}, ABSTRACT = {Guide d'annotation manuelle d'entités nommées dans des corpus littéraires Campagne d'annotation OBVIL 2019-2021}, KEYWORDS = {NER}, URL = {https://hal.archives-ouvertes.fr/hal-03156278}, } @MISC{BARONI_2021_MISC_B_483770, AUTHOR = {Baroni, P.}, TITLE = {DiPText-KC Web Site}, YEAR = {2021}, ABSTRACT = {Sito Web del CLARIN Knowledge Centre for Digital and Public Textual Scholarship, realizzato con WordPress, sviluppato in inglese}, KEYWORDS = {CLARIN, Knowledge Centre, Digital and Public Textual Scholarship}, URL = {https://diptext-kc.clarin-it.it}, } @MISC{BOSCHETTI_2021_MISC_BD_484394, AUTHOR = {Boschetti, F. and Del Grosso, A. M.}, TITLE = {Problemi tecnici e questioni teoriche nella gestione degli archivi digitali di testi}, YEAR = {2021}, ABSTRACT = {Il seminario illustra alcune potenzialità offerte dalla rappresentazione digitale di testi d'interesse umanistico. Si prenderà spunto dall'archivio di testi latini conservato e fruibile dalla piattaforma Musisque Deoque per poi passare ad alcuni esempi di indagine condotti mediante il linguaggio di interrogazione XQuery.}, KEYWORDS = {Digital Humanities, Filologia Digitale, Filologia Computazionale, MQDQ, Archivi Digitali di Testi, XML/TEI}, URL = {https://publications.cnr.it/doc/484394}, } @MISC{DELGROSSO_2021_MISC_D_484395, AUTHOR = {Del Grosso, A. M.}, TITLE = {Preparing the XML-TEI text for Euporia}, YEAR = {2021}, ABSTRACT = {Seminario introduttivo alla codifica XML/TEI per la rappresentazione digitale di testi d'interesse storico, letterario e umanistico.}, KEYWORDS = {Digital Humanities, Euporia, Filologia Digitale, XML/TEI, CoPhiLab}, URL = {https://cophilab.ilc.cnr.it/euporia-2021/}, } @MISC{DIDONATO_2021_MISC_DL_465157, AUTHOR = {Di Donato, F. 
and Lazzeri, E.}, TITLE = {Data Management}, YEAR = {2021}, ABSTRACT = {The Data Management course has been organised by AREA Science Park and tailored to their researchers and support research staff. The course is focused on FAIR data management and introduces to all aspects of Open Science and provides the skills, tools and standards required to embed Open Science in the research workflow. It has been structured in 4 on-line training modules, each one built on 20 mins sub-modules of frontal lessons and on several interactions. The structure of the 4 modules is the following: Module 1: An Introduction to Open Science and Open Data (2hrs) Module 2: Open Science in Horizon Europe, the FAIR principles and Open data (4 hrs) Module 3: Data Management Plans (3hrs) Module 4: Data Management Plans and Metadata schemas (3hrs) Materials of each module start with MX.0. A readme file is associated with each module.}, KEYWORDS = {open science, research data management}, URL = {https://doi.org/10.5281/zenodo.5575096}, } @MISC{DIDONATO_2021_MISC_DL_465158, AUTHOR = {Di Donato, F. and Lazzeri, E.}, TITLE = {Horizon Europe: L'Open Science e la nuova normalità}, YEAR = {2021}, ABSTRACT = {Nel programma quadro Horizon Europe, l'adozione dell'Open Science e delle sue pratiche è un requisito fondamentale. Questo evento, rivolto a ricercatori, tecnologi e personale di supporto alla ricerca, intende fornire gli elementi introduttivi per comprendere come rispondere in modo adeguato alle nuove richieste introdotte dalla Commissione europea. In particolare ci concentreremo sugli elementi fondamentali della comunicazione scientifica, sulla definizione di scienza aperta e dei suoi elementi fondanti, e su come l'Open Science sta cambiando il paradigma di valutazione della ricerca. 
Programma - Modulo 1.0: Intro e warm-up - Modulo 1.1: La valutazione della ricerca e l'Open Science - Modulo 1.2: Comunicazione scientifica e Open Science - Domande - Modulo 1.3: Open Science e Horizon Europe - Domande - Chiusura lavori}, KEYWORDS = {open science, Horizon Europe, valutazione}, URL = {https://doi.org/10.5281/zenodo.5604541}, } @MISC{ERJAVEC_2021_MISC_EOOLSGRPKBSVDDJHNCDVMLCAFMQVRMBSRDUPBKMDLR_463861, AUTHOR = {Erjavec, T. and Ogrodniczuk, M. and Osenova, P. and Ljubešić, N. and Simov, K. and Grigorova, V. and Rudolf, M. and Pančur, A. and Kopp, M. and Barkarson, S. and Steingrímsson, S. and Van Der Pol, H. and Depoorter, G. and De Does, J. and Jongejan, B. and Haltrup Hansen, D. and Navarretta, C. and Calzada Pérez, M. and De Macedo, L. D. and Van Heusden, R. and Marx, M. and Çöltekin, Ç. and Coole, M. and Agnoloni, T. and Frontini, F. and Montemagni, S. and Quochi, V. and Venturi, G. and Ruisi, M. and Marchetti, C. and Battistoni, R. and Sebők, M. and Ring, O. and Darģis, R. and Utka, A. and Petkevičius, M. and Briedienė, M. and Krilavičius, T. and Morkevičius, V. and Diwersy, S. and Luxardo, G. and Rayson, P.}, TITLE = {Linguistically annotated multilingual comparable corpora of parliamentary debates ParlaMint.ana 2.1}, YEAR = {2021}, ABSTRACT = {ParlaMint 2.1 is a multilingual set of 17 comparable corpora containing parliamentary debates mostly starting in 2015 and extending to mid-2020, with each corpus being about 20 million words in size. The sessions in the corpora are marked as belonging to the COVID-19 period (from November 1st 2019), or being "reference" (before that date). The corpora have extensive metadata, including aspects of the parliament; the speakers (name, gender, MP status, party affiliation, party coalition/opposition); are structured into time-stamped terms, sessions and meetings; with speeches being marked by the speaker and their role (e.g. chair, regular speaker). 
The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. Note that some corpora have further information, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The corpora are encoded according to the Parla-CLARIN TEI recommendation (https://clarin-eric.github.io/parla-clarin/), but have been validated against the compatible, but much stricter ParlaMint schemas. This entry contains the linguistically marked-up version of the corpus, while the text version is available at http://hdl.handle.net/11356/1432. The ParlaMint.ana linguistic annotation includes tokenization, sentence segmentation, lemmatisation, Universal Dependencies part-of-speech, morphological features, and syntactic dependencies, and the 4-class CoNLL-2003 named entities. Some corpora also have further linguistic annotations, such as PoS tagging or named entities according to language-specific schemes, with their corpus TEI headers giving further details on the annotation vocabularies and tools.}, KEYWORDS = {dibattiti parlamentari, covid-19, ParlaCLARIN, parlamenti, discorso politico, CLARIN, linguistic annotation, pos-tagging, ner, linguistic dependency annotation, UD}, URL = {http://hdl.handle.net/11356/1431}, } @MISC{ERJAVEC_2021_MISC_EOOLSGRPKBSVDDJHNCDVMLCAFMQVRMBSRDUPBKMDLR_463865, AUTHOR = {Erjavec, T. and Ogrodniczuk, M. and Osenova, P. and Ljubešić, N. and Simov, K. and Grigorova, V. and Rudolf, M. and Pančur, A. and Kopp, M. and Barkarson, S. and Steingrímsson, S. and Van Der Pol, H. and Depoorter, G. and De Does, J. and Jongejan, B. and Haltrup Hansen, D. and Navarretta, C. and Calzada Pérez, M. and De Macedo, L. D. and Van Heusden, R. and Marx, M. and Çöltekin, Ç. and Coole, M. and Agnoloni, T. and Frontini, F. and Montemagni, S. and Quochi, V. and Venturi, G. and Ruisi, M. and Marchetti, C. and Battistoni, R. and Sebők, M. and Ring, O. and Darģis, R. 
and Utka, A. and Petkevičius, M. and Briedienė, M. and Krilavičius, T. and Morkevičius, V. and Diwersy, S. and Luxardo, G. and Rayson, P.}, TITLE = {Multilingual comparable corpora of parliamentary debates ParlaMint 2.1}, YEAR = {2021}, ABSTRACT = {ParlaMint 2.1 is a multilingual set of 17 comparable corpora containing parliamentary debates mostly starting in 2015 and extending to mid-2020, with each corpus being about 20 million words in size. The sessions in the corpora are marked as belonging to the COVID-19 period (after November 1st 2019), or being "reference" (before that date). The corpora have extensive metadata, including aspects of the parliament; the speakers (name, gender, MP status, party affiliation, party coalition/opposition); are structured into time-stamped terms, sessions and meetings; with speeches being marked by the speaker and their role (e.g. chair, regular speaker). The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. Note that some corpora have further information, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The corpora are encoded according to the Parla-CLARIN TEI recommendation (https://clarin-eric.github.io/parla-clarin/), but have been validated against the compatible, but much stricter ParlaMint schemas. This entry contains the ParlaMint TEI-encoded corpora with the derived plain text version of the corpus along with TSV metadata on the speeches. Also included is the 2.0 release of the data and scripts available at the GitHub repository of the ParlaMint project. 
Note that there also exists the linguistically marked-up version of the corpus, which is available at http://hdl.handle.net/11356/1431.}, KEYWORDS = {dibattiti parlamentari, covid-19, discorso politico, CLARIN, parlamenti, ParlaCLARIN}, URL = {http://hdl.handle.net/11356/1432}, } @MISC{FRONTINI_2021_MISC_FGMB_463503, AUTHOR = {Frontini, F. and Gamba, F. and Monachini, M. and Broeder, D.}, TITLE = {SSHOC Multilingual Data Stewardship Terminology}, YEAR = {2021}, ABSTRACT = {The SSHOC Multilingual Data Stewardship Terminology is a multilingual terminology that collects terms specific to the domain of Data Stewardship, as well as their definitions. A list of domain-specific terms was automatically extracted from a corpus pertaining to the domain of Data Stewardship and Curation, validated by domain experts, assigned a definition, and linked to other existing terminologies (Loterre Open Science Thesaurus, terms4FAIRskills, Linked Open Vocabularies, ISO terms and definitions). Each term-definition pair was then automatically translated into multiple languages (Dutch, French, German, Greek, Italian, Slovenian) by employing Deep-L. The Multilingual Data Stewardship Terminology thus consists of 210 concepts available in Dutch, French, German, Greek, Italian, Slovenian. This resource was created within the frame of the SSHOC (Social Sciences and Humanities Open Cloud) project (H2020-INFRAEOSC-2018-2-823782). It is the result of the work of Task 3.1.2 "extraction of terminology from technical documentation about standards and interoperability", as described in D3.9, carried out jointly by ILC-CNR and CLARIN ERIC.}, KEYWORDS = {terminology, data stewardship}, URL = {http://hdl.handle.net/20.500.11752/ILC-567}, } @MISC{FRONTINI_2021_MISC_FGMB_463504, AUTHOR = {Frontini, F. and Gamba, F. and Monachini, M. 
and Broeder, D.}, TITLE = {SSHOC Multilingual Metadata}, YEAR = {2021}, ABSTRACT = {SSHOC Multilingual Metadata is based on the metadata set of the CLARIN Concept Registry (CCR). The CCR 232 approved metadata concepts, as well as their definitions, were automatically translated into several languages (Dutch, French, Greek, Italian) thanks to the support of Machine Translation tools, and eventually validated by native speakers who were also experts in the domain. This resource was created within the frame of the SSHOC (Social Sciences and Humanities Open Cloud) project (H2020-INFRAEOSC-2018-2-823782). It is the result of the work of Task 3.1.3 "creating Multilingual metadata and taxonomies for discovery", as described in D3.9, carried out jointly by ILC-CNR and CLARIN ERIC.}, KEYWORDS = {metadata, terminology}, URL = {http://hdl.handle.net/20.500.11752/ILC-568}, } @MISC{GIGLIA_2021_MISC_GLD_465155, AUTHOR = {Giglia, E. and Lazzeri, E. and Di Donato, F.}, TITLE = {Scienza aperta e gestione dei dati per le scienze umane e del patrimonio culturale}, YEAR = {2021}, ABSTRACT = {Corso tenuto per gli istituti CNR di area umanistica, maggio-giugno 2021. Modulo1: Open Science, perché serve; il diritto d'autore; come siamo arrivati alla comunicazione scientifica attuale Modulo 2: Open Access e politiche europee Modulo 3: Gestione dei dati e dati FAIR Modulo 4: Data management plan ed esempi concreti Moduli 5-8: presentazione delle infrastrutture di ricerca, CLARIN, DARIAH, OPERAS, E-RIHS, Parthenos, Ariadne+}, KEYWORDS = {open science, fair data, humanities}, URL = {https://zenodo.org/record/5497914#.YjCP05PMJfU}, } @MISC{MATHIAK_2021_MISC_MJHDJWFCBCK_465241, AUTHOR = {Mathiak, B. and Juty, N. and Heger, T. and Di Donato, F. and Jeschke, J. and Widmann, H. and Flügel, A. and Culina, A. and Bardi, A. and Colomb, J. and Kraker, P.}, TITLE = {Stocktaking GO FAIR Discovery IN-Use cases, infrastructure
(0.95)}, YEAR = {2021}, ABSTRACT = {In order to build a better ecosystem for data discovery tools the Data Discovery Implementation Group of GO Fair (https://www.go-fair.org/implementation-networks/overview/discovery) collected use cases between 2019 and 2020 from a variety of sources. We also detail the 'Actors' for these use cases and the 'Source' providing links, whenever possible. Since we found over a hundred individual use cases, we decided to cluster them to provide a better overview. The clustering, as well as the results of a small survey among data infrastructure specialists to find how they rate the importance of the clusters are detailed in the documentation to this dataset, a draft of which can currently be found here. The code and data to produce the figures in the documentation are available as R code in the GO_FAIR_Discovery_Use_case-master.zip file. The use cases themselves are available as Excel sheet and csv.}, KEYWORDS = {discovery, gofair}, URL = {https://doi.org/10.5281/zenodo.5211196}, } @MISC{TESSAROLO_2021_MISC_TDB_484490, AUTHOR = {Tessarolo, L. and Del Grosso, A. M. and Boschetti, F.}, TITLE = {florilegiasyriaca}, YEAR = {2021}, ABSTRACT = {Il portale di produzione e fruizione delle edizioni dei testi critici restituiti al pubblico durante il lavoro del team del progetto ERC 758732-FLOS}, KEYWORDS = {ERC, Digital Humanities, Digital Philology, FLOS, DSL-based DSE}, URL = {https://www.florilegiasyriaca.eu/}, } @MISC{ZENZARO_2021_MISC_ZDB_472307, AUTHOR = {Zenzaro, S. and Del Grosso, A. M. and Boschetti, F.}, TITLE = {CophiEditor A collaborative web platform for the creation of scholarly digital editions}, YEAR = {2021}, ABSTRACT = {Relazione di avanzamento della piattaforma per il progetto GreekSchools}, KEYWORDS = {Digital Humanities, Digital papyrology, GreekSchools}, URL = {https://publications.cnr.it/doc/472307}, } @ARTICLE{ALZETTA_2020_ARTICLE_ADMV_463828, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. 
and Venturi, G.}, TITLE = {Linguistically-driven Selection of Difficult-to-Parse Dependency Structures}, YEAR = {2020}, ABSTRACT = {The paper illustrates a novel methodology meeting a twofold goal, namely quantifying the reliability of automatically generated dependency relations without using gold data on the one hand, and identifying which are the linguistic constructions negatively affecting the parser performance on the other hand. These represent objectives typically investigated in different lines of research, with different methods and techniques. Our methodology, at the crossroads of these perspectives, allows not only to quantify the parsing reliability of individual dependency types but also to identify and weight the contextual properties making relation instances more or less difficult to parse. The proposed methodology was tested in two different and complementary experiments, aimed at assessing the degree of parsing difficulty across (a) different dependency relation types, and (b) different instances of the same relation. The results show that the proposed methodology is able to identify difficult-to-parse dependency relations without relying on gold data and by taking into account a variety of intertwined linguistic factors. These findings pave the way to novel applications of the methodology, both in the direction of defining new evaluation metrics based purely on automatically parsed data and towards the automatic creation of challenge sets.}, KEYWORDS = {Linguistic Complexity, Syntactic Parsing, Evaluation metrics}, PAGES = {37-60}, URL = {https://journals.openedition.org/ijcol/719}, VOLUME = {6}, DOI = {10.4000/ijcol.719}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{BOSCHETTI_2020_ARTICLE_BD_472293, AUTHOR = {Boschetti, F. 
and Del Grosso, A.}, TITLE = {L'annotazione di testi storico-letterari al tempo dei social media}, YEAR = {2020}, ABSTRACT = {The annotation of historical and literary texts is approached differently by traditional philologists and digital philologists. The former are concentrated on the detailed study of a given text (close reading) while the latter are focused on the study of large quantities of texts (distant reading). A structured and collaborative annotation makes it possible both to add information to particular passages of individual texts, as in a traditional linear comment, and to connect data from entire textual collections through rigorous protocols. However, the standards developed by digital philologists are not highly appreciated by traditional academics, since the effort necessary to apply the proposed technologies allegedly diverts researchers' attention from the object of study. As opposed to this objection, we intend to highlight that it is indeed possible to maintain the precision requisite for the application of computational tools to digital resources without renouncing the annotation practices established in traditional contexts. In support of the method, we report a number of case studies of digital scientific editions whose goals include both reconstructing respective texts and encouraging the dissemination of contents and public participation in the academic debate. 
In particular, we will discuss the following projects: a) the stylistic annotation of three different editions of Giacomo Leopardi's translation of the Batracomiomachia; b) the scientific edition of Bellini's letters; c) the multi-level annotated edition of Bassani; and d) the comparison of Umberto Eco's variants of his Il nome della rosa.}, KEYWORDS = {digital philology, collaborative annotation, communities, digital scholarly edition, formalisation}, PAGES = {65-99}, URL = {https://publications.cnr.it/doc/472293}, VOLUME = {11}, DOI = {10.15804/IW.2020.11.1.03}, PUBLISHER = {Wydawnictwo Adam Marszalek (Torun, Polonia)}, ISSN = {2084-4514}, JOURNAL = {Italica Wratislaviensia (Print)}, } @ARTICLE{DEROSIS_2020_ARTICLE_DGZVF_435971, AUTHOR = {De Rosis, S. and Guidotti, E. and Zuccarino, S. and Venturi, G. and Ferre, F.}, TITLE = {Waiting time information in the Italian NHS: A citizen perspective}, YEAR = {2020}, ABSTRACT = {Public involvement in the management and communication of waiting times is known to support initiatives to reduce waiting times, as well as increase fairness and promote transparency and accountability. In order to improve transparency and communication to citizens, Italy recently updated the National Regulatory Plan for Waiting Lists (2019-2021), which calls for the disclosure of waiting time information on healthcare provider webpages. This study analyses waiting time information for outpatient visits and digital services available on the institutional website pages of 144 public healthcare organisations in nine regions and two autonomous provinces of Italy. Web pages were analysed both in terms of the available information/services, using a grid, and in terms of the quality of the text using an advanced readability assessment tool (READ-IT). This information was complemented and validated by regional healthcare key informants during research-specific workshops. 
Waiting time information disclosure, digital services and text readability varied both within and between the regional healthcare systems and organisations. The types and characteristics of waiting time information and statistics vary considerably with a negative impact on their use for benchmarking and their readability and usability for booking purposes. Overall, communication weaknesses due to low harmonization and clarity of information can undermine efforts in effectively informing and involving the public through online waiting time data disclosure. (C) 2020 The Author(s). Published by Elsevier B.V.}, KEYWORDS = {Waiting times, Healthcare, Online information, Readability, Italy}, PAGES = {796-804}, URL = {https://www.sciencedirect.com/science/article/pii/S0168851020301111?via%3Dihub}, VOLUME = {124}, DOI = {10.1016/j.healthpol.2020.05.012}, PUBLISHER = {Elsevier (Amsterdam, Paesi Bassi)}, ISSN = {0168-8510}, JOURNAL = {Health policy (Amst. Print)}, } @ARTICLE{DUMOUCHEL_2020_ARTICLE_DBBCDEFGGGMPDPT_437781, AUTHOR = {Dumouchel, S. and Blotière, E. and Breitfuss, G. and Chen, Y. and Di Donato, F. and Eskevich, M. and Forbes, P. and Georgiadis, H. and Gingold, A. and Gorgaini, E. and Moranville, Y. and Pohle, S. and De Paoli, S. and Petitfils, C. and Toth Czifra, E.}, TITLE = {GOTRIPLE: A User-Centric Process to Develop a Discovery Platform}, YEAR = {2020}, ABSTRACT = {Social sciences and humanities (SSH) research is divided across a wide array of disciplines, sub-disciplines and languages. While this specialization makes it possible to investigate the extensive variety of SSH topics, it also leads to a fragmentation that prevents SSH research from reaching its full potential. The TRIPLE project brings answers to these issues by developing an innovative discovery platform for SSH data, researchers' projects and profiles. 
Having started in October 2019, the project has already three main achievements that are presented in this paper: (1) the definition of main features of the GOTRIPLE platform; (2) its interoperability; (3) its multilingual, multicultural and interdisciplinary vocation. These results have been achieved thanks to different methodologies such as a co-design process, market analysis and benchmarking, monitoring and co-building. These preliminary results highlight the need for respecting diversity of practices and communities through coordination and harmonization.}, KEYWORDS = {user-centric approach, user research, social sciences and humanities, open science, European Open Science Cloud (EOSC), FAIR principles, discovery, research data}, URL = {https://www.mdpi.com/2078-2489/11/12/563}, VOLUME = {11}, NUMBER = {12}, DOI = {10.3390/info11120563}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2078-2489}, JOURNAL = {Information (Basel)}, } @ARTICLE{GIOVANNETTI_2020_ARTICLE_GBDDMPP_453583, AUTHOR = {Giovannetti, E. and Bellandi, A. and Dattilo, D. and Del Grosso, A. M. and Marchi, S. and Pecchioli, A. and Piccini, S.}, TITLE = {The Terminology of the Babylonian Talmud: Extraction, Representation and Use in the Context of Computational Linguistics}, YEAR = {2020}, ABSTRACT = {A formal digital structuring of the terminology of the Talmud is being carried out in the context of the Project for the Translation of the Babylonian Talmud in Italian. The terminological resource was encoded in the form of a multi-language Explanatory Combinatorial Dictionary (Hebrew-Aramaic-Italian) according to the principles of the Meaning-Text Theory. The construction of such a resource was supported by text processing and computational linguistics techniques aimed at automatically extracting terms from the Italian translation of the Talmud and aligning them with the corresponding Hebrew/Aramaic source terms. 
The paper describes the process that was set up for the construction of the terminological resource with the ultimate goal of illustrating the advantages of the adoption of a formal linguistic model. The terminological resource aims, indeed, to be a useful tool to deepen the characteristics of the languages of the Talmud, to help translators in their work and more generally scholars in their study of the Talmud itself.}, KEYWORDS = {Babylonian Talmud, Computational Linguistics, Explanatory and Combinatorial Lexicology}, PAGES = {61-74}, URL = {https://publications.cnr.it/doc/453583}, VOLUME = {XXV}, DOI = {10.1400/283235}, PUBLISHER = {Giuntina (Firenze, Italia)}, ISSN = {2282-4499}, JOURNAL = {Materia giudaica Print}, } @ARTICLE{GUADAGNINI_2020_ARTICLE_G_426575, AUTHOR = {Guadagnini, E.}, TITLE = {Alessandro, la 'cautela' e altri latinismi: un esercizio traduttologico su un estratto della tradizione latino-romanza del "Secretum secretorum" (SS/B)}, YEAR = {2020}, ABSTRACT = {This paper takes into consideration several translation and linguistic issues concerning the Secretum secretorum and its various Romance versions, including the Franco-Italian Amaestramens (ms. Paris BnF fr. 821). Romance inflections of the Latin words cautela, custodia, munimen, providentia, and salus are considered as well.}, KEYWORDS = {Secretum secretorum, Traductology, Lexicology, Latinism}, PAGES = {243-278}, URL = {https://www.francigena-unipd.com/index.php/francigena/article/view/50}, VOLUME = {6}, DOI = {10.25430/2420-9767/V6-239-278}, PUBLISHER = {Università degli Studi di Padova (Padova, Italia)}, ISSN = {2420-9767}, JOURNAL = {Francigena}, } @ARTICLE{KHALFI_2020_ARTICLE_KZN_438041, AUTHOR = {Khalfi, M. and Zarghili, A. 
and Nahli, O.}, TITLE = {A New Rich Lexical Resource For Classical Arabic}, YEAR = {2020}, ABSTRACT = {Currently, large lexical resources are getting a high potential relevance for information systems and need of Lexical resources in Natural Language Processing (NLP) fields is paramount. To contribute meet these needs, we build a lexical resource from the famous dictionary al=qāmūs al=muḥīṭ (AQAM). Using a rule based approach, we have designed a system that allows extracting morpho-syntactical, semantics and lexical information from the famous dictionary. So, we obtained a digitized and structured version of AQAM, enriched by morpho-syntactical and lexical explicit information. In addition, the obtained resource is enriched by English translations of lemma and accompanying senses using a bilingual English-Arabic dictionary. Then we present an overview of an experiment alignment of the section of the letter bā' on Princeton's WordNet (PWN) and Suggested Upper Merged Ontology (SUMO). This experience turned out to be interesting because it revealed that mapping an Arabic lexical resource on an English resource shows commonality between the two languages, but it allows especially to emphasize the non-equivalences between them. 
All obtained resources are represented in XML format and distributed under free license}, KEYWORDS = {Information Extraction, Arabic Lexicon, Al Qamus Al Muhit, Machine-readable dictionary, Arabic Lexical Resource}, PAGES = {3863-3884}, URL = {https://www.ijact.in/index.php/ijact/article/view/1196}, VOLUME = {9}, NUMBER = {10}, PUBLISHER = {Research India Publications (New Delhi, India)}, ISSN = {2249-3123}, JOURNAL = {International journal of advanced computer science and technology}, } @ARTICLE{MARZI_2020_ARTICLE_M_424281, AUTHOR = {Marzi, C.}, TITLE = {Modeling Word Learning and Processing with Recurrent Neural Networks}, YEAR = {2020}, ABSTRACT = {The paper focuses on what two different types of Recurrent Neural Networks, namely a recurrent Long Short-Term Memory and a recurrent variant of self-organizing memories, a Temporal Self-Organizing Map, can tell us about speakers' learning and processing a set of fully inflected verb forms selected from the top-frequency paradigms of Italian and German. Both architectures, due to the re-entrant layer of temporal connectivity, can develop a strong sensitivity to sequential patterns that are highly attested in the training data. The main goal is to evaluate learning and processing dynamics of verb inflection data in the two neural networks by focusing on the effects of morphological structure on word production and word recognition, as well as on word generalization for untrained verb forms. For both models, results show that production and recognition, as well as generalization, are facilitated for verb forms in regular paradigms. 
However, the two models are differently influenced by structural effects, with the Temporal Self-Organizing Map more prone to adaptively find a balance between processing issues of learnability and generalization, on the one side, and discriminability on the other side.}, KEYWORDS = {word-learning, serial word processing, recurrent neural networks, long short-term memories, temporal self-organizing memories}, PAGES = {14}, URL = {https://www.mdpi.com/2078-2489/11/6/320}, VOLUME = {11}, DOI = {10.3390/info11060320}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2078-2489}, JOURNAL = {Information (Basel)}, } @ARTICLE{MARZI_2020_ARTICLE_M_424627, AUTHOR = {Marzi, C.}, TITLE = {Modelling the interaction of regularity and morphological structure: the case of Russian verb inflection}, YEAR = {2020}, ABSTRACT = {The main focus of this paper is to investigate how aspects of morphological regularity may have an impact on early stages of word processing, prior to full lexical access. Here I explore the interaction of regularity and morphological structure by using a computational simulation of the process of learning Russian verb forms, without any morpho-syntactic or morphosemantic additional information. With a recurrent variant of self-organising memories, namely a Temporal Self-Organising Map, or TSOM, experimental results allow an investigation of the impact of incremental learning and online processing principles on paradigm organisation, by assessing the differential impact of several aspects of regularity, ranging from formal transparency and predictability to allomorphy, on the processing/learning behaviour in a connectionist framework. 
The proposed analysis suggests a performance-oriented account of inflectional regularity in morphology, whereby perception of morphological structure is not the by-product of the design of the human word processor, with rules separated from exceptions, but rather an emergent property of the dynamic self-organisation of stored lexical representations, dependent on the adaptive processing history of inflected word forms, intrinsically graded and probabilistic.}, KEYWORDS = {morphological complexity, discriminative learning, recurrent neural networks, self-organisation, Russian verb inflection}, PAGES = {131-156}, URL = {https://www.mulino.it/riviste/issn/1720-9331}, VOLUME = {XIX}, DOI = {10.1418/97534}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{MASINI_2020_ARTICLE_MP_444782, AUTHOR = {Masini, F. and Pirrelli, V.}, TITLE = {L'evidenza morfologica nell'era digitale: per un'integrazione di teoria e computazione}, YEAR = {2020}, ABSTRACT = {This article proposes a research perspective on morphological and lexical data based on an integrated approach that merges linguistic theory and computational analyses of a large quantity of textual data. Starting from a description of the units and processes of morphology, and of the issues they raise, we discuss to what extent these theoretical notions can be translated into the algorithmic procedures of Natural Language Processing (NLP) and what resources and methods are nowadays available to make morphological and lexical knowledge explicit within texts. 
At the same time, we explore the repercussions that the application of computational (but also psycho-/neuro-linguistic) techniques may have on our theoretical representations and on their plausibility.}, KEYWORDS = {morphology, lexicon, categories, Natural Language Processing, Italian}, PAGES = {77-126}, URL = {https://publications.cnr.it/doc/444782}, VOLUME = {VI}, PUBLISHER = {Editrice CLUEB; [poi] Edizioni dell'Orso (Bologna; [poi] Alessandria, Italia)}, ISSN = {0393-1226}, JOURNAL = {Quaderni di semantica (Testo stampato)}, } @ARTICLE{NICOLOSI_2020_ARTICLE_NMN_429366, AUTHOR = {Nicolosi, A. and Monachini, M. and Nava, B.}, TITLE = {CLARIN-IT and the Definition of a Digital Critical Edition for Ancient Greek Poetry}, YEAR = {2020}, ABSTRACT = {Ancient Greek studies, and Classics in general, is a perfect field of investigation in Digital Humanities. Indeed, DH approaches could become a means of building models for complex realities, analyzing them with computational methods and sharing the results with a broader public. Ancient texts have a complex tradition, which includes many witnesses (texts that handed down other texts) and different typologies of supports (papyri, manuscripts, and epigraphs). These texts are the basis of all European Literatures and it is crucial to spread their knowledge, in a reliable and easy way. Our project on ancient Greek fragmentary poetry (DEA - Digital Edition of Archilochus: New models and tools for authoring, editing and indexing an ancient Greek fragmentary author), growing out of the existing experience, tries to define a TEI-based digital critical edition combined with NLP techniques and semantic web technologies. Our goal is to provide a complete and reliable tool for scholars, suitable for critical studies in Classics, and a user-friendly environment also for non-specialist users. 
The project represents one of the attempts within the context of CLARIN-IT to contribute to the wider impact of CLARIN on the specific Italian community interested in Digital Classics. It is intended to improve services in fostering new knowledge in SSH digital research and sustaining the existing one.}, KEYWORDS = {Digital edition, Ancient Greek, research infrastructures, digital humanities, digital classics}, PAGES = {85-93}, URL = {https://ep.liu.se/ecp/172/011/ecp20172011.pdf}, VOLUME = {172}, DOI = {10.3384/ecp2020172011}, PUBLISHER = {Linköping University Electronic Press (Linköping, Svezia)}, ISSN = {1650-3740}, JOURNAL = {Linköping electronic conference proceedings (Online)}, } @ARTICLE{VENTURI_2020_ARTICLE_VDMMS_441971, AUTHOR = {Venturi, G. and Dell'Orletta, F. and Montemagni, S. and Morini, E. and Sagri, M. T.}, TITLE = {Metodi e Tecniche di Trattamento Automatico della Lingua per l'Estrazione di Conoscenza dalla Documentazione Scolastica}, YEAR = {2020}, ABSTRACT = {Il contributo riguarda la creazione di un sistema integrato di "knowledge management", per la gestione e condivisione della conoscenza prodotta e utilizzata dalla scuola.}, KEYWORDS = {Estrazione di informazione, Documenti scolastici, Indicizzazione, Terminology extraction}, PAGES = {49-68}, URL = {https://publications.cnr.it/doc/441971}, VOLUME = {2}, DOI = {10.3280/CAD2020-002005}, PUBLISHER = {Franco Angeli (Napoli, Italia)}, ISSN = {1122-5165}, JOURNAL = {Cadmo (Testo stamp.)}, } @ARTICLE{VERTECCHI_2020_ARTICLE_VADMV_441967, AUTHOR = {Vertecchi, B. and Agrusti, F. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Verba et Acta. Un esperimento per promuovere l'evoluzione delle competenze linguistiche degli studenti degli istituti professionali}, YEAR = {2020}, ABSTRACT = {Ricerche in corso. Verba et Acta. 
Un esperimento per promuovere l'evoluzione delle competenze linguistiche degli studenti degli istituti professionali}, KEYWORDS = {Evoluzione competenze linguistiche, Annotazione linguistica, Previsione dello sviluppo delle competenze di scrittura}, PAGES = {109-117}, URL = {https://publications.cnr.it/doc/441967}, DOI = {10.3280/CAD2020-001008}, PUBLISHER = {Franco Angeli (Napoli, Italia)}, ISSN = {1122-5165}, JOURNAL = {Cadmo (Testo stamp.)}, } @INCOLLECTION{GUADAGNINI_2020_INCOLLECTION_G_444062, AUTHOR = {Guadagnini, E.}, TITLE = {Les Métamorphoses d'Ovide et le Moyen Âge italien: une esquisse du cadre général}, YEAR = {2020}, ABSTRACT = {Cette contribution propose un cadre d'ensemble de la réception des Métamorphoses ovidiennes dans la production vernaculaire italienne du Moyen Âge (XIIe-XIVe siècles). Trois typologies de réception sont isolées : le poème en tant que source de contenus, le poème en tant que source textuelle et les traductions du poème. Une attention particulière est accordée à la tradition manuscrite des oeuvres citées, ainsi qu'à la recherche des sources vraisemblablement employées par les auteurs.}, KEYWORDS = {Volgarizzamenti, eredità dei classici, Ovidio, Filologia romanza}, PAGES = {209-236}, URL = {https://classiques-garnier.com/traire-de-latin-et-espondre-etudes-sur-la-reception-medievale-d-ovide.html}, PUBLISHER = {Editions Classiques Garnier (Paris, FRA)}, ISBN = {9782406105077}, BOOKTITLE = {« Traire de latin et espondre ». Études sur la réception médiévale d'Ovide}, EDITOR = {Baker, C. and Cavagna, M. and Guadagnini, E.}, } @INCOLLECTION{MARZI_2020_INCOLLECTION_MBBP_421742, AUTHOR = {Marzi, C. and Blevins, J. P. and Booij, G. and Pirrelli, V.}, TITLE = {Inflection at the morphology-syntax interface}, YEAR = {2020}, ABSTRACT = {What is inflection? Is it part of language morphology, syntax or both? What are the basic units of inflection and how do speakers acquire and process them? How do they vary across languages? 
Are some inflection systems somewhat more complex than others, and does inflectional complexity affect the way speakers process words? This chapter addresses these and other related issues from an interdisciplinary perspective. Our main goal is to map out the place of inflection in our current understanding of the grammar architecture. In doing that, we will embark on an interdisciplinary tour, which will touch upon theoretical, psychological, typological, historical and computational issues in morphology, with a view to looking for points of methodological and substantial convergence from a rather heterogeneous array of scientific approaches and theoretical perspectives. The main upshot is that we can learn more from this than just an additive medley of domain-specific results. In the end, a cross-domain survey can help us look at traditional issues in a surprisingly novel light.}, KEYWORDS = {inflection, paradigmatic relations, word processing, word learning, inflectional complexity, family size, entropy}, PAGES = {228-294}, URL = {https://www.degruyter.com/view/book/9783110440577/10.1515/9783110440577-007.xml}, VOLUME = {337}, DOI = {10.1515/9783110440577-007}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {9783110440577}, BOOKTITLE = {Word Knowledge and Word Usage. A Cross-Disciplinary Guide to the Mental Lexicon}, EDITOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.}, } @INCOLLECTION{PIRRELLI_2020_INCOLLECTION_PMFCBM_421741, AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M. and Cardillo, F. A. and Baayen, H. R. and Milin, P.}, TITLE = {Psycho-computational modelling of the mental lexicon}, YEAR = {2020}, ABSTRACT = {Over the last decades, a growing body of evidence on the mechanisms governing lexical storage, access, acquisition and processing has questioned traditional models of language architecture and word usage based on the hypothesis of a direct correspondence between modular components of grammar competence (lexicon vs. 
rules), processing correlates (memory vs. computation) and neuro-anatomical localizations (prefrontal vs. temporo-parietal perisylvian areas of the left hemisphere). In the present chapter, we explore the empirical and theoretical consequences of a distributed, integrative model of the mental lexicon, whereby words are seen as emergent properties of the functional interaction between basic, language-independent processing principles and the language- specific nature and organization of the input. From this perspective, language learning appears to be inextricably related to the way language is processed and internalized by the speakers, and key to an interdisciplinary understanding of such a way, in line with Tomaso Poggio's suggestion that the development of a cognitive skill is causally and ontogenetically prior to its execution (and sits "on top of it"). In particular, we discuss conditions, potential and prospects of the epistemological continuity between psycholinguistic and computational modelling of word learning, and illustrate the yet largely untapped potential of their integration. We use David Marr's hierarchy to clarify the complementarity of the two viewpoints. Psycholinguistic models are informative about how speakers learn to use language (interfacing Marr's levels 1 and 2). When we move from the psycholinguistic analysis of the functional operations involved in language learning to an algorithmic description of how they are computed, computer simulations can help us explore the relation between speakers' behavior and general learning principles in more detail. 
In the end, psycho-computational models can be instrumental to bridge Marr's levels 2 and 3, bringing us closer to understanding the nature of word knowledge in the brain.}, KEYWORDS = {mental lexicon, word storage and processing, psycholinguistics, computational linguistics, connectionist models, discriminative learning}, PAGES = {23-82}, URL = {https://www.degruyter.com/view/book/9783110440577/10.1515/9783110440577-002.xml}, VOLUME = {337}, DOI = {10.1515/9783110440577-002}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {9783110440577}, BOOKTITLE = {Word Knowledge and Word Usage. A Cross-Disciplinary Guide to the Mental Lexicon}, EDITOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.}, } @INCOLLECTION{PIRRELLI_2020_INCOLLECTION_PPD_423388, AUTHOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.}, TITLE = {Word knowledge in a cross-disciplinary world}, YEAR = {2020}, ABSTRACT = {This editorial project stemmed from a 4-year period of intense interdisciplinary research networking funded by the European Science Foundation within the framework of the NetWordS project (09-RNP-089). The project mission was to bring together experts of various research fields (from brain sciences and computing to cognition and linguistics) and of different theoretical inclinations, to advance the current awareness of theoretical, typological, psycholinguistic, computational and neurophysiological evidence on the structure and processing of words, with a view to promoting novel methods of research and assessment for grammar architecture and language usage. 
The unprecedented cross-disciplinary fertilization prompted by a wide range of scientific and educational initiatives (three international workshops, two summer schools, one main conference and over a hundred grants supporting short visits and multilateral exchanges) persuaded us to pursue this effort beyond the project lifespan, spawning the idea of an interdisciplinary handbook, where a wide range of central topics on word knowledge and usage are dealt with by teams of authors with common interests and different backgrounds. Unsurprisingly (with the benefit of the hindsight), the project turned out to be more challenging and time-consuming than initially planned. Cross-boundary talking and mutual understanding are neither short-term, nor immediately rewarding efforts, but part of a long-sighted, strategic vision, where stamina, motivation and planning ahead play a prominent role. We believe that this book, published as an open access volume, significantly sharpens the current understanding of issues of word knowledge and usage, and has a real potential for promoting novel research paradigms, and bringing up a new generation of language scholars.}, KEYWORDS = {interdisciplinarity, word knowledge, word usage, language units, statistical and computer modeling, levels of understanding, between-level mapping, linking hypotheses, scale effects}, PAGES = {1-20}, URL = {https://doi.org/10.1515/9783110440577}, VOLUME = {337}, DOI = {10.1515/9783110440577}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {9783110440577}, BOOKTITLE = {Word Knowledge and Word Usage. A Cross-Disciplinary Guide to the Mental Lexicon}, EDITOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.}, } @EDITORIAL{BEERMANN_2020_EDITORIAL_BBSS_472133, AUTHOR = {Beermann, D. and Besacier, L. and Sakti, S. 
and Soria, C.}, TITLE = {Proceedings of the 1st Joint SLTU and CCURL Workshop (SLTU-CCURL 2020)}, YEAR = {2020}, ABSTRACT = {Proceedings of the 1st Joint SLTU and CCURL Workshop (SLTU-CCURL 2020)}, KEYWORDS = {less-resourced languages, NLP, language resources}, URL = {https://aclanthology.org/events/lrec-2020/#2020-sltu-1}, ISBN = {979-10-95546-35-1}, } @EDITORIAL{BROEDER_2020_EDITORIAL_BEM_472326, AUTHOR = {Broeder, D. and Eskevich, M. and Monachini, M.}, TITLE = {Proceedings of the Workshop about Language Resources for the SSH Cloud}, YEAR = {2020}, ABSTRACT = {This workshop was envisaged to focus on the goals and aims of realising the SSHOC part of the EOSC, where SSH data, language processing tools, and services are made available, adjusted and accessible for users across SSH domain. It provides a forum to discuss common requirements, challenges and opportunities for developing, enhancing, integrating tools and services for managing and processing SSH research data. Such SSH scenarios based implementations of currently existing language tools and services demonstrate their multidisciplinary usability and stimulate further multidisciplinary collaboration across the various subfields of SSH and beyond, which will increase the potential for societal impact.}, KEYWORDS = {Social Science and Humanities Open Cloud, EOSC, Language Resource Infrastructure}, PAGES = {1-46}, URL = {https://aclanthology.org/2020.lr4sshoc-1}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-43-6}, } @EDITORIAL{DISEGNI_2020_EDITORIAL_D_441369, AUTHOR = {Di Segni, R.}, TITLE = {Talmud Babilonese, Trattato Chaghigà (Sacrificio festivo)}, YEAR = {2020}, ABSTRACT = {Coordinamento della traduzione in italiano del trattato del Talmud Babilonese "Chaghigà", con commento esplicativo e note scientifiche, tramite il software Traduco messo a punto dall'ILC-CNR di Pisa}, KEYWORDS = {software Traduco, talmud babilonese}, PAGES = {299}, URL = 
{https://publications.cnr.it/doc/441369}, VOLUME = {12}, PUBLISHER = {La Giuntina (Firenze, ITA)}, ISBN = {978-88-8057-858-1}, } @EDITORIAL{PIRRELLI_2020_EDITORIAL_PPD_424203, AUTHOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.}, TITLE = {Word knowledge and word usage: a cross-disciplinary guide to the mental lexicon}, YEAR = {2020}, ABSTRACT = {This editorial project stemmed from a 4-year period of intense interdisciplinary research networking funded by the European Science Foundation within the framework of the NetWordS project (09-RNP-089).}, KEYWORDS = {interdisciplinarity, word knowledge, word usage, language units, statistical and computer modeling, levels of understanding, between-level mapping, linking hypotheses, scale effects}, PAGES = {1-717}, URL = {https://doi.org/10.1515/9783110440577}, VOLUME = {337}, DOI = {10.1515/9783110440577}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {978-3-11-051748-4}, } @INPROCEEDINGS{ALZETTA_2020_INPROCEEDINGS_ADMOSV_444113, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Osenova, P. and Simov, K. and Venturi, G.}, TITLE = {Quantitative linguistic investigations across universal dependencies treebanks}, YEAR = {2020}, ABSTRACT = {The paper illustrates a case study aimed at identifying cross-lingual quantitative trends in the distribution of dependency relations in treebanks for typologically different languages. Preliminary results show interesting differences rooted either in language-specific peculiarities or cross-lingual annotation inconsistencies, with a potential impact on different application scenarios.}, KEYWORDS = {Universal Dependencies Treebanks, Cross-linguistic analysis, Typology}, PAGES = {1-7}, URL = {http://ceur-ws.org/Vol-2769/paper_59.pdf}, VOLUME = {2769}, PUBLISHER = {M. 
Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, ISBN = {979-12-80136-28-2}, CONFERENCE_NAME = {7th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Online}, CONFERENCE_DATE = {1-3/03/2021}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{ALZETTA_2020_INPROCEEDINGS_AMDKT_442044, AUTHOR = {Alzetta, C. and Miaschi, A. and Dell'Orletta, F. and Koceva, F. and Torre, I.}, TITLE = {PRELEARN @ EVALITA 2020: Overview of the Prerequisite Relation Learning Task for Italian}, YEAR = {2020}, ABSTRACT = {The Prerequisite Relation Learning (PRELEARN) task is the EVALITA 2020 shared task on concept prerequisite learning, which consists of classifying prerequisite relations between pairs of concepts distinguishing between prerequisite pairs and non-prerequisite pairs. Four sub-tasks were defined: two of them define different types of features that participants are allowed to use when training their model, while the other two define the classification scenarios where the proposed models would be tested. In total, 14 runs were submitted by 3 teams comprising 9 total individual participants.}, KEYWORDS = {nlp, prerequisite learning, shared task}, URL = {http://ceur-ws.org/Vol-2765/paper164.pdf}, CONFERENCE_NAME = {Seventh Evaluation Campaign of Natural Language Processing and Speech Tools for Italian (EVALITA)}, CONFERENCE_DATE = {17/12/2020}, } @INPROCEEDINGS{BELLANDI_2020_INPROCEEDINGS_BG_427282, AUTHOR = {Bellandi, A. and Giovannetti, E.}, TITLE = {Involving Lexicographers in the LLOD Cloud with LexO, an Easy-to-use Editor of Lemon Lexical Resources}, YEAR = {2020}, ABSTRACT = {In this contribution, we show LexO, a user-friendly web collaborative editor of lexical resources based on the lemon model. 
LexO has been developed in the context of Digital Humanities projects, in which a key point in the design of an editor was the ease of use by lexicographers with no skill in Linked Data or Semantic Web technologies. Though the tool already allows creating a lemon lexicon from scratch and lets a team of users work on it collaboratively, many developments are possible. The involvement of the LLOD community appears now crucial both to find new users and application fields where to test it, and, even more importantly, to understand in which way it should evolve.}, KEYWORDS = {lexO, lexical resources editor, semantic web, llod}, PAGES = {70-74}, URL = {https://www.aclweb.org/anthology/2020.ldl-1.10.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-36-8}, CONFERENCE_NAME = {7th Workshop on Linked Data in Linguistics (LDL-2020)}, CONFERENCE_PLACE = {Marseille, France}, CONFERENCE_DATE = {22-23/06/2020}, EDITOR = {Ionov, M. and McCrae, J. P. and Chiarcos, C. and Declerck, T. and Bosque Gil, J. and Gracia, J.}, } @INPROCEEDINGS{BOSCHETTI_2020_INPROCEEDINGS_BDMBMD_462360, AUTHOR = {Boschetti, F. and Del Gratta, R. and Monachini, M. and Buzzoni, M. and Monella, P. and Del Turco, R. R.}, TITLE = {"Tea for two": the Archive of the Italian Latinity of the Middle Ages meets the CLARIN infrastructure}, YEAR = {2020}, ABSTRACT = {This paper presents the Archive of the Italian Latinity of the Middle Ages (ALIM) and focuses, particularly, on its structure and metadata for its integration into the ILC4CLARIN repository. 
Access to this archive of Latin texts produced in Italy during the Middle Ages is of great importance in providing CLARIN-IT and the CLARIN community, at large, with critically reliable texts for the use of philologists, historians of literature, historians of institutions, culture and science of the Middle Ages.}, KEYWORDS = {Latin resources, CLARIN, corpus, repository}, PAGES = {121-125}, URL = {https://office.clarin.eu/v/CE-2020-1738-CLARIN2020_ConferenceProceedings.pdf}, CONFERENCE_NAME = {CLARIN Annual Conference 2020}, CONFERENCE_DATE = {05-07/10/2020}, } @INPROCEEDINGS{BROEDER_2020_INPROCEEDINGS_BEM_472328, AUTHOR = {Broeder, D. and Eskevich, M. and Monachini, M.}, TITLE = {LR4SSHOC: The Future of Language Resources in the Context of the Social Sciences and Humanities Open Cloud}, YEAR = {2020}, ABSTRACT = {This paper outlines the future of language resources and identifies their potential contribution for creating and sustaining the social sciences and humanities (SSH) component of the European Open Science Cloud (EOSC).}, KEYWORDS = {EOSC, Social Science and Humanities Open Cloud}, PAGES = {33-36}, URL = {https://aclanthology.org/2020.lr4sshoc-1.6}, ISBN = {979-10-95546-43-6}, CONFERENCE_NAME = {LREC}, CONFERENCE_PLACE = {virtual}, CONFERENCE_DATE = {10/5/2020}, BOOKTITLE = {Proceedings of the Workshop about Language Resources for the SSH Cloud}, EDITOR = {Broeder, D. and Eskevich, M. and Monachini, M.}, } @INPROCEEDINGS{BRUNATO_2020_INPROCEEDINGS_BCDMVZ_444114, AUTHOR = {Brunato, D. and Chesi, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G. and Zamparelli, R.}, TITLE = {AcCompl-it @ EVALITA2020: Overview of the acceptability \& complexity evaluation task for Italian}, YEAR = {2020}, ABSTRACT = {The Acceptability and Complexity evaluation task for Italian (AcCompl-it) was aimed at developing and evaluating methods to classify Italian sentences according to Acceptability and Complexity. 
It consists of two independent tasks asking participants to predict either the acceptability or the complexity rate (or both) of a given set of sentences previously scored by native speakers on a 1-to-7 points Likert scale. In this paper, we introduce the datasets distributed to the participants, we describe the different approaches of the participating systems and provide a first analysis of the obtained results.}, KEYWORDS = {Shared Task, Linguistic Complexity, Acceptability}, PAGES = {1-8}, URL = {http://ceur-ws.org/Vol-2765/paper163.pdf}, VOLUME = {2765}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {EVALITA '20, Evaluation of NLP and Speech Tools for Italian}, CONFERENCE_PLACE = {Online}, CONFERENCE_DATE = {17/12/2020}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{BRUNATO_2020_INPROCEEDINGS_BCDMV_435966, AUTHOR = {Brunato, D. and Cimino, A. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Profiling-UD: a Tool for Linguistic Profiling of Texts}, YEAR = {2020}, ABSTRACT = {In this paper, we introduce Profiling-UD, a new text analysis tool inspired to the principles of linguistic profiling that can support language variation research from different perspectives. It allows the extraction of more than 130 features, spanning across different levels of linguistic description. Beyond the large number of features that can be monitored, a main novelty of Profiling-UD is that it has been specifically devised to be multilingual since it is based on the Universal Dependencies framework. 
In the second part of the paper, we demonstrate the effectiveness of these features in a number of theoretical and applicative studies in which they were successfully used for text and author profiling.}, KEYWORDS = {Computational Language Variation Analysis, Linguistic Profiling, Universal Dependencies}, PAGES = {7145-7151}, URL = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.883.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-34-4}, CONFERENCE_NAME = {Conference on Language Resources and Evaluation (LREC)}, CONFERENCE_DATE = {11-16/05/2020}, } @INPROCEEDINGS{CALAMAI_2020_INPROCEEDINGS_CPMSBB_466823, AUTHOR = {Calamai, S. and Pretto, N. and Monachini, M. and Stamuli, M. F. and Bianchi, S. and Bonazzoli, P.}, TITLE = {Building a Home for Italian Audio Archives}, YEAR = {2020}, ABSTRACT = {Audio and audiovisual archives are at the crossroads of different fields of knowledge, yet they require common solutions for both their long-term preservation and their description, availability, use and reuse. Archivio Vi.Vo. is an Italian project financed by the Tuscany Region, aiming to (i) explore methods for long-term preservation and secure access to oral sources and (ii) develop an infrastructure under the CLARIN-IT umbrella offering several services for scholars from different domains interested in oral sources. This paper describes the project's infrastructure and its methodology through a case study on the Caterina Bueno's audio archive.}, KEYWORDS = {oral archives, infrastructures}, PAGES = {112-116}, URL = {https://publications.cnr.it/doc/466823}, ISSN = {2773-2177}, CONFERENCE_NAME = {CLARIN Annual Conference 2020}, CONFERENCE_PLACE = {virtual}, CONFERENCE_DATE = {5/10/2020-7/10/2020}, } @INPROCEEDINGS{DEMATTEI_2020_INPROCEEDINGS_DDIMPR_442042, AUTHOR = {De Mattei, L. and De Martino, G. and Iovine, A. and Miaschi, A. and Polignano, M. 
and Rambelli, G.},
  TITLE = {ATE\_ABSITA @ EVALITA2020: Overview of the Aspect Term Extraction and Aspect-based Sentiment Analysis Task},
  YEAR = {2020},
  ABSTRACT = {Over the last years, the rise of novel sentiment analysis techniques to assess aspect-based opinions on product reviews has become a key component for providing valuable insights to both consumers and businesses. To this extent, we propose ATE\_ABSITA: the EVALITA 2020 shared task on Aspect Term Extraction and Aspect-Based Sentiment Analysis. In particular, we approach the task as a cascade of three subtasks: Aspect Term Extraction (ATE), Aspect-based Sentiment Analysis (ABSA) and Sentiment Analysis (SA). Therefore, we invited participants to submit systems designed to automatically identify the "aspect terms" in each review and to predict the sentiment expressed for each aspect, along with the sentiment of the entire review. The task received broad interest, with 27 teams registered and more than 45 participants. However, only three teams submitted their working systems. The results obtained underline the task's difficulty, but they also show how it is possible to deal with it using innovative approaches and models. Indeed, two of them are based on large pre-trained language models as typical in the current state of the art for the English language.},
  KEYWORDS = {nlp, sentiment analysis, shared task},
  URL = {http://ceur-ws.org/Vol-2765/paper153.pdf},
  CONFERENCE_NAME = {Seventh Evaluation Campaign of Natural Language Processing and Speech Tools for Italian (EVALITA)},
  CONFERENCE_DATE = {17/12/2020},
}

@INPROCEEDINGS{DELGRATTA_2020_INPROCEEDINGS_DBBS_462341,
  AUTHOR = {Del Gratta, R. and Boschetti, F. and Bambaci, L. and Sarnari, F.},
  TITLE = {Approaching document analysis with a formal model},
  YEAR = {2020},
  ABSTRACT = {We introduce a formal approach to document and text analysis.
The method proposed herein results in a mathematical and physical model/framework which can formalize different challenges in research fields such as computational linguistics, digital philology, and software engineering, principally if applied to document and text analysis. We examine texts and documents from an evolutionary perspective, where both corruption and correction are involved. We describe document evolution via fiber bundles formalism.},
  KEYWORDS = {Formal model, document analysis, evolutionary approach},
  PAGES = {208--214},
  URL = {https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=\&arnumber=9357202\&tag=1},
  DOI = {10.1109/CIST49399.2021.9357202},
  CONFERENCE_NAME = {6th International IEEE Colloquium on Information Science and Technology},
  CONFERENCE_PLACE = {Agadir, Morocco},
  CONFERENCE_DATE = {5-12/06/2021},
}

@INPROCEEDINGS{DELGROSSO_2020_INPROCEEDINGS_DFENT_439862,
  AUTHOR = {Del Grosso, A. M. and Fassi, F. D. and El Mohajir, M. and Nahli, O. and Tonazzini, A.},
  TITLE = {Digital safeguard of laminated historical manuscripts: the treatise "Poem in Rajaz on medicine" as a case study},
  YEAR = {2020},
  ABSTRACT = {In this paper, we analyze and discuss the characteristics of a system for the effective digital preservation and fruition of historical manuscripts degraded by the process of lamination. As a case study, we will make reference to the "Poem in Rajaz on medicine", written by Abubacer in the XII century, and conserved in the Al Quaraouiyine Library located in Fez, Morocco. The conceived system should have at least four main functionalities: image acquisition (i.e. digitization), image enhancement, text encoding, and linguistic analysis. Based on the evaluation of the manuscript damages, the acquisition set up should be designed in such a way to be able to avoid reflections as much as possible. Suitable digital image processing techniques should also be devised to correct the residual degradations and enhance the text for an easier legibility.
Finally, semi-automatic transcription, scholarly encoding and linguistic analysis, to be performed on the virtually restored pages, should adapt existing tools to the specificity of the primary source writing system and language. The feasibility study for the realization of such a system is of general utility, in that it can provide guidelines for the digitization, the enhancement and the text encoding of the many laminated manuscripts conserved in other historical archives. On the other hand, from the cultural heritage point of view, the experimentation on the "Poem in Rajaz on medicine" could foster the systematic philological and ontological study of a unique piece of our documental heritage: the longest poem of medieval Islamic medical literature.},
  KEYWORDS = {Cultural Heritage, Digital Safeguard, Historical Manuscript Digitization, Document Image Processing, Linguistic Analysis, Ontological Analysis},
  PAGES = {192--197},
  URL = {https://ieeexplore.ieee.org/document/9357192},
  DOI = {10.1109/CiSt49399.2021.9357192},
  PUBLISHER = {IEEE (New York, USA)},
  ISBN = {978-1-7281-6646-9},
  CONFERENCE_NAME = {CiSt'2020-6th IEEE Congress on Information Science \& Technology},
  CONFERENCE_PLACE = {Agadir-Essaouira, Morocco},
  CONFERENCE_DATE = {June 5-12, 2021},
}

@INPROCEEDINGS{DELGROSSO_2020_INPROCEEDINGS_DGM_427281,
  AUTHOR = {Del Grosso, A. M. and Giovannetti, E. and Marchi, S.},
  TITLE = {Enriching a Multilingual Terminology Exploiting Parallel Texts: An Experiment on the Italian Translation of the Babylonian Talmud},
  YEAR = {2020},
  ABSTRACT = {Parallel texts can represent an extremely useful source of information in a number of text and linguistic processing tasks. In this work we show an experiment conducted on the Italian translation of the Babylonian Talmud, a text we have analyzed and processed to support in the construction of a multilingual Hebrew/Aramaic/Italian terminological resource.
The approach we adopted comprised: i) the TEI encoding of the text, ii) the automatic extraction of the Italian terms, iii) the addition of Hebrew/Aramaic terms via word-by-word alignment, iv) the revision of the obtained results.},
  KEYWORDS = {multilingual terminology, parallel text, text alignment, babylonian talmud},
  PAGES = {119--124},
  URL = {http://amsacta.unibo.it/6316/1/AIUCD_2020_volume_FINAL.pdf},
  DOI = {10.6092/unibo/amsacta/6316},
  ISBN = {978-88-942535-4-2},
  CONFERENCE_NAME = {IX Convegno Annuale AIUCD},
  CONFERENCE_PLACE = {Milano: Università Cattolica del Sacro Cuore},
  CONFERENCE_DATE = {15-17/01/2020},
}

@INPROCEEDINGS{DIDONATO_2020_INPROCEEDINGS_DMEPMD_425644,
  AUTHOR = {Di Donato, F. and Monachini, M. and Eskevich, M. and Pohle, S. and Moranville, Y. and Dumouchel, S.},
  TITLE = {Social Sciences and Humanities Pathway. Towards the European Open Science Cloud},
  YEAR = {2020},
  ABSTRACT = {The paper describes a journey which starts from various social sciences and humanities (SSH) Research Infrastructures (RI) in Europe and arrives at the comprehensive "ecosystem of infrastructures", namely the European Open Science Cloud (EOSC). We highlight how the SSH Open Science infrastructures contribute to the goal of establishing the EOSC. First, through the example of OPERAS, the European Research Infrastructure for Open Scholarly Communication in the SSH, to see how its services are conceived to be part of the EOSC and to address the communities' needs. The next two sections highlight collaboration practices between partners in Europe to build the SSH component of the EOSC and a SSH discovery platform, as a service of OPERAS and the EOSC.
The last two sections focus on an implementation network dedicated to SSH data fairification.},
  KEYWORDS = {EOSC, Open science, SSH, Infrastructures},
  PAGES = {5--9},
  URL = {https://www.aclweb.org/anthology/2020.lr4sshoc-1.2.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {979-10-95546-43-6},
  CONFERENCE_NAME = {Language Resources and Evaluation Conference (LREC 2020)},
  CONFERENCE_PLACE = {Marseille},
  CONFERENCE_DATE = {11-16/05/2020},
  BOOKTITLE = {Proceedings of the Workshop about Language Resources for the SSH Cloud},
  EDITOR = {Broeder, D. and Eskevich, M. and Monachini, M.},
}

@INPROCEEDINGS{DUVAL_2020_INPROCEEDINGS_DG_442467,
  AUTHOR = {Duval, F. and Guadagnini, E.},
  TITLE = {La rappresentazione lessicale del teatro antico nel Medioevo francese e italiano: per una lessicologia storica tra "transferts culturels" e comparatismo},
  YEAR = {2020},
  ABSTRACT = {Studio della ricezione italiana e francese medievale del lessico e dei concetti del teatro classico.},
  KEYWORDS = {Lessicologia, teatro},
  PAGES = {21--44},
  URL = {http://www.brepols.net/Pages/ShowProduct.aspx?prod_id=IS-9782503587714-1},
  VOLUME = {8},
  PUBLISHER = {Brepols (Turnhout, BEL)},
  ISBN = {9782503587714},
  CONFERENCE_NAME = {Transferts culturels franco-italiens au Moyen Âge, Colloque organisé par la «Società italiana di Filologia romanza», la «Société de langues et littératures médiévales d'oc et d'oïl» et la «Société de Linguistique romane», sous le haut patronage de l'Académie des Inscriptions et Belles-Lettres et de la Fondation Primoli},
  CONFERENCE_PLACE = {Paris},
  CONFERENCE_DATE = {20-22 septembre 2018},
  BOOKTITLE = {Transferts culturels franco-italiens au Moyen Âge-Trasferimenti culturali italo francesi},
  EDITOR = {Galderisi, C. and Antonelli, R. and Punzi, A. and Ducos, J.},
}

@INPROCEEDINGS{FERRO_2020_INPROCEEDINGS_FGC_441873,
  AUTHOR = {Ferro, M. and Giulivi, S.
and Cappa, C.},
  TITLE = {The AEREST reading database},
  YEAR = {2020},
  ABSTRACT = {Aerest is a reading assessment protocol for the concurrent evaluation of a child's decoding and comprehension skills. Reading data complying with the Aerest protocol were automatically collected and structured with the ReadLet web-based platform in a pilot study, to form the Aerest Reading Database. The content, structure and potential of the database are described here, together with the main directions of current and future developments.},
  KEYWORDS = {reading database, reading efficiency, decoding, comprehension, multimodal analysis},
  PAGES = {1--6},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85097912116\&origin=inward},
  VOLUME = {2769},
  PUBLISHER = {Accademia University Press (Torino, ITA)},
  ISSN = {1613-0073},
  ISBN = {9791280136282},
  CONFERENCE_NAME = {7th Italian Conference on Computational Linguistics (CLIC-IT'20)},
  CONFERENCE_PLACE = {Bologna, Italy},
  CONFERENCE_DATE = {01-03/03/2021},
  BOOKTITLE = {CEUR workshop proceedings},
}

@INPROCEEDINGS{LENCI_2020_INPROCEEDINGS_LMBDDDDMPPVL_435958,
  AUTHOR = {Lenci, A. and Montemagni, S. and Boschetti, F. and De Felice, I. and Dei Rossi, S. and Dell'Orletta, F. and Di Giorgio, M. and Miliani, M. and Passaro, L. C. and Puddu, A. and Venturi, G. and Labanca, N.},
  TITLE = {Voices of the Great War: A Richly Annotated Corpus of Italian Texts on the First World War},
  YEAR = {2020},
  ABSTRACT = {Voci della Grande Guerra ("Voices of the Great War") is the first large corpus of Italian historical texts dating back to the period of First World War. This corpus differs from other existing resources in several respects. First, from the linguistic point of view it gives account of the wide range of varieties in which Italian was articulated in that period, namely from a diastratic (educated vs. uneducated writers), diaphasic (low/informal vs. high/formal registers) and diatopic (regional varieties, dialects) points of view.
From the historical perspective, through a collection of texts belonging to different genres it represents different views on the war and the various styles of narrating war events and experiences. The final corpus is balanced along various dimensions, corresponding to the textual genre, the language variety used, the author type and the typology of conveyed contents. The corpus is annotated with lemmas, part-of-speech, terminology, and named entities. Significant corpus samples representative of the different "voices" have also been enriched with meta-linguistic and syntactic information. The layer of syntactic annotation forms the first nucleus of an Italian historical treebank complying with the Universal Dependencies standard. The paper illustrates the final resource, the methodology and tools used to build it, and the Web Interface for navigating it.},
  KEYWORDS = {Historical Corpora, Linguistic and Meta-linguistic Annotation, Information Extraction},
  PAGES = {911--918},
  URL = {https://www.aclweb.org/anthology/2020.lrec-1.114.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {979-10-95546-34-4},
  CONFERENCE_NAME = {Conference on Language Resources and Evaluation (LREC)},
  CONFERENCE_DATE = {11-16/05/2020},
}

@INPROCEEDINGS{MARZI_2020_INPROCEEDINGS_MRNTP_438979,
  AUTHOR = {Marzi, C. and Rodella, A. and Nadalini, A. and Taxitari, L. and Pirrelli, V.},
  TITLE = {Does finger-tracking point to child reading strategies?},
  YEAR = {2020},
  ABSTRACT = {The movement of a child's index finger that points to a printed text while (s)he is reading may provide a proxy for the child's eye movements and attention focus. We validated this correlation by showing a quantitative analysis of patterns of "finger-tracking" of Italian early graders engaged in reading a text displayed on a tablet. A web application interfaced with the tablet monitors the reading behaviour by modelling the way the child points to the text while reading.
The analysis found significant developmental trends in reading strategies, marking an interesting contrast between typically developing and atypically developing readers.},
  KEYWORDS = {reading assessment, reading strategies, mobile technology, special education needs},
  PAGES = {1--7},
  URL = {http://ceur-ws.org/Vol-2769/paper_60.pdf},
  VOLUME = {2769},
  PUBLISHER = {CEUR-WS.org (Aachen, DEU)},
  ISSN = {1613-0073},
  CONFERENCE_NAME = {Italian Conference on Computational Linguistics 2020},
  CONFERENCE_PLACE = {Bologna},
  CONFERENCE_DATE = {1-3/03/2021},
  BOOKTITLE = {Proceedings of the Seventh Italian Conference on Computational Linguistics},
  EDITOR = {Monti, J. and Dell'Orletta, F. and Tamburini, F.},
}

@INPROCEEDINGS{MIASCHI_2020_INPROCEEDINGS_MABDV_442040,
  AUTHOR = {Miaschi, A. and Alzetta, C. and Brunato, D. and Dell'Orletta, F. and Venturi, G.},
  TITLE = {Is Neural Language Model Perplexity Related to Readability?},
  YEAR = {2020},
  ABSTRACT = {This paper explores the relationship between Neural Language Model (NLM) perplexity and sentence readability. Starting from the evidence that NLMs implicitly acquire sophisticated linguistic knowledge from a huge amount of training data, our goal is to investigate whether perplexity is affected by linguistic features used to automatically assess sentence readability and if there is a correlation between the two metrics. Our findings suggest that this correlation is actually quite weak and the two metrics are affected by different linguistic phenomena.},
  KEYWORDS = {nlp, neural language models, readability},
  URL = {http://ceur-ws.org/Vol-2769/paper_57.pdf},
  ISBN = {979-12-80136-28-2},
  CONFERENCE_NAME = {Seventh Italian Conference on Computational Linguistics},
  CONFERENCE_DATE = {01-03/03/2021},
}

@INPROCEEDINGS{MIASCHI_2020_INPROCEEDINGS_MBDV_438491,
  AUTHOR = {Miaschi, A. and Brunato, D. and Dell'Orletta, F.
and Venturi, G.},
  TITLE = {Linguistic Profiling of a Neural Language Model},
  YEAR = {2020},
  ABSTRACT = {In this paper we investigate the linguistic knowledge learned by a Neural Language Model (NLM) before and after a fine-tuning process and how this knowledge affects its predictions during several classification problems. We use a wide set of probing tasks, each of which corresponds to a distinct sentence-level feature extracted from different levels of linguistic annotation. We show that BERT is able to encode a wide range of linguistic characteristics, but it tends to lose this information when trained on specific downstream tasks. We also find that BERT's capacity to encode different kind of linguistic properties has a positive influence on its predictions: the more it stores readable linguistic information of a sentence, the higher will be its capacity of predicting the expected label assigned to that sentence.},
  KEYWORDS = {Linguistic Profiling, Neural Language Model, Interpretability},
  PAGES = {745--756},
  URL = {https://www.aclweb.org/anthology/2020.coling-main.65/},
  ISBN = {978-1-952148-27-9},
  CONFERENCE_NAME = {International Conference on Computational Linguistics (COLING)},
  CONFERENCE_PLACE = {Online},
  CONFERENCE_DATE = {8-13/12/2020},
}

@INPROCEEDINGS{MIASCHI_2020_INPROCEEDINGS_MDBDSSV_435969,
  AUTHOR = {Miaschi, A. and Davidson, S. and Brunato, D. and Dell'Orletta, F. and Sagae, K. and Sanchez Gutierrez, C. H. and Venturi, G.},
  TITLE = {Tracking the Evolution of Written Language Competence in L2 Spanish Learners},
  YEAR = {2020},
  ABSTRACT = {In this paper we present an NLP-based approach for tracking the evolution of written language competence in L2 Spanish learners using a wide range of linguistic features automatically extracted from students' written productions.
Beyond reporting classification results for different scenarios, we explore the connection between the most predictive features and the teaching curriculum, finding that our set of linguistic features often reflects the explicit instruction that students receive during each course.},
  KEYWORDS = {Evolution of Language Competence, Natural Language Processing, Linguistic Profiling},
  PAGES = {92--101},
  URL = {https://www.aclweb.org/anthology/2020.bea-1.9.pdf},
  DOI = {10.18653/v1/2020.bea-1.9},
  PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)},
  ISBN = {978-1-941643-83-9},
  CONFERENCE_NAME = {15th Workshop on Innovative Use of NLP for Building Educational Applications},
  CONFERENCE_DATE = {10/07/2020},
}

@INPROCEEDINGS{MIASCHI_2020_INPROCEEDINGS_MD_442036,
  AUTHOR = {Miaschi, A. and Dell'Orletta, F.},
  TITLE = {Contextual and Non-Contextual Word Embeddings: an in-depth Linguistic Investigation},
  YEAR = {2020},
  ABSTRACT = {In this paper we present a comparison between the linguistic knowledge encoded in the internal representations of a contextual Language Model (BERT) and a contextual-independent one (Word2vec). We use a wide set of probing tasks, each of which corresponds to a distinct sentence-level feature extracted from different levels of linguistic annotation. We show that, although BERT is capable of understanding the full context of each word in an input sequence, the implicit knowledge encoded in its aggregated sentence representations is still comparable to that of a contextual-independent model.
We also find that BERT is able to encode sentence-level properties even within single-word embeddings, obtaining comparable or even superior results than those obtained with sentence representations.},
  KEYWORDS = {nlp, interpretability, representation learning},
  PAGES = {110--119},
  URL = {https://www.aclweb.org/anthology/2020.repl4nlp-1.15},
  DOI = {10.18653/v1/2020.repl4nlp-1.15},
  ISBN = {978-1-952148-15-6},
  CONFERENCE_NAME = {5th Workshop on Representation Learning for NLP},
  CONFERENCE_DATE = {09/07/2020},
}

@INPROCEEDINGS{MIASCHI_2020_INPROCEEDINGS_MSBDV_442038,
  AUTHOR = {Miaschi, A. and Sarti, G. and Brunato, D. and Dell'Orletta, F. and Venturi, G.},
  TITLE = {Italian Transformers Under the Linguistic Lens},
  YEAR = {2020},
  ABSTRACT = {In this paper we present an in-depth investigation of the linguistic knowledge encoded by the transformer models currently available for the Italian language. In particular, we investigate whether and how using different architectures of probing models affects the performance of Italian transformers in encoding a wide spectrum of linguistic features. Moreover, we explore how this implicit knowledge varies according to different textual genres.},
  KEYWORDS = {nlp, neural language models, interpretability},
  URL = {http://ceur-ws.org/Vol-2769/paper_56.pdf},
  ISBN = {979-12-80136-28-2},
  CONFERENCE_NAME = {Seventh Italian Conference on Computational Linguistics (CLiC-it)},
  CONFERENCE_DATE = {01-03/03/2021},
}

@INPROCEEDINGS{NAHLI_2020_INPROCEEDINGS_ND_439789,
  AUTHOR = {Nahli, O. and Del Grosso, A. M.},
  TITLE = {Creating Arabic Lexical Resources in TEI; A Schema for Discontinuous Morphology Encoding},
  YEAR = {2020},
  ABSTRACT = {This article aims at formally grouping lexical and morphological information in order to obtain an electronic resource with respect to the Arabic language starting from the classical dictionary al=qāmūs al=muḥīṭ.
This contribution examines practical aspects about the adoption of the guidelines provided by the Text Encoding Initiative (TEI) to encode the Arabic dictionary as a primary source. Moreover, the contribution points out a possible way to integrate semantic, morphological and syntactic information characterizing word patterns within the same TEI document. Specifically, the formalization of word patterns allows us to emphasize additional morphosyntactic regularities mainly concerning word distribution within sentences. Consequently, the obtained digital object represents both the medieval Arabic dictionary and a suitable resource that can be exploited for a number of Natural Language Processing tasks.},
  KEYWORDS = {classical Arabic dictionary, digital lexicography, al=qāmūs al=muḥīṭ, word patterns, TEI},
  PAGES = {9},
  URL = {https://publications.cnr.it/doc/439789},
  DOI = {10.1109/CiSt49399.2021.9357273},
  PUBLISHER = {IEEE (New York, USA)},
  ISBN = {978-1-7281-6646-9},
  CONFERENCE_NAME = {IEEE-CIST2020 DPWH},
  CONFERENCE_PLACE = {Agadir-Essaouira, Morocco},
  CONFERENCE_DATE = {5/06/2021-12/06/2021},
}

@INPROCEEDINGS{RICCI_2020_INPROCEEDINGS_RMBSD_430888,
  AUTHOR = {Ricci, L. and Melighetti, F. and Boschetti, F. and Salvatori, E. and Del Grosso, A. M.},
  TITLE = {DH as an Ideal Educational Environment: the Ethnographic Museum of La Spezia},
  YEAR = {2020},
  ABSTRACT = {The authors present the outcomes of an educational experimentation that took place in the academic year 2018-2019 at the degree course in Informatica Umanistica at the University of Pisa. The first objective of the project concerned the digitization of a corpus of postcards from the period of the First World War owned by the ethnographic Museum of La Spezia "G. Podenzana".
The aims of the work are not only the historical study of the corpus, but also the organization of a public history project with the Museum.},
  KEYWORDS = {digital humanities},
  PAGES = {222--226},
  URL = {http://amsacta.unibo.it/id/eprint/6316},
  DOI = {10.6092/unibo/amsacta/6316},
  ISBN = {978-88-942535-4-2},
  CONFERENCE_NAME = {La svolta inevitabile: sfide e prospettive per l'Informatica Umanistica},
  CONFERENCE_PLACE = {Milano},
  CONFERENCE_DATE = {15/01/2020-17/01/2020},
  BOOKTITLE = {Quaderni di Umanistica Digitale},
  EDITOR = {Marras, C. and Passarotti, M. and Franzini, G. and Litta, E.},
}

@INPROCEEDINGS{SANNA_2020_INPROCEEDINGS_SCBN_439796,
  AUTHOR = {Sanna, A. and Cinerari, R. and Boschetti, F. and Nahli, O.},
  TITLE = {Digitizing and Encoding a Multilingual Literary Review: Commerce Numerique},
  YEAR = {2020},
  ABSTRACT = {Commerce was an important literary review founded in Paris by Princess Margherita Caetani, Prince Roffredo Caetani's wife. Born in America, she was polyglot and maecenas. Between 1924 and 1932 she surrounded herself with three prestigious collaborators: Paul Valéry, Léon-Paul Fargue, Valéry Larbaud. The review promoted the translation of World and European literature in French, translating some of the most important authors like Joyce, T.S. Eliot, Pirandello, Ungaretti, Saint-John Perse, Rilke, Hofmannsthal. The aim of this project is to promote by digitizing the dissemination of the review, to develop studies and research concerning the Caetani family's cultural activities in Europe. All the volumes of the literary review Commerce have been scanned, acquired by OCR and encoded in TEI-XML.
The cultural value of the operation is discussed and the work-flow to create the digital textual corpus is described in detail.},
  KEYWORDS = {Review Commerce, OCR, TEI encoding, literary review, digital resources},
  PAGES = {4},
  URL = {https://publications.cnr.it/doc/439796},
  PUBLISHER = {IEEE (New York, USA)},
  ISBN = {978-1-7281-6646-9},
  CONFERENCE_NAME = {IEEE-CIST2020 DPWH},
  CONFERENCE_PLACE = {Agadir-Essaouira, Morocco},
  CONFERENCE_DATE = {5/06/2021-12/06/2021},
}

@INPROCEEDINGS{SASSOLINI_2020_INPROCEEDINGS_SB_455300,
  AUTHOR = {Sassolini, E. and Biffi, M.},
  TITLE = {Strategie e metodi per il recupero di dizionari storici},
  YEAR = {2020},
  ABSTRACT = {L'articolo descrive un approccio sperimentale all'estrazione, da formato digitale non standard, della completa struttura delle entrate lessicali del Grande Dizionario storico della Lingua Italiana (GDLI) di S. Battaglia. Sono riportati i risultati preliminari di una collaborazione tra l'Accademia della Crusca e Istituto di Linguistica Computazionale "A. Zampolli" del CNR, che mira a convertire i contenuti testuali in dati digitali strutturati per offrirli alla consultazione e allo studio degli utenti e/o per la successiva integrazione con altre risorse linguistiche, sia dizionari che corpora. Il processo di estrazione si articola da un lato nella definizione di procedure di estrazione dei dati, dall'altro nell'adozione di strategie finalizzate al supporto alla correzione degli errori.},
  KEYWORDS = {Archivi digitali, recupero e conservazione, estrazione dell'informazione},
  PAGES = {235--239},
  URL = {https://publications.cnr.it/doc/455300},
  DOI = {10.6092/unibo/amsacta/6316},
  ISBN = {978-88-942535-4-2},
  CONFERENCE_NAME = {IX Convegno annuale AIUCD: LA SVOLTA INEVITABILE: SFIDE E PROSPETTIVE PER L'INFORMATICA UMANISTICA},
  CONFERENCE_PLACE = {Università Cattolica del Sacro Cuore, Milano},
  CONFERENCE_DATE = {15-17/01/2020},
}

@INPROCEEDINGS{BIANCHI_2020_INPROCEEDINGS_BCMPS_466816,
  AUTHOR = {Bianchi, S. and Calamai, S.
and Monachini, M. and Pretto, N. and Stamuli, M. F.},
  TITLE = {The grey-side of audio archives},
  YEAR = {2020},
  ABSTRACT = {Archives often include documents that can hardly be considered publications or grey literature as such, yet they maintain their documentary value and play a role of primary sources for the specialists. These documents, indeed, can help archivists to reveal the sedimentation process of the archive itself and to preserve the authentic context of the documentary production. They also appear to be very useful for the community of researchers and scholars. This happens more frequently with oral archives which include 'non-conventional sources', thus bringing together audio documents, fieldworks notes, correspondence, slipcases, analogic compact cassettes or open reels. At the cross-road of two disciplines, Archival Science and Grey Literature, this paper aims to argue the applicability of the concept of grey literature to this wide range of documentary materials, by showing the experience of Archivio Vi.Vo, a regional project aiming at building a model for archiving, preserving, managing and disseminating audio documents.},
  KEYWORDS = {archivi orali},
  URL = {https://publications.cnr.it/doc/466816},
  VOLUME = {2020-November},
  PUBLISHER = {TransAtlantic (Amsterdam, Paesi Bassi)},
  ISSN = {1386-2316},
  CONFERENCE_NAME = {GL2020 22nd International Conference on Grey Literature},
  CONFERENCE_DATE = {20/11/2020},
  BOOKTITLE = {The GL-conference series. Conference proceedings},
}

@INPROCEEDINGS{BOSCHETTI_2020_INPROCEEDINGS_BDM_416444,
  AUTHOR = {Boschetti, F. and Del Gratta, R.
and Monachini, M.},
  TITLE = {Latin digital archives and research infrastructures: just a trendy option or a substantive need?},
  YEAR = {2020},
  ABSTRACT = {Latin digital archives and research infrastructures: just a trendy option or a substantive need?},
  KEYWORDS = {Research Infrastructure, Digital Libraries, Latin},
  URL = {https://publications.cnr.it/doc/416444},
  CONFERENCE_NAME = {ALIM and beyond},
  CONFERENCE_PLACE = {Venezia},
  CONFERENCE_DATE = {27-28/01/2020},
}

@INPROCEEDINGS{DIDONATO_2020_INPROCEEDINGS_DP_437795,
  AUTHOR = {Di Donato, F. and Pohle, S.},
  TITLE = {GOTRIPLE: Building an innovative discovery platform for the social sciences and humanities},
  YEAR = {2020},
  ABSTRACT = {The proposed poster presents the goals of the TRIPLE project and the ways the project is addressing them both through the work of its 8 intertwined work packages, and via the collaboration with existing research infrastructures in SSH, i.e. mainly CESSDA, CLARIN and DARIAH.},
  KEYWORDS = {GOTRIPLE, discovery, EOSC},
  URL = {https://www.clarin.eu/sites/default/files/clarin2020_bazaar_didonato_triple.pdf},
  CONFERENCE_NAME = {Clarin Annual Conference 2020},
  CONFERENCE_DATE = {05/10/2020},
}

@INPROCEEDINGS{FRONTINI_2020_INPROCEEDINGS_F_437563,
  AUTHOR = {Frontini, F.},
  TITLE = {Dans les coulisses des infrastructures européennes en SHS. Rôle et opportunités pour les acteurs de la recherche (ingénieurs et chercheurs)},
  YEAR = {2020},
  ABSTRACT = {La composante technologique prend une dimension de jour en jour plus importante en LLASHS. Les projets de recherche sont de plus en plus nombreux à mobiliser de gros volumes de données exigeant des services adaptés garants de formes de méthodologies augmentées (exploitation, interopérabilité, accessibilité, archivage). Afin de partager les savoirs et de garantir l'interopérabilité et la préservation à long terme de ces ressources et services, de grandes infrastructures informatiques se mettent en place aux niveaux national et international.
Dans cette présentation, vous allez découvrir le panorama, en la matière, des e-infrastructures et des grands projets européens à caractère infrastructurel, avec un accent particulier sur les technologies utilisées, les principaux services offerts, et les aspects les plus intéressants en termes de synergie entre approches et disciplines différentes. La présentation portera sur des ERICs (European Research Infrastructure Consortium) établis, comme CLARIN et DARIAH, et sur des projets récents ou en cours de développement, comme PARTHENOS, SSHOC, ELEXIS et TRIPLE. Concernant les aspects techniques, on abordera les questions liées au dépôt, au stockage, à l'identification (single sign on), aux formats et choix des métadonnées et de modélisation formelle, à la recherche fédérée des sources. Nous soulignerons en particulier l'interaction de ces projets avec les infrastructures nationales, notamment Huma-Num, ainsi qu'avec la récemment constituée European Open Science Cloud (EOSC). La présentation aura une visée pratique, avec l'objectif de fournir des indications concrètes aux acteurs de la recherche (chercheurs, ingénieurs...) qui souhaitent participer à ces initiatives et aux groupes de travail qui les animent, ou plus largement favoriser l'accès des chercheurs français aux nombreux services et opportunités offerts.},
  KEYWORDS = {Infrastrutture di ricerca, Scienze umane e sociali},
  URL = {https://ja-mate2020.sciencesconf.org/data/pages/Resume_Frontini_Nov.pdf},
  CONFERENCE_NAME = {Journées annuelles du réseau Mate-shs (JA2020)},
  CONFERENCE_PLACE = {Montpellier},
  CONFERENCE_DATE = {10/11/2020},
}

@INPROCEEDINGS{PIRRELLI_2020_INPROCEEDINGS_PCCDFGMNT_442758,
  AUTHOR = {Pirrelli, V. and Cappa, C. and Crepaldi, D. and Del Pinto, V. and Ferro, M. and Giulivi, S. and Marzi, C. and Nadalini, A.
and Taxitari, L.},
  TITLE = {Tracking the pace of reading with finger movements},
  YEAR = {2020},
  ABSTRACT = {Recent experimental evidence in visual perception analysis shows that eye and finger movements strongly correlate during scene exploration, at both individual and group levels. A familiar context which exploits this synergistic behaviour is when children learn to read, with the practice of finger-pointing to text as a support for their attention focus, directional movement and voice-print match. Using a tablet to display short texts, we collected evidence on the finger-pointing behaviour of 3rd-6th Italian graders engaged in both silent and oral reading. "Finger-tracking" data, sampled by the tablet and aligned with the text, made it possible to time a child's reading pace at word and sentence level. Results are shown to replicate established benchmarks in the reading literature, such as the difference in reading pace between age-matched typical and atypical readers as a function of word frequency and length, and neighbourhood entropy and Old20. Atypical readers show increasing difficulty with longer words, with a steeper time increment for word length > 6, integrating previous evidence. In addition, neighbourhood density plays a sparse facilitative role in atypical reading, with no significant interaction with neighbourhood entropy, pointing to a non trivial developmental interplay between sublexical reading and the richness of the Italian orthographic-phonological lexicon.
Despite their different dynamics, optical and tactile strategies for text exploration prove to be highly congruent: this suggests that finger-tracking can be used as an ecological proxy for eye-tracking in reading assessment.}, KEYWORDS = {Reading, Finger tracking, Mental Lexicon, Word frequency, Word Length, Neighbourhood entropy}, PAGES = {1}, URL = {https://osf.io/hr62g/}, CONFERENCE_NAME = {Words in the World International Conference}, CONFERENCE_PLACE = {Montreal (Canada)}, CONFERENCE_DATE = {16-18/10/2020}, } @TECHREPORT{BARTOLINI_2020_TECHREPORT_BQMA_453502, AUTHOR = {Bartolini, R. and Quochi, V. and Monachini, M. and Affé, F.}, TITLE = {Relazione di fine progetto "PIM-Piattaforma Integrata Monitoraggio"}, YEAR = {2020}, ABSTRACT = {Il documento presenta l'attività svolta dal CNR-ILC nel ruolo di subcontraente di COMDATA per la realizzazione di moduli di trattamento automatico del linguaggio e la consulenza per l'integrazione di metodi di clustering automatico di documenti nella Digital Library del progetto PIM.}, KEYWORDS = {accesso intelligente al testo, digital library, natural language processing}, PAGES = {156}, URL = {https://publications.cnr.it/doc/453502}, } @TECHREPORT{CARDILLO_2020_TECHREPORT_CS_428576, AUTHOR = {Cardillo, F. A. and Straccia, U.}, TITLE = {Fuzzy OWL-BOOST: Learning Fuzzy Concept Inclusions via Real-Valued Boosting}, YEAR = {2020}, ABSTRACT = {OWL ontologies are nowadays a quite popular way to describe structured knowledge in terms of classes, relations among classes and class instances. In this paper, given a target class T of an OWL ontology, we address the problem of learning fuzzy concept inclusion axioms that describe sufficient conditions for being an individual instance of T. To do so, we present Fuzzy OWL-BOOST that relies on the Real AdaBoost boosting algorithm adapted to the (fuzzy) OWL case. We illustrate its effectiveness by means of an experimentation. 
An interesting feature is that the learned rules can be represented directly into Fuzzy OWL 2. As a consequence, any Fuzzy OWL 2 reasoner can then be used to automatically determine/classify (and to which degree) whether an individual belongs to the target class T.}, KEYWORDS = {Fuzzy Logic, Description Logics, OWL 2, Machine Learning, AdaBoost}, PAGES = {1-26}, URL = {https://arxiv.org/abs/2008.05297}, } @TECHREPORT{DIDONATO_2020_TECHREPORT_DLBCDEGGMMOTT_437796, AUTHOR = {Di Donato, F. and Lombardo, T. and Breitfuss, G. and Chen, Y. and Dumouchel, S. and Eskevich, M. and Gingold, A. and Gorgaini, E. and Monachini, M. and Moranville, Y. and Ocansey, J. T. and Thiel, C. and Tóth Czifra, E.}, TITLE = {TRIPLE D 6. 1-Report on the General Interoperability Requirements}, YEAR = {2020}, ABSTRACT = {TRIPLE - Transforming Research Through Innovative Practices for Linked Interdisciplinary Exploration is a EU funded project under the INFRAEOSC-02-2019 - Prototyping new innovative services topic, which started in October 2019 and will end in March 2023. Its main objective is to design and develop a discovery platform for SSH, called GOTRIPLE. This deliverable is the main outcome of Task 6.1 which started at M4 at ends at M36, whose aim is to deal with the definition and the set-up of general TRIPLE's interoperability requirements, considering all the components which are composing the TRIPLE ecosystem (data, resources and tools). As preliminary results, we thus present here a general overview of the main EOSC interoperability requirements and specifications, both coming from a mapping of the EOSC Working Groups outputs, and of the most relevant results of EOSC related projects related to TRIPLE. We also attempt to provide TRIPLE's answers, proposals and solutions to the above mentioned requirements. 
The final picture presents different levels of precision, which depends on the fluidity of the EOSC definition on the one hand, and on the consequent fact that some implications are still unclear, and a discussion on the measures to address EOSC requirements is still on-going. While tackling interoperability, we introduce TRIPLE in its context in order to locate the GOTRIPLE platform in the EOSC frame, and more specifically in the SSH cluster of the EOSC (section 1). Section 2 defines the general interoperability requirements, starting with the software (2.2) and then presents an analysis of the main outputs released by the EOSC Working groups (2.3), taking into consideration as a general reference, the EOSC Interoperability Framework, and, more specifically, the FAIR and Architecture WGs documents (2.3.2, 2.3.4). These are the main guiding references for the design and realization of the EOSC, considering specifically interoperability. Section 2.3.3 illustrates how TRIPLE is translating into practice the FAIR requirements, while section 2.3.5 is focused on TRIPLE current decisions regarding the integration of the TRIPLE solution into the EOSC. To present an enriched scenario, the section includes as well a brief overview of other relevant outputs released by the EOSC WGs (Landscape, RoP, Sustainability and Skills and Training) (2.3.6). With the aim to provide a comprehensive analysis of the EOSC interoperability requirements, the WP6 partners have analyzed relevant deliverables produced by the main EOSC related projects as preparatory activity. The analysis was useful to understand the EOSC environment and its evolution in terms of interoperability and at the same time to understand which external deliverables have to be taken into consideration for the overall project development in TRIPLE. Section 3 includes a synthesis of this work, which is fully presented in Annex I. 
Section 4 - Conclusions and Outlook, outlines TRIPLE's the next steps to achieve interoperability and the strategies that will be adopted.}, KEYWORDS = {TRIPLE, GOTRIPLE, EOSC}, URL = {https://zenodo.org/record/4322806}, } @MISC{BOSCHETTI_2020_MISC_BD_484370, AUTHOR = {Boschetti, F. and Del Grosso, A. M.}, TITLE = {Textual markup (typographic, structural, semantic: HTML, CSS, XML)}, YEAR = {2020}, ABSTRACT = {Lezione tenuta nel contesto dell Summer School organizzata nel 2020 dal centro Venice Centre for Digital and Public Humanities (VeDPH)}, KEYWORDS = {Digital Humanities, Public Humanities, XML/TEI, Digital Scholarly Edition, VeDPH}, URL = {https://vedph.github.io/summercamp/}, } @MISC{DELGROSSO_2020_MISC_D_484365, AUTHOR = {Del Grosso, A. M.}, TITLE = {Dalla Recensio all'Emendatio Digitale. Teoria, metodi e applicazioni della filologia digitale}, YEAR = {2020}, ABSTRACT = {Presentazione svolta nel contesto del ciclo di webinars di informatica umanistica a cura di G. Ferrante e A. Mazzucchi intitolato "La tecnologia informatica applicata alle scienze filologiche e librarie". Il seminario, inserito nel corso di Alta Formazione in Storia e Filologia del Manoscritto e del Libro Antico della Scuola Superiore Meridionale (Girolamini), introduce in 5 ore di lezione all'uso dei linguaggi di markup per la rappresentazione e conservazione digitale di risorse filologiche e di apparati critici.}, KEYWORDS = {Digital Humanities, Filologia Digitale, XML/TEI, Digital Scholarly Edition, Apparati Critici Digitali}, URL = {https://publications.cnr.it/doc/484365}, } @MISC{DELGROSSO_2020_MISC_D_484376, AUTHOR = {Del Grosso, A. M.}, TITLE = {Git per edizioni digitali collaborative su GitHub}, YEAR = {2020}, ABSTRACT = {This workshop will cover the basic functionalities of the software Git and the platform GitHub. Two days, each day divided into two parts of three hours each. 
Morning sessions: presentation of Git topics/features (theory and practice) Afternoon sessions: hands-on exercises (developing a dynamic and collaborative digital scholarly edition) Total hours for the workshop 12 hours. Objectives of the workshop: Learning the most relevant features regarding Git tool with references to GitHub platform. Git is a "distributed" Version Control System. It is able to handle the history of the changes made to tracked resources over a time period. Git allows users to manage the evolution of collaborative documents and to revisit and/or revert the content of these documents to a particular older state. GitHub is a development platform where it is possible to host and review Git repositories, and many more functionalities. First day topics: Introduction to systems for version control Preliminaries about command line environment Installing Git and testing the correctness of the local installation Git design model and the basic work-flow Local version control: git init, git status, git add, git commit, git log Local files: git rm, git mv Second day topics: Git help: git help and man pages Remote basics: GitHub, git remote, git push, git fetch, git pull Searching and examination: git diff, git grep Advanced log options Branching system: git branch, git checkout, git merge Handling the commit history: git rebase, git reset}, KEYWORDS = {git, github, Digital Scholarly Edition, Filologia Collaborativa, Filologia Digitale}, URL = {https://www.unive.it/data/33113/2/43767}, } @MISC{DELGROSSO_2020_MISC_DPCDFG_484385, AUTHOR = {Del Grosso, A. M. and Piccini, S. and Cosenza, G. and D'Ottavi, G. and Fadda, E. and Gambarara, D.}, TITLE = {Saussure General Linguistics Project: Beyond the Course. 
A Knowledge Site for Rethinking Saussure's General Linguistics}, YEAR = {2020}, ABSTRACT = {The project is primarily committed to establish a standard scholarly edition of Saussure's General Linguistics related texts, making use of a consistent philological methodology, i.e. aware of the profoundly different textualities of the available sources, that is: autograph unfinished notes for a book, didactic materials (students' notes and Saussure's), and an edited volume.}, KEYWORDS = {Digital Humaniteis, Digital Scholarly Edition, Saussure's manuscripts, Filologia Digitale, Filologia computazionale}, URL = {https://github.com/saussure-team/general-linguistics-project}, } @MISC{DELTURCO_2020_MISC_DCDDMMSZ_484347, AUTHOR = {Del Turco, R. R. and Cacioli, G. and Del Grosso, A. M. and Di Pietro, C. and Martignano, C. and Memaj, J. and Spinelli, F. and Zenzaro, S.}, TITLE = {EVT-Edition Visualization Technology 2 (v. beta 2)}, YEAR = {2020}, ABSTRACT = {EVT (Edition Visualization Technology) is a software for creating and browsing digital editions of manuscripts based on text encoded according to the TEI XML (http://www.tei-c.org/) schemas and Guidelines. This tool was born as part of the [Digital Vercelli Book] (http://vbd.humnet.unipi.it/) project in order to allow the creation of a digital edition of the Vercelli Book, a parchment codex of the late tenth century, now preserved in the Archivio e Biblioteca Capitolare of Vercelli and regarded as one of the four most important manuscripts of the Anglo-Saxon period as regards the transmission of poetic texts in the Old English language. To ensure that it will be working on all the most recent web browsers, and for as long as possible on the World Wide Web itself, EVT is built on open and standard web technologies such as HTML, CSS and JavaScript. Specific features, such as the image viewer, are entrusted to the most effective and stable ones (e.g. 
Openseadragon in the case of the image viewer), again chosen among the open source and best supported ones to reduce the risk of future incompatibilities. The general architecture of the software, in any case, is modular, so that any component which may cause trouble or turn out to be not completely up to the task can be replaced easily. This version is based on the AngularJS programming framework and implements the MVC (Model - View - Controller) design pattern.}, KEYWORDS = {edizioni scientifiche digitali, filologia digitale, critica testuale, filologia di testi medievali, edizioni diplomatiche, web-publishing, XML/TEI, HTML5, CSS, AngularJS, visualization software, manuscript viewer}, URL = {http://evt.labcd.unipi.it/}, } @ARTICLE{ALZETTA_2019_ARTICLE_ADMV_423880, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {INFERRING QUANTITATIVE TYPOLOGICAL TRENDS FROM MULTILINGUAL TREEBANKS. A CASE STUDY}, YEAR = {2019}, ABSTRACT = {In the past decades, linguistic typology went through a renewing phase that involved a significant change in the research questions and methods of the discipline, which is now interested in fine-grained features underlying language diversity. In this paper, we propose a novel approach to address the newly defined needs of linguistic typology by extracting qualitative and quantitative information about a wide range of features from multilingual annotated corpora based on Natural Language Processing methods and techniques. We tested our method in a case study focusing on word order variation in two widely investigated constructions, VERB-SUBJ(ect) and NOUN-ADJ(ective), with a specific view to structural and functional factors underlying the preference for one or the other order, both intra- and cross-linguistically, and their interaction. 
Preliminary experiments have been carried out aimed at acquiring typological evidence from a selection of linguistically annotated treebanks for three different languages, namely Italian, Spanish and English. Our results show the effectiveness of the method in letting similarities and differences also emerge from typologically close languages.}, KEYWORDS = {language typology, multilingual annotated corpora, linguistic knowledge extraction and modelling, word order variation}, PAGES = {209-242}, URL = {https://www.rivisteweb.it/doi/10.1418/95391}, VOLUME = {18}, DOI = {10.1418/95391}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{CHAVEZ_2019_ARTICLE_CRCZBB_398298, AUTHOR = {Chavez, A. G. and Ranieri, A. and Chiarella, D. and Zereik, E. and Babić, A. and Birk, A.}, TITLE = {CADDY Underwater Stereo-Vision Dataset for Human-Robot Interaction (HRI) in the Context of Diver Activities}, YEAR = {2019}, ABSTRACT = {In this article, we present a novel underwater dataset collected from several field trials within the EU FP7 project "Cognitive autonomous diving buddy (CADDY)", where an Autonomous Underwater Vehicle (AUV) was used to interact with divers and monitor their activities. To our knowledge, this is one of the first efforts to collect a large public dataset in underwater environments with the purpose of studying and boosting object classification, segmentation and human pose estimation tasks. The first part of the dataset contains stereo camera recordings (?10 K) of divers performing hand gestures to communicate with an AUV in different environmental conditions. The gestures can be used to test the robustness of visual detection and classification algorithms in underwater conditions, e.g., under color attenuation and light backscatter. 
The second part includes stereo footage (?12.7 K) of divers free-swimming in front of the AUV, along with synchronized measurements from Inertial Measurement Units (IMU) located throughout the diver's suit (DiverNet), which serve as ground-truth for human pose and tracking methods. In both cases, these rectified images allow the investigation of 3D representation and reasoning pipelines from low-texture targets commonly present in underwater scenarios. This work describes the recording platform, sensor calibration procedure plus the data format and the software utilities provided to use the dataset.}, KEYWORDS = {dataset, underwater imaging, image processing, marine robotics, field robotics, human-robot interaction, stereo vision, object classification, human pose estimation}, PAGES = {1-14}, URL = {https://www.mdpi.com/2077-1312/7/1/16}, VOLUME = {7}, DOI = {10.3390/jmse7010016}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2077-1312}, JOURNAL = {Journal of marine science and engineering}, } @ARTICLE{CININI_2019_ARTICLE_CCFFMMN_407448, AUTHOR = {Cinini, A. and Cutugno, P. and Ferraris, C. and Ferretti, M. and Marconi, L. and Morgavi, G. and Nerino, R.}, TITLE = {Final results of the NINFA project: impact of new technologies in the daily life of elderly people}, YEAR = {2019}, ABSTRACT = {Background:The paper presents the work carried out within NINFA (iNtelligent Integrated Network For Aged people), a project for the wellbeing of the elderly people at home. Aims:The impact of new technologies on elderly people is evaluated with respect to the three main topics faced by NINFA. 
Methods:NINFA was structured into three main topics: (1) active user engagement from the very beginning of the plan-ning stage: the use of specially designed questionnaires to evaluate the acceptability of new technology in general and robot caregiver specifically; (2) assessment of the well-being through non-invasive techniques: natural language processing for language change monitoring in elderly subjects; (3) automated assessment of motor and cognitive functions at home: systems to deliver tests and exergames through user interfaces compliant with elderly subjects. Results:The analysis shows that there is no a priori closure to support the technology, but it must not be invasive and must allow social interactions. The study of speech transcripts shows that a large variations in the number of words used to describe the same situation could be a sign on the onset of cognitive impairments. The specifically designed systems highlight, after the training period, significant improvements in the performances of the participants and a satisfaction with regards to the systems usability. Conclusions: The outcomes of NINFA project highlight some important aspects of the relationship between elderly people and new technologies concerning: engagement and acceptability, assessment of the wellbeing and of the modifications of motor, cognitive and language functions.}, KEYWORDS = {User Engagement, Wellbeing assessment, Linguistic and Cognitive Analysis, Movement Analysis, Exergames, At-Home monitoring, Postural stability}, PAGES = {1-10}, URL = {https://link.springer.com/content/pdf/10.1007/s40520-019-01357-6.pdf}, DOI = {10.1007/s40520-019-01357-6}, PUBLISHER = {Editrice Kurtis (Milano, Italia)}, ISSN = {1720-8319}, JOURNAL = {Aging Clinical and Experimental Research (Online)}, } @ARTICLE{CONNOR_2019_ARTICLE_CVCR_403045, AUTHOR = {Connor, R. and Vadicamo, L. and Cardillo, F. A. 
and Rabitti, F.}, TITLE = {Supermetric search}, YEAR = {2019}, ABSTRACT = {Metric search is concerned with the efficient evaluation of queries in metric spaces. In general, a large space of objects is arranged in such a way that, when a further object is presented as a query, those objects most similar to the query can be efficiently found. Most mechanisms rely upon the triangle inequality property of the metric governing the space. The triangle inequality property is equivalent to a finite embedding property, which states that any three points of the space can be isometrically embedded in two-dimensional Euclidean space. In this paper, we examine a class of semimetric space which is finitely four-embeddable in three-dimensional Euclidean space. In mathematics this property has been extensively studied and is generally known as the four-point property. All spaces with the four-point property are metric spaces, but they also have some stronger geometric guarantees. We coin the term supermetric(1) space as, in terms of metric search, they are significantly more tractable. Supermetric spaces include all those governed by Euclidean, Cosine,(2) Jensen-Shannon and Triangular distances, and are thus commonly used within many domains. In previous work we have given a generic mathematical basis for the supermetric property and shown how it can improve indexing performance for a given exact search structure. Here we present a full investigation into its use within a variety of different hyperplane partition indexing structures, and go on to show some more of its flexibility by examining a search structure whose partition and exclusion conditions are tailored, at each node, to suit the individual reference points and data set present there. Among the results given, we show a new best performance for exact search using a well-known benchmark. (C) 2018 Elsevier Ltd. 
All rights reserved.}, KEYWORDS = {Similarity search, Metric space, Supermetric space, Metric indexing, Four-point property, Hilbert Exclusion}, PAGES = {108-123}, URL = {https://www.sciencedirect.com/science/article/pii/S0306437917301588?via%3Dihub}, VOLUME = {80}, DOI = {10.1016/j.is.2018.01.002}, PUBLISHER = {Pergamon (Oxford, Regno Unito)}, ISSN = {0306-4379}, JOURNAL = {Information systems (Oxf.)}, } @ARTICLE{DELGROSSO_2019_ARTICLE_DCCDGMSS_427276, AUTHOR = {Del Grosso, A. M. and Capizzi, E. and Cristofaro, S. and De Luca, M. R. and Giovannetti, E. and Marchi, S. and Seminara, G. and Spampinato, D.}, TITLE = {Bellini's Correspondence: a Digital Scholarly Edition for a Multimedia Museum}, YEAR = {2019}, ABSTRACT = {Within the "Museo Virtuale della Musica BellinInRete" project, a corpus of letters, written by the renowned composer Vincenzo Bellini (1801-1835) from Catania, will be encoded and made publicly available. This contribution aims at illustrating the part of the project regarding the implementation of the prototype for the metadata and text encoding, indexing and visualisation of Bellini's correspondence. The encoding scheme has been defined according to the latest guidelines of the Text Encoding Initiative and it has been instantiated on a sample of letters. Contextually, a first environment has been implemented by customizing two open source tools: Edition Visualization Technology and Omega Scholarly platform. The main objective of the digital edition is to engage general public with the cultural heritage held by the Belliniano Civic Museum of Catania. This wide access to Bellini's correspondence has been conceived preserving the scholarly transcriptions of the letters edited by Seminara within her most recent critical edition (Olschki, 2017). The digital edition of the corpus takes care of handling the correspondence metadata by means of the correspDesc TEI tagset. 
Finally, Bellini's letters will be accessible via the Web platform as well as integrated into a forthcoming interactive and multimedia tour hosted at the museum.}, KEYWORDS = {digital scholarly edition, correspondence, Digital and Computational Philology, Software Design, Vincenzo Bellini, Music, Multimedia Museum}, PAGES = {23-47}, URL = {https://umanisticadigitale.unibo.it/article/view/9162/9918}, VOLUME = {7}, DOI = {10.6092/issn.2532-8816/9162}, ISSN = {2532-8816}, JOURNAL = {Umanistica Digitale}, } @ARTICLE{DELLORLETTA_2019_ARTICLE_DGMMRSV_423874, AUTHOR = {Dell'Orletta, F. and Greco, S. and Montemagni, S. and Morini, E. and Rossi, F. and Sagri, M. T. and Venturi, G.}, TITLE = {Le parole del miglioramento. Come le scuole descrivono il cambiamento}, YEAR = {2019}, ABSTRACT = {Il presente contributo intende illustrare i risultati di una ricerca condotta con l'uso di strumenti di trattamento automatico del linguaggio (Natural Language Processing: nlp) su quanto dichiarato dalle scuole in circa 2500 Piani di Miglioramento (modello indire ) con l'obiettivo di comprendere le scelte strategiche in un'ottica di miglioramento continuo. Il disegno d'analisi permette di restituire sia una visione complessiva dei Piani di Miglioramento che approfondimenti qualitativi di confronto tra tipologie di scuola e aree geografiche e relativi a tematiche strategiche quali formazione e innovazione.}, KEYWORDS = {Piano di Miglioramento, Natural Language Processing, Formazione, Innovazione}, PAGES = {47-68}, URL = {https://www.rivistainfanzia.it/pvw/app/default/pvw_sito.php?sede_codice=1PWPSE01\&page=2432193}, VOLUME = {1/2019}, PUBLISHER = {Edizioni Centro Studi Erickson (Gardolo (TN), Italia)}, ISSN = {1971-3711}, JOURNAL = {Psicologia dell'educazione}, } @ARTICLE{DELLAGALA_2019_ARTICLE_DCDPV_423878, AUTHOR = {Della Gala, V. and Chiriatti, G. and Dell'Orletta, F. and Pettenati, M. C. 
and Venturi, G.}, TITLE = {Analytics dei testi riflessivi scritti dai docenti neoassunti nel portfolio digitale}, YEAR = {2019}, ABSTRACT = {Presentiamo i risultati preliminari e l'analisi svolta su circa 50.000 testi scritti dai docenti neo nominati in ruolo per riflettere su due attività didattiche svolte con gli studenti, nel contesto del percorso dell'anno di formazione e prova 2016/17. Il percorso prevede attività in presenza e attività a distanza completate sul portfolio digitale, ospitato nell'ambiente online gestito dall'Indire. Nell'ambito del monitoraggio della formazione, con il fine di ottimizzare gli strumenti e il supporto fornito, abbiamo interrogato i dati testuali prodotti dai docenti nell'interazione con l'ambiente per capire se i testi presentassero evidenze riconducibili alle scritture riflessive. Obiettivi dell'indagine sono stati la definizione di uno schema per la classificazione dei testi sulla base del livello di riflessività evidenziato e l'impiego di strumenti di Trattamento Automatico del Linguaggio (TAL) per l'analisi dell'interocorpus testuale prodotto dai docenti. Descriveremo il contesto scientifico e progettuale,le caratteristiche dei dati analizzati, come questo abbia determinato il disegno d'indagine;descriveremo inoltre la sua implementazione e dunque le procedure, gli strumenti e le metriche adottate o elaborate per rappresentare il contenuto dei dati; infine discuteremo i primi risultati e alcuni vantaggi e limiti dell'approccio adottato.}, KEYWORDS = {Teacher professional development, Natural Language Processing, Reflective writing, Linguistic Profiling, Document Classification}, PAGES = {187-204}, URL = {https://ojs.pensamultimedia.it/index.php/sird/article/view/3454/3360}, VOLUME = {Special issue}, DOI = {10.7346/SIRD-2S2019-P189}, PUBLISHER = {Pensa Multimedia (Lecce, Italia)}, ISSN = {2038-9744}, JOURNAL = {Giornale italiano della ricerca educativa (Online)}, } @ARTICLE{GOGGI_2019_ARTICLE_GPBMBC_411599, AUTHOR = {Goggi, S. 
and Pardelli, G. and Bartolini, R. and Monachini, M. and Biagioni, S. and Carlesi, C.}, TITLE = {Semantic Query Analysis from the Global Science Gateway}, YEAR = {2019}, ABSTRACT = {Nowadays web portals play an essential role in searching and retrieving information in the several fields of knowledge: they are ever more technologically advanced and designed for supporting the storage of a huge amount of information in natural language originating from the queries launched by users worldwide. Given this scenario, we focused on building a corpus constituted by the query logs registered by the GreyGuide: Repository and Portal to Good Practices and Resources in Grey Literature and received by the WorldWideScience.org (The Global Science Gateway) portal: the aim is to retrieve information related to social media which as of today represent a considerable source of data more and more widely used for research ends.}, KEYWORDS = {Information Extraction, Query Log, WorldWideScience Alliance, Information gateways, Social Media}, PAGES = {147-155}, URL = {https://publications.cnr.it/doc/411599}, VOLUME = {15}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{MARZI_2019_ARTICLE_MFP_406277, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {A processing-oriented investigation of inflectional complexity}, YEAR = {2019}, ABSTRACT = {Due to the typological diversity of their inflectional processes, some languages are intuitively more difficult than other languages. Yet, finding a single measure to quantitatively assess the comparative complexity of an inflectional system proves an exceedingly difficult endeavor. In this paper we propose to investigate the issue from a processing-oriented standpoint, using data processed by a type of recurrent neural network to quantitatively model the dynamic of word processing and learning in different input conditions. 
We evaluate the relative complexity of a set of typologically different inflectional systems (Greek, Italian, Spanish, German, English and Standard Modern Arabic) by training a Temporal Self-Organizing Map (TSOM), a recurrent variant of Kohonen's Self-Organizing Maps, on a fixed set of verb forms from top-frequency verb paradigms, with no information about the morphosemantic and morphosyntactic content conveyed by the forms. After training, the behavior of each language-specific TSOM is assessed on different tasks, looking at self-organizing patterns of temporal connectivity and functional responses. Our simulations show that word processing is facilitated by maximally contrastive inflectional systems, where verb forms exhibit the earliest possible point of lexical discrimination. Conversely, word learning is favored by a maximally generalizable system, where forms are inferred from the smallest possible number of their paradigm companions. Based on evidence from the literature and our own data, we conjecture that the resulting balance is the outcome of the interaction between form frequency and morphological regularity. Big families of stem-sharing, regularly inflected forms are the productive core of an inflectional system. Such a core is easier to learn but slower to discriminate. In contrast, less predictable verb forms, based on alternating and possibly suppletive stems, are easier to process but are learned by rote. Inflection systems thus strike a balance between these conflicting processing and communicative requirements, while staying within tight learnability bounds, in line with Ackermann and Malouf's Low Conditional Entropy Conjecture. 
Our quantitative investigation supports a discriminative view of morphological inflection as a collective, emergent system, whose global self-organization rests on a surprisingly small handful of language-independent principles of word coactivation and competition.}, KEYWORDS = {Morphological complexity, Discriminative learning, Recurrent neural networks (RNNs), self-organization, emergence, processing uncertainty, stem-family size}, PAGES = {1-23}, URL = {https://www.frontiersin.org/articles/10.3389/fcomm.2019.00048/full}, VOLUME = {4}, DOI = {10.3389/fcomm.2019.00048}, PUBLISHER = {Frontiers Media (Lausanne, Svizzera)}, ISSN = {2297-900X}, JOURNAL = {Frontiers in communication}, } @ARTICLE{SAURI_2019_ARTICLE_SMRB_407031, AUTHOR = {Sauri, R. and Mahon, L. and Russo, I. and Bitinis, M.}, TITLE = {Cross-dictionary linking at sense level with a double-layer classifier}, YEAR = {2019}, ABSTRACT = {We present a system for linking dictionaries at the sense level, which is part of a wider programme aiming to extend current lexical resources and to create new ones by automatic means. One of the main challenges of the sense linking task is the existence of non one-to-one mappings among senses. Our system handles this issue by addressing the task as a binary classification problem using standard Machine Learning methods, where each sense pair is classified independently from the others. In addition, it implements a second, statistically-based classification layer to also model the dependence existing among sense pairs, namely, the fact that a sense in one dictionary that is already linked to a sense in the other dictionary has a lower probability of being linked to a further sense. 
The resulting double-layer classifier achieves global Precision and Recall scores of 0.91 and 0.80, respectively.}, KEYWORDS = {Word sense linking, word sense mapping, lexical translation, lexical resources, language data construction, Word sense linking, word sense mapping, lexical translation, lexical resources, language data construction, multilingual data, word sense linking, word sense mapping, lexical translation, lexical resources, language data construction}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85068085716\&origin=inward}, VOLUME = {70}, DOI = {10.4230/OASIcs.LDK.2019.20}, PUBLISHER = {Schloss Dagstuhl, Leibniz-Zentrum für Informatik (Wadern, Germania)}, ISSN = {2190-6807}, JOURNAL = {Open access series in informatics}, } @ARTICLE{SPRUGNOLI_2019_ARTICLE_SPBD_403257, AUTHOR = {Sprugnoli, R. and Pardelli, G. and Boschetti, F. and Del Gratta, R.}, TITLE = {Un'Analisi Multidimensionale della Ricerca Italiana nel Campo delle Digital Humanities e della Linguistica Computazionale}, YEAR = {2019}, ABSTRACT = {This article proposes the first comparative study of four years of Italian conferences in the fields of Digital Humanities and Computational Linguistics. More specifically, we created a corpus consisting of the contributions presented in the AIUCD and CLiC-it conferences between 2014 and 2017 to which we applied a multidimensional analysis taking into consideration: (i) the study of collaborations between authors using social networks analysis techniques, (ii) the automatic extraction of terminology and information and (iii) the examination of citational practices. 
By combining both qualitative and quantitative methods of investigation, this paper aims to shed light on convergences and discrepancies between two research areas that historically have common origins.}, KEYWORDS = {Digital Humanities, Computational Linguistics, Comparative study}, PAGES = {59-89}, URL = {https://umanisticadigitale.unibo.it/article/view/8581}, VOLUME = {5}, DOI = {10.6092/issn.2532-8816/8581}, ISSN = {2532-8816}, JOURNAL = {Umanistica Digitale}, } @ARTICLE{STEFANINI_2019_ARTICLE_SNM_452483, AUTHOR = {Stefanini, A. E. and Nicolosi, A. and Monachini, M.}, TITLE = {A Mock-up for the Development of a Digital Edition for Ancient Greek Fragmentary Poetry: Results of Its Evaluation}, YEAR = {2019}, ABSTRACT = {Ancient Greek poetry is an essential part of the western cultural heritage; thus, it is important that people have access to its texts and whatever relates to their understanding in a reliable and easy way. Whenever user evaluation is concerned, mock-ups are used by designers to acquire feedback from users. A mock-up is defined as a model of the final product, and may be used for demonstration, evaluation and other purposes. The authors prototyped a mock-up for focusing on the requirements of a scholarly digital edition of Archilochus. This was put under evaluation to assess its usability: it was submitted to extensive use and testing by a sample of prospective users, to better focus on the requirements from a product's perspective. Experimentation involved a group of university students, attending a Greek Philology course at Parma University. More than half of the respondents considered the mock-up a useful study support. 
The evaluation also pointed out that the mock-up had to be revised, so as to guarantee better cognitive simplicity of the user interface.}, KEYWORDS = {Ancient Greek Poetry, Digital Edition, Greek Philology, Digital Humanities, Digital Philology, Didactics, Evaluation}, PAGES = {41-57}, URL = {https://www.igi-global.com/article/a-mock-up-for-the-development-of-a-digital-edition-for-ancient-greek-fragmentary-poetry/237162}, VOLUME = {8}, DOI = {10.4018/IJACDT.2019070103}, PUBLISHER = {IGI Global (Hershey, PA, Stati Uniti d'America)}, ISSN = {2155-420X}, JOURNAL = {International journal of art, culture and design technologies (Online)}, } @BOOK{BERZINS_2019_BOOK_BCGLMPRSSVV_443016, AUTHOR = {Berzins, A. and Choukri, K. and Giagkou, M. and Lösch, A. and Mazo, H. and Piperidis, S. and Rigault, M. and Schnur, E. and Smal, L. and Van Genabith, J. and Vasiljevs, A.}, TITLE = {ELRC White Paper-Sustainable Language Data Sharing to Support Language Equality in Multilingual Europe}, YEAR = {2019}, ABSTRACT = {The ELRC White Paper "Sustainable Language Data Sharing to Support Language Equality in Multilingual Europe - Why Language Data Matters" provides an analysis of European practices for sharing language data and the corresponding challenges, as well as clear recommendations for policy-level decision-makers on how to overcome these challenges.}, KEYWORDS = {Sustainable Language Data Sharing Language Equality Multilingual Europe}, URL = {https://www.lr-coordination.eu/sites/default/files/Documents/ELRCWhitePaper.pdf?lang=bg}, ISBN = {978-3-943853-05-6}, } @INCOLLECTION{BELLANDI_2019_INCOLLECTION_BMK_429245, AUTHOR = {Bellandi, A. and Monachini, M. and Khan, F.}, TITLE = {LexO: Where Lexicography Meets the Semantic Web}, YEAR = {2019}, ABSTRACT = {LexO is a collaborative web editor used for the creation and management of (multilingual) lexical and terminological resources as linked data resources. 
The editor makes use of Semantic Web technologies (which enrich web data with semantic information in order to make them machine readable) and the linked data publishing paradigm in order to ensure that lexical resources can be more easily shared and reused by the scientific community.}, KEYWORDS = {Semantic Web technologies, multilingual lexical resources, collaborative web editor}, PAGES = {43-47}, URL = {https://publications.cnr.it/doc/429245}, VOLUME = {2}, BOOKTITLE = {Tour de CLARIN volume two}, EDITOR = {Fiser, D. and Lenardic, J.}, } @INCOLLECTION{GIOVANNETTI_2019_INCOLLECTION_G_457777, AUTHOR = {Giovannetti, E.}, TITLE = {Traduzione Talmud Babilonese}, YEAR = {2019}, ABSTRACT = {Traduzione Talmud Babilonese è un progetto di ricerca che ha come obiettivo la traduzione in lingua italiana del Talmud Babilonese, un testo fondamentale della cultura ebraica non solo in campo religioso ma che tocca anche ogni aspetto della conoscenza umana, dalla giurisprudenza alla scienza, dalla filosofia alla vita di tutti i giorni}, KEYWORDS = {traduzione assistita dal calcolatore, talmud babilonese}, PAGES = {126-126}, URL = {https://publications.cnr.it/doc/457777}, VOLUME = {1}, DOI = {10.36173/PLURIMI-2019-1}, PUBLISHER = {CNR EDIZIONI (ROMA, ITA)}, ISBN = {9788880803775}, BOOKTITLE = {Linguaggi, ricerca, comunicazione. Focus CNR}, EDITOR = {Cadeddu, M. E. and Marras, C.}, } @INCOLLECTION{RUSSO_2019_INCOLLECTION_RMCM_429036, AUTHOR = {Russo, I. and Marconi, L. and Cutugno, P. and Monachini, M.}, TITLE = {Le parole sono ponti: risorse digitali per l'integrazione in contesti multilingue}, YEAR = {2019}, ABSTRACT = {Nel presente lavoro esporremo due esperienze inerenti all'uso e alla produzione di risorse linguistiche multilingui, svolte da alcuni ricercatori dell'Istituto di Linguistica Computazionale "Antonio Zampolli" (ilc) del cnr. 
Più nello specifico verrà descritta la realizzazione di un glossario nell'ambito del progetto Ascolto Accoglienza Azioni Offresi (aaa Offresi) e l'uso sperimentale di ImagAct (Moneglia et alii 2012) - una risorsa lessicale multilingue sui verbi d'azione - in una scuola primaria caratterizzata da una forte presenza di alunni stranieri. Il fine della ricerca è quello di favorire l'emergere delle competenze metalinguistiche degli apprendenti, valorizzando la diversità linguistica e culturale.}, KEYWORDS = {Multilingual lexical resources, L2 teaching, Translanguaging, Public administration terminology}, PAGES = {127-136}, URL = {https://publications.cnr.it/doc/429036}, VOLUME = {I}, DOI = {10.36173/PLURIMI-2019-1/09}, PUBLISHER = {Consiglio Nazionale delle Ricerche (Roma, ITA)}, ISBN = {9788880803775}, BOOKTITLE = {Linguaggi, ricerca, comunicazione. Focus CNR}, EDITOR = {Cadeddu, M. E. and Marras, C.}, } @INPROCEEDINGS{ALZETTA_2019_INPROCEEDINGS_ADMV_423881, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Dissecting Treebanks to Uncover Typological Trends. A Multilingual Comparative Approach}, YEAR = {2019}, ABSTRACT = {Over the last years, linguistic typology started attracting the interest of the community working on cross- and multi-lingual NLP as a way to tackle the bottleneck deriving from the lack of annotated data for many languages. Typological information is mostly acquired from publicly accessible typological databases, manually constructed by linguists. As reported in Ponti et al. (2018), despite the abundant information contained in them for many languages, these resources suffer from two main shortcomings, i.e. their limited coverage and the discrete nature of features (only "the majority value rather than the full range of possible values and their corresponding frequencies" is reported). 
Corpus-based studies can help to automatically acquire quantitative typological evidence which might be exploited for polyglot NLP. Recently, the availability of corpora annotated following a cross-linguistically consistent annotation scheme such as the one developed in the Universal Dependencies project is prompting new comparative linguistic studies aimed to identify similarities as well as idiosyncrasies among typologically different languages (Nivre, 2015). The line of research described here is aimed at acquiring quantitative typological evidence from UD treebanks through a multilingual contrastive approach.}, KEYWORDS = {Natural Language Processing, Linguistic Typology}, PAGES = {1-3}, URL = {https://typology-and-nlp.github.io/2019/assets/2019/papers/5.pdf}, ISBN = {978-1-950737-29-1}, CONFERENCE_NAME = {1st TyP-NLP: The Workshop on Typology for Polyglot NLP, ACL workshop}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {01/08/2019}, } @INPROCEEDINGS{BOSCHETTI_2019_INPROCEEDINGS_BPV_409872, AUTHOR = {Boschetti, F. and Pardelli, G. and Venturi, G.}, TITLE = {Nove Anni di jTEI: What's New?}, YEAR = {2019}, ABSTRACT = {Questo contributo illustra metodi e strumenti per studiare il cambiamento diacronico degli interessi di ricerca della comunità TEI grazie all'uso di metodi di estrazione automatica della terminologia da corpora di dominio.}, KEYWORDS = {Natural Language Processing, Digital Humanities}, PAGES = {1-6}, URL = {http://ceur-ws.org/Vol-2481}, VOLUME = {2481}, NOTE = {urn:nbn:de:0074-2481-7}, PUBLISHER = {CEUR-WS.org (Aachen, DEU)}, CONFERENCE_NAME = {CLiC-it 2019-Sesta Conferenza Italiana di Linguistica Computazionale}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {13-15/11/2019}, BOOKTITLE = {CLiC-it 2019 Italian Conference on Computational Linguistics}, EDITOR = {Bernardi, R. and Navigli, R. and Semeraro, G.}, } @INPROCEEDINGS{CERNIGLIA_2019_INPROCEEDINGS_CCCMMDF_403916, AUTHOR = {Cerniglia, A. and Chiarella, D. and Cutugno, P. and Marconi, L. 
and Magrini, A. and Di Feo, G. and Ferretti, M.}, TITLE = {QUESTIONNAIRE ANALYSIS TO DEFINE THE MOST SUITABLE SURVEY FOR PORT-NOISE INVESTIGATION}, YEAR = {2019}, ABSTRACT = {The high level of noise pollution affecting the areas between ports and logistic platforms represents a problem that can be faced from different points of view. Acoustic monitoring, mapping, short-term measurements, port and road traffic flows analyses can give useful indications on the strategies to be proposed for a better management of the problem. A survey campaign through the preparation of questionnaires to be submitted to the population exposed to noise in the back-port areas will help to better understand the subjective point of view. The paper analyses a sample of questions suitable for the specific research, chosen as part of the wide database of questionnaires internationally proposed for subjective investigations. The preliminary results of a first data collection campaign are considered to verify the adequacy of the number, the type of questions, and the type of sample noise used for the survey. The questionnaire will be optimized to be distributed in the TRIPLO project (TRansports and Innovative sustainable connections between Ports and LOgistic platforms). The results of this survey will be the starting point for the linguistic investigation carried out in combination with the acoustic monitoring, to improve understanding the connections between personal feeling and technical aspects.}, KEYWORDS = {port noise, acoustic monitoring, subjective survey, psychoacoustics}, URL = {https://publications.cnr.it/doc/403916}, ISBN = {978-1-9991810-0-0}, CONFERENCE_NAME = {26th International Congress on Sound \& Vibration}, CONFERENCE_PLACE = {Montréal, Canada}, CONFERENCE_DATE = {7-11/07/2019}, } @INPROCEEDINGS{CHIRIATTI_2019_INPROCEEDINGS_CBDV_423885, AUTHOR = {Chiriatti, G. and Brunato, D. and Dell'Orletta, F. and Venturi, G.}, TITLE = {What makes a review helpful? 
Predicting the helpfulness of Italian tripadvisor reviews}, YEAR = {2019}, ABSTRACT = {In this paper we introduce a classification system devoted to predict the helpfulness of Italian online reviews. It is based on a wide set of features reflecting the different factors involved and tested on different categories of TripAdvisor reviews. For this purpose, we collected the first Italian corpus of online reviews enriched with metadata related to their helpfulness and we carried out an in-depth analysis of the most predictive features.}, KEYWORDS = {Natural Language Processing, Document Classification, Linguistic Profiling}, PAGES = {1-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85074834351\&origin=inward}, VOLUME = {2481}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {6th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {13-15/11/2019}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{CUTUGNO_2019_INPROCEEDINGS_CMFC_400412, AUTHOR = {Cutugno, P. and Marconi, L. and Ferretti, M. and Chiarella, D.}, TITLE = {Estudios lingüísticos en antologías narrativas sobre la experiencia del viaje}, YEAR = {2019}, ABSTRACT = {Las antologías "Partire: Antologia narrativa di geografia emozionale" y "Partire: Antologia illustrata per eterni viaggiatori" fueron elaboradas por el CTS: Centro turístico Studentesco e Giovanile, en 2009, 2010, 2011 y 2013. Las cuatro obras relacionan historias, imágenes y pequeñas frases referidas con un "viaje de los sueños", que contiene el lugar y por qué ese viaje es el de los deseos. 
Cada obra contiene imágenes o historias relacionadas con temas específicos en los que se coloca cada narración; para cada una de las ediciones se lanzó un concurso para estimular la participación en la redacción de las narraciones de jóvenes italianos entre dieciocho y treinta y cinco años sobre el tema del viaje. En otro artículo previamente producido, fueron analizados los componentes gramaticales de las contribuciones escritas. Los objetivos fueron identificar las posibles diferencias de las partes del discurso en los textos que componen los distintos volúmenes y, al mismo tiempo, tratar de establecer para los diversos textos, dentro de qué gama de valores se coloca la relación entre sustantivos y verbos. El análisis cualitativo y cuantitativo de las palabras más frecuentes permitió monitorear cómo algunos de los relatos de viaje se convierten en reflexiones introspectivas cambiando el enfoque de los que escriben del viaje en el mundo a los que lo hacen sobre el viaje de la vida y viceversa.}, KEYWORDS = {viaggio, analisi linguistica, linguistica computazionale}, PAGES = {117-120}, URL = {https://publications.cnr.it/doc/400412}, ISBN = {9789597174363}, CONFERENCE_NAME = {XVI° Simposio Internacional de Comunicación Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {21-25/01/2019}, } @INPROCEEDINGS{FIEROMONTE_2019_INPROCEEDINGS_FBDV_423883, AUTHOR = {Fieromonte, M. and Brunato, D. and Dell'Orletta, F. and Venturi, G.}, TITLE = {Italian and English sentence simplification: How many differences?}, YEAR = {2019}, ABSTRACT = {The paper proposes a cross-linguistic analysis of two parallel monolingual corpora conceived for automatic text simplification in two languages, Italian and English. The aim is to find similarities and differences in the process of simplification in two typologically different languages. 
To carry out the comparison, 1,000 sentences were extracted from the two corpora and annotated with a scheme previously used to annotate simplification phenomena.}, KEYWORDS = {Natural Language Processing, Automatic Text Simplification}, PAGES = {1-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85074816689\&origin=inward}, VOLUME = {2481}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {6th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {13-15/11/2019}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{GOGGI_2019_INPROCEEDINGS_GPBMBC_400343, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Monachini, M. and Biagioni, S. and Carlesi, C.}, TITLE = {Semantic query analysis from the global science gateway}, YEAR = {2019}, ABSTRACT = {We focused on building a corpus constituted by the query logs registered by the GreyGuide: Repository and Portal to Good Practices and Resources in Grey Literature and received by the WorldWideScience.org (The Global Science Gateway) portal.}, KEYWORDS = {Information Extraction, Terminology}, PAGES = {105-113}, URL = {https://publications.cnr.it/doc/400343}, VOLUME = {20}, ISBN = {978-90-77484-33-3}, CONFERENCE_NAME = {GL20-Twentieth International Conference on Grey Literature: Research Data Fuels and Sustains Grey Literature}, CONFERENCE_PLACE = {New Orleans, USA}, CONFERENCE_DATE = {3-4 December 2018}, BOOKTITLE = {Research Data Fuels and Sustains Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{GRECO_2019_INPROCEEDINGS_GMLSV_415175, AUTHOR = {Greco, A. and Marzi, C. and Lanata, A. and Scilingo, E. P. 
and Vanello, N.}, TITLE = {Combining Electrodermal Activity and Speech Analysis towards a more Accurate Emotion Recognition System}, YEAR = {2019}, ABSTRACT = {Current research in the emotion recognition field is exploring the possibility of merging the information from physiological signals, behavioural data, and speech. Electrodermal activity (EDA) is amongst the main psychophysiological arousal indicators. Nonetheless, it is quite difficult to be analyzed in ecological scenarios, like, for instance, when the subject is speaking. On the other hand, speech carries relevant information of subject emotional state and its potential in the field of affective computing is still to be fully exploited. In this work, we aim at exploring the possibility of merging the information from electrodermal activity (EDA) and speech to improve the recognition of human arousal level during the pronunciation of single affective words. Unlike the majority of studies in the literature, we focus on speakers' arousal rather than the emotion conveyed by the spoken word. Specifically, a support vector machine with recursive feature elimination strategy (SVM-RFE) is trained and tested on three datasets, i.e. using the two channels (i.e., speech and EDA) separately and then jointly. The results show that the merging of EDA and speech information significantly improves the marginal classifier (+11.64\%). 
The six selected features by the RFE procedure will be used for the development of a future multivariate model of emotions.}, KEYWORDS = {emotion recognition, feature selection, pattern classification, physiology, psychology, support vector machines, human arousal level, single affective words, EDA, electrodermal activity, speech analysis, emotion recognition system, speech processing}, PAGES = {229-232}, URL = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=\&arnumber=8857745\&isnumber=8856280}, VOLUME = {41st Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC)}, DOI = {10.1109/EMBC.2019.8857745}, PUBLISHER = {IEEE Service Center (Piscataway, NJ, Stati Uniti d'America)}, ISSN = {1557-170X}, ISBN = {978-1-5386-1311-5}, CONFERENCE_NAME = {41st Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC)}, CONFERENCE_PLACE = {Berlin, Germany}, CONFERENCE_DATE = {23-27 July 2019}, BOOKTITLE = {Conference proceedings (IEEE Eng. Med. Biol. Soc., Conf.)}, } @INPROCEEDINGS{MAGRINI_2019_INPROCEEDINGS_MDCMCCF_403914, AUTHOR = {Magrini, A. and Di Feo, G. and Cerniglia, A. and Marconi, L. and Cutugno, P. and Chiarella, D. and Ferretti, M.}, TITLE = {INDAGINE SOGGETTIVA FINALIZZATA ALLA VALUTAZIONE DEL DISTURBO DA RUMORE NELLE ZONE RETROPORTUALI}, YEAR = {2019}, ABSTRACT = {Vengono presentati alcuni risultati preliminari di una ricerca sulla percezione del rumore in zone retroportuali, che si inserisce nelle azioni previste nell'ambito del progetto TRIPLO (Programma Interreg Italia-Francia Marittimo 2014-2020). 
Le valutazioni riguardano le prime fasi di realizzazione e somministrazione di un questionario preliminare, realizzato su piattaforma web, e delle relative risposte: attraverso questo strumento si vogliono mettere in relazione rumori e percezione soggettiva, mediante l'uso di termini linguistici ritenuti più appropriati dai soggetti intervistati.}, KEYWORDS = {port noise, acoustic monitoring, subjective survey, psychoacoustics}, PAGES = {2}, URL = {https://publications.cnr.it/doc/403914}, ISBN = {978-88-88942-59-9}, CONFERENCE_NAME = {46° Convegno Nazionale Associazione Italiana di Acustica}, CONFERENCE_PLACE = {Pesaro}, CONFERENCE_DATE = {29-31 maggio 2019}, } @INPROCEEDINGS{MARZI_2019_INPROCEEDINGS_MGSV_430473, AUTHOR = {Marzi, C. and Greco, A. and Scilingo, E. P. and Vanello, N.}, TITLE = {Electrodermal activity and speech features as predictors for arousal level changes after affective word pronunciation}, YEAR = {2019}, ABSTRACT = {This work explores the possibility of estimating subject arousal through the analysis of speech and electrodermal activity (EDA). One critical issue to be clarified is the reliability of EDA signal during speech production. To accomplish this task, a relation among EDA, speech activity and subject arousal during isolated affective word pronunciation task, will be investigated. The results show that significant information on subject arousal can be still obtained by analyzing EDA during speech. In fact, a significant relationship between EDA features and self-reported arousal can be observed. In addition, a quantitative linear model relating EDA- and speech-related features could be identified. 
These preliminary results indicate how the analysis of concurrent acquisition of EDA and speech deserves further attention and could offer a valid approach for the prediction of subject arousal during speech production, as a method for validating self-assessment ratings.}, KEYWORDS = {electrodermal activity, regression model, word pronunciation, arousal, speech}, PAGES = {93-96}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85086605454\&origin=inward}, VOLUME = {122}, PUBLISHER = {Firenze University Press (Firenze, Italia)}, ISBN = {978-88-6453-961-4}, CONFERENCE_NAME = {11th international workshop on Models and Analysis of Vocal Emissions for Biomedical Applications}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {17-19/12/2019}, BOOKTITLE = {Models and Analysis of Vocal Emissions for Biomedical Applications}, EDITOR = {Manfredi, C.}, } @INPROCEEDINGS{RORBERI_2019_INPROCEEDINGS_RM_408259, AUTHOR = {Rorberi, S. and Marzi, C.}, TITLE = {Modelling the interaction of regularity and morphological structure: the case of Russian verb inflection}, YEAR = {2019}, ABSTRACT = {Modelling complex inflection systems, such as conjugation in Modern Greek, Italian or Russian, requires careful consideration of a number of factors, ranging from pervasive stem allomorphy to the identification of the appropriate inflection class and the inferential predictability of morpho-phonological processes. Descriptive approaches have taken different views on how to account for degrees of morphological (ir)regularity, while making different predictions about the way speakers process regular and irregular forms in highly-inflecting languages. In the present paper, we assess the psycholinguistic implications of two radically different approaches to the description of the Russian verb system: a more traditional approach dating back to Jakobson (1948), and a Words and Paradigm approach (Brown 1998). Based on recent fMRI evidence (Slioussar et al. 
2014) and original results of a neural network simulation with recurrent self-organising maps (Ferro et al. 2011; Marzi et al. 2014; Pirrelli et al. 2015; Marzi et al. 2016), we suggest that both approaches are prima facie compatible with Russian data, while being in contrast with Pinker's claim that the regular-irregular distinction is an epiphenomenon of the storage-processing dichotomy in the human language faculty (Pinker \& Ullman 2002). We argue that this evidence lends support to integrative models of the mental lexicon (Marzi \& Pirrelli 2015), accounting for a graded interaction between regularity and morphological structure.}, KEYWORDS = {Inflectional complexity, Russian verb system, perception of morphological structure, recurrent self-organising neural network}, PAGES = {107-110}, URL = {http://drehu.linguist.univ-paris-diderot.fr/ismo-2019/?fichier=programme}, VOLUME = {2019}, CONFERENCE_NAME = {International Symposium of Morphology (ISMo) 2019}, CONFERENCE_PLACE = {Université de Paris, France}, CONFERENCE_DATE = {25-27/09/2019}, EDITOR = {Crysmann, B. and Villoing, F.}, } @INPROCEEDINGS{ALBANESI_2019_INPROCEEDINGS_AD_414913, AUTHOR = {Albanesi, D. and Del Gratta, R.}, TITLE = {OpeNER and PANACEA: Web Services for the CLARIN Research Infrastructure}, YEAR = {2019}, ABSTRACT = {This paper describes the necessary steps for the integration of OpeNer and PANACEA Web Services within the CLARIN research infrastructure. 
The original Web Services are wrapped into a framework and re-implemented as REST APIs to be further exploited through both Language Resource Switchboard and WebLicht and made available for the CLARIN community.}, KEYWORDS = {CLARIN-IT, Interoperabilità, Research Infrastructure}, PAGES = {19-23}, URL = {https://office.clarin.eu/v/CE-2019-1512_CLARIN2019_ConferenceProceedings.pdf}, CONFERENCE_NAME = {CLARIN Annual Conference 2019}, CONFERENCE_PLACE = {Leipzig, Germany}, CONFERENCE_DATE = {30/09/2019, 02/10/2019}, } @INPROCEEDINGS{CATERINO_2019_INPROCEEDINGS_CBMDSD_430369, AUTHOR = {Caterino, A. F. and Battaglino, G. and Marini, A. and Di Meglio, A. and Silvi, D. and Del Grosso, A. M.}, TITLE = {Letteratura e filologia come scienze esatte: per un nuovo statuto scientifico dell'umanista informatizzato}, YEAR = {2019}, ABSTRACT = {La leggenda vuole che fosse inciso all'entrata dell'accademia platonica il motto «ἀγεωμέτρητος μηδεὶς εἰσίτω». La geometria è qui intesa come la scienza stessa, quell'esattezza di calcolo indispensabile a cogliere determinate strutture armoniche all'interno dell'arte; strutture che, in fin dei conti, costituiscono la vera e propria ossatura del concetto di bello. Per meglio concepire il bello nella sua struttura è quindi necessario avere una capacità d'analisi matematica, geometrica. Oggi l'informatica sa venire incontro proprio alla necessità dell'umanista di rendere il suo lavoro più specifico e preciso, al fine di ottenere dal proprio lavoro migliori esegesi, migliori ricostruzioni testuali, migliori approcci critici utili a una fruizione più consapevole dello stesso testo letterario. Il panel vorrebbe quindi raccogliere ricerche e testimonianze di chi si occupa e preoccupa di applicare l'informatica agli studi filologici e letterari, con la volontà ultima di superare le barriere che contrappongono in modalità integralista gli studia humanitatis alle cosiddette scienze esatte. 
Esso nasce come estensione delle ricerche informatico-umanistiche condotte presso l'Università degli Studi del Molise dal gruppo di studi Eterodossie e dissenso nella letteratura italiana, ma vuole ovviamente aprirsi a chiunque creda - in maniera critica e ragionata - alla causa delle digital humanities.}, KEYWORDS = {Digital Humanities}, URL = {https://www.italianisti.it/associazione/congressi-adi/letteratura-e-scienze}, CONFERENCE_NAME = {XXIII Congresso ADI-Associazione degli Italianisti}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {12/09/2019-14/09/2019}, } @INPROCEEDINGS{DELGROSSO_2019_INPROCEEDINGS_D_430160, AUTHOR = {Del Grosso, A. M.}, TITLE = {Multilingual Word-by-word alignment. Methodology and some preliminary outcomes towards the construction of multilingual Lexicon within the "Traduzione del Talmud Babilonese" project}, YEAR = {2019}, ABSTRACT = {Textual scholars have been exploiting for long time multilingual resources in their daily work to better understand the primary sources they inquire. Bitexts are parallel texts which turn out to be useful in a number of cross-linguistic and comparative processing tasks. This talk will show the workflow adopted within the research activities conducted on the Italian translation of the Babylonian Talmud. More specifically, I will illustrate the ongoing work towards the construction of a multilingual Hebrew/Aramaic/Italian terminological resource by means of stochastic generative approaches to word-by-word text alignment. The related literature discusses plenty of techniques concerning this topic. The alignment tool I developed is grounded on generative models (i.e., IBM and HMM models), which are a collection of non-supervised machine learning algorithms, to calculate the probability of linking two words in a multilingual term pair. 
From a technical standpoint, beside the adopted models, which are based on an alignment function and on an unsupervised training procedure devoted to estimating the unknown probability distributions, other machine learning approaches to word alignment exist that encompass discriminative techniques, which are based on a target function and on a supervised learning process exploiting labeled training data set. The implemented models were widely adopted in the literary domain, as they are able to profitably handle interpretative bitexts modeling also deletion, insertion, transposition phenomena without having an extant labeled data set. The workflow I will present encompasses four distinct phases: 1) The encoding of the parallel text, which has been carried out according to the last TEI recommendations. In particular, the linking-target approach described within the Module 16 of the guidelines was used. 2) The semi-automatic extraction of the Italian terms, which has been carried out by means of linguistic analysis technologies available at the Institute of Computational Linguistics (ILC-CNR). These tools include a stochastic component for terminology extraction. 3) The addition of Hebrew/Aramaic terms to the Italian extracted ones via word-by-word alignment to automatically process the three main ancient languages appearing in the Talmud, namely mishnaic Hebrew, biblical Hebrew and babylonian Aramaic. 4) Finally, the revision of the obtained results through an ad-hoc implemented web-based application. This final step is devoted to build a ground truth and/or a gold training set allowing us to perform a complete validation process of the alignment outcomes. 
For the time being, 219.000 tokens have been analyzed, extracted from four tractates of the Babylonian Talmud which were translated so far.}, KEYWORDS = {bilingual word alignment, translation}, URL = {http://www.ens-lyon.fr/evenement/recherche/machine-learning-donnees-textuelles-et-recherche-en-sciences-humaines-et}, CONFERENCE_NAME = {Machine learning, données textuelles et recherche en sciences humaines et sociales}, CONFERENCE_PLACE = {ENS de Lyon}, CONFERENCE_DATE = {25/11/2019-26/11/2019}, } @INPROCEEDINGS{DELGROSSO_2019_INPROCEEDINGS_D_430161, AUTHOR = {Del Grosso, A. M.}, TITLE = {Verso la definizione e l'implementazione di una piattaforma orientata allo studio critico del testo}, YEAR = {2019}, ABSTRACT = {Il contributo illustra le caratteristiche della piattaforma in sviluppo presso ILC per lo studio scientifico del testo e in parte impiegata per la realizzazione dell'edizione digitale delle lettere di Bellini.}, KEYWORDS = {BellinInRete, Digital Scholarly Editing, Digital Philology, Computational Philology}, URL = {https://publications.cnr.it/doc/430161}, CONFERENCE_NAME = {Il progetto BellinInRete digital correspondence. Per un'edizione critica digitale delle lettere di Vincenzo Bellini}, CONFERENCE_PLACE = {Coro di Notte-ex Monastero dei Benedettini-piazza Dante 32, Catania}, CONFERENCE_DATE = {29/10/2019}, } @INPROCEEDINGS{DELGROSSO_2019_INPROCEEDINGS_DCCSS_429947, AUTHOR = {Del Grosso, A. M. and Capizzi, E. and Cristofaro, S. and Seminara, G. and Spampinato, D.}, TITLE = {Promoting Bellini's legacy and the Italian opera by scholarly digital editing his own correspondence}, YEAR = {2019}, ABSTRACT = {This contribution aims at illustrating the ongoing work towards the digital scholarly editing, long-term preservation, web publishing and computational exploiting of 41 letters, written by the renowned composer Vincenzo Bellini. 
The correspondence is kept at the Belliniano Civic Museum of Catania and is being encoded in XML according to the last TEI guidelines. The edition will be made accessible both via web - exploiting the Edition Visualization Technology (EVT) - as well as integrated into an interactive and multimedia tour within the museum. The digital edition is based on the recently published transcriptions made by Seminara. The encoding scheme has been defined according to the edition requirements, the TEI best practices and the Music Encoding Initiative (MEI) guidelines - where the musical context must be specified. Our initiative has some elements of innovation that distinguish it from similar projects, such as the Van Gogh letter project or the DALF project. For instance, we encode the circumstance that the letters themselves have also the purpose of acting as envelopes. In fact, they are folded on themselves and postmarks and wax seals are sometimes affixed on them. The edition takes care of handling the correspondence metadata by means of the correspDesc TEI tagset, thus providing the opportunity to exploit the correspSearch API. This approach has allowed us to enrich the encoding of the document both in its logical and physical structure and in indexing letters by sender, recipient, date, and places. The museum context and the educational purposes have even led us to the definitions of some lists of named entities. Within these resources we have adopted the Semantic Web and LOD paradigm by encoding external references to authoritative repositories such as RISM and DBpedia. Finally, we implemented some useful EVT extensions to automatically handle hotspots and to show critical notes that accompany the text.}, KEYWORDS = {TEI, DSE, Vincenzo Bellini, Digital Correspondence}, URL = {https://gams.uni-graz.at/o:tei2019.118}, DOI = {10.5281/zenodo.3461673}, CONFERENCE_NAME = {What is text, really? 
TEI and beyond (TEI 2019)}, CONFERENCE_PLACE = {University of Graz, Austria}, CONFERENCE_DATE = {16/09/2019-20/09/2019}, BOOKTITLE = {What is text, really? TEI and beyond}, EDITOR = {Vogeler, G.}, } @INPROCEEDINGS{DELGROSSO_2019_INPROCEEDINGS_DP_430371, AUTHOR = {Del Grosso, A. M. and Piccini, S.}, TITLE = {Fixing the Movements of Thought in Text and Terminology: the Effectiveness of Scholarly Digital Tools on Saussure's Writings}, YEAR = {2019}, ABSTRACT = {The aim of this contribution is to present and discuss some recent activities, carried out by the Institute of Computational Linguistics (CNR - Pisa), devoted to representing in an explicit and formal way the diachronic evolution of concepts and terms occurring in Saussure's manuscripts. In the first part of the presentation, we will briefly outline some theoretical aspects concerning the diachronic standpoint in terminology and the "best practices" to be followed when modelling (diachronic) terminological resources. Then, some examples of terminological evolution will be provided, taken from the electronic lexicon Simple_FdS, built within the PRIN project "Per un'edizione digitale dei manoscritti di Ferdinand de Saussure" (2008-2011), based on the Generative Lexicon theory elaborated by Pustejovsky (1995). Terminological data will be shown in LexO, a Web-based and collaborative web editor, which allows for building lexical and terminoontological resources, compliant with the Semantic Web technologies (RDF and OWL). In the second part of the talk, starting from the outcomes of the aforementioned project, we will present the scholarly digital platform aimed at describing and analysing text resources. The encoding approach follows the current de facto standard in representing textual resources, namely the Text Encoding Initiative (TEI) guidelines. In particular, we adopted the tag sets defined in the Module for Transcription of Primary Resources. 
An example will be illustrated on how describe and visualize the process of writing in Saussure's manuscripts. The example was implemented exploiting the Omega framework as well as the Edition Visualization Technology (EVT). In such a way, scholars have at their disposal a powerful tool for searching additions, deletions, substitutions, retracings, different hands, transpositions, marginal and interlinear notes, and all the other textual phenomena significant to understanding the "Movements of thought" which emerge in the handwritten pages.}, KEYWORDS = {Saussure, Digital Humanities, Digital Scholarly Editing, Digital Philology, Computational Philology}, URL = {https://publications.cnr.it/doc/430371}, CONFERENCE_NAME = {Philosophy of Language and Digital Humanities}, CONFERENCE_PLACE = {Rende (CS)}, CONFERENCE_DATE = {07/05/2019-09/05/2019}, } @INPROCEEDINGS{DELTURCO_2019_INPROCEEDINGS_DMDCDZ_430159, AUTHOR = {Del Turco, R. R. and Martignano, C. and Di Pietro, C. and Cacioli, G. and Del Grosso, A. M. and Zenzaro, S.}, TITLE = {DSE Visualisation with EVT: Simplicity is Complex}, YEAR = {2019}, ABSTRACT = {Edition Visualization Technology (EVT) is an open source tool to produce digital scholarly editions on the basis of TEI XML-encoded documents. Born to serve the goals of a single project, the Digital Vercelli Book, it has been developed in such a way as to become a general purpose tool. Several DSE projects are using it to publish digital editions, in fact many researchers have found in EVT the perfect tool for their needs: it is easy to configure and deploy, it is fully customizable, it includes several useful research tools out of the box.}, KEYWORDS = {Digital Philology, Digital Scholarly Editing}, URL = {https://doi.org/10.34894/B6T1YD}, DOI = {10.34894/B6T1YD}, CONFERENCE_NAME = {Complexities}, CONFERENCE_PLACE = {Utrecht}, CONFERENCE_DATE = {09/07/2019-12/07/2019}, BOOKTITLE = {Complexities}, EDITOR = {Pierazzo, E. 
and Ciotti, F.}, } @INPROCEEDINGS{MONACHINI_2019_INPROCEEDINGS_M_429318, AUTHOR = {Monachini, M.}, TITLE = {Ricerche di alta qualità negli Studi umanistici: l'infrastruttura CLARIN-IT}, YEAR = {2019}, ABSTRACT = {Nella lezione a invito presso la Scuola di Dottorato di Ateneo al Corso di UMANESIMO E TECNOLOGIE vengono descritti, il ruolo, i vantaggi e le opportunità offerte dalla infrastruttura di ricerca CLARIN. Una platea di giovani in formazione (che costituiranno i ricercatori del futuro) potrà approfondire la conoscenza degli strumenti della infrastruttura che consentono di coniugare studi umanistici ed approcci tecnologici, al fine di compiere ricerche di alta qualità.}, KEYWORDS = {CLARIN, studi umanistici, tecnologie linguistiche}, URL = {https://publications.cnr.it/doc/429318}, CONFERENCE_NAME = {Scuola di Dottorato di Ateneo Corso di UMANESIMO E TECNOLOGIE}, CONFERENCE_DATE = {13/11/2019}, } @INPROCEEDINGS{MONACHINI_2019_INPROCEEDINGS_M_429336, AUTHOR = {Monachini, M.}, TITLE = {Success stories of collaboration in Social Sciences and Humanities (between Italy and Slovenia)}, YEAR = {2019}, ABSTRACT = {The collaboration Italy-Slovenia in the sector of SSH revolves, since many years, around Digital methods for language, in particular, language resources and standards for language data; with the development of e-technology and explosion of data, the support to language studies goes through RI; another hot topic linked to the digital era is "word meaning" which involves a new type of lexicography; the even greater challenge, the concept of Open Science, sees the two countries together in the new big cluster project SSHOC, which aims to implement the EOSC vision and build the Open Cloud for the SSH sector.}, KEYWORDS = {collaboration Italy Slovenija, Social Sciences and Humanities}, URL = {https://publications.cnr.it/doc/429336}, CONFERENCE_NAME = {RESEARCH DAY ITALY-SLOVENIA Bilateral meeting Italy Slovenia on the role of research in the society}, 
CONFERENCE_PLACE = {University of Nova Gorica, Vipava, Glavni trg 8}, CONFERENCE_DATE = {16/4/2019}, } @INPROCEEDINGS{MONACHINI_2019_INPROCEEDINGS_M_429355, AUTHOR = {Monachini, M.}, TITLE = {CLARIN-IT nella prospettiva delle Digital Humanities}, YEAR = {2019}, ABSTRACT = {Fornire una panoramica relativa alla infrastruttura europea CLARIN e la sua emanazione italiana CLARIN-IT rispondere ai quesiti relativi alla sua missione e ai suoi obiettivi e fare il punto sui vantaggi per la comunità a cui è diretto, ricercatori del settore delle scienze umane e sociali}, KEYWORDS = {digital public humanities, infrastrutture di ricerca}, URL = {https://publications.cnr.it/doc/429355}, CONFERENCE_NAME = {Seminars in Digital Public Humanities}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {23 ottobre 2019}, } @INPROCEEDINGS{MONACHINI_2019_INPROCEEDINGS_MSC_429370, AUTHOR = {Monachini, M. and Stamuli, M. F. and Calamai, S.}, TITLE = {Folk in Tuscany: the Caterina Bueno sound archive}, YEAR = {2019}, ABSTRACT = {Caterina Bueno's sound archive is composed of 476 carriers (audio reels and compact cassettes), corresponding to nearly 714 hours of recording and was digitised during the PAR-FAS project Gra.fo (Grammo-foni. Le soffitte della voce, UNISI \& SNS, http://sns.grafo.it). It was located at two different owners': part of it was stored at Caterina's heirs' house, while the rest was kept by the former culture counsellor of the Municipality of San Marcello Pistoiese, in the Montagna Pistoiese, where a multi-media library was supposed to be set up. Unfortunately, disagreements and misunderstandings between the two parties have so far made the archive fragmented and inaccessible to the community. Both owners, independently, have turned to Silvia Calamai for the reassembly of the whole archive in the digital domain, in respect of the artist's wishes. 
After digitising, the carriers were returned to their owners, who helped in finding an arrangement for the sound archive, which can be divided according to the following categories: field-research (investigations carried out in the Tuscan countryside from the late 50s to the end of the artist's life); live performances (recordings of concerts and events); performances' rehearsals (recordings of rehearsals with musicians). In 2019 Regione Toscana decided to support the project of cataloguing and disseminating Caterina Bueno Archive and the following partners were involved: Università degli Studi di Siena (Silvia Calamai), Soprintendenza Archivistica e Bibliografica della Toscana (Maria Francesca Stamuli), CLARIN-IT (Monica Monachini), and Unione dei comuni del Casentino (Pierangelo Bonazzoli). Archivio Vi.vo will thus constitute a pilot study within CLARIN-IT to experiment methods and offer services to disciplines interested in oral sources. The ILC4CLARIN Italian node offers archiving preservation access and tools for linguistic data of a written type; within Archivio Vi.vo. the repository will be improved through experimental approach to conservation, management and access to audio and audio-video data and metadata. Archivio Vi.Vo. will develop a model which can be replicated on other audio-visual archives, even outside the context of Tuscany. The experimental activity will aim to adopt the model and high-performance computing and archiving services of the new GARR network infrastructure, built along the Cloud paradigm. 
This model will be disseminated both to the scientific community interested in accessing these data, and to the general public who enjoy ethnomusical materials produced in the territory.}, KEYWORDS = {long-term preservation, oral archives, infrastructures, conservation, access, metadata}, URL = {https://www.clarin.eu/sites/default/files/clarin2019_bazaar_calamai-stmuli-monachini.pdf}, CONFERENCE_NAME = {CLARIN 2019 Annual Conference}, CONFERENCE_PLACE = {Leipzig}, CONFERENCE_DATE = {30/09/2019-2/10/2019}, BOOKTITLE = {CLARIN Annual Conference 2019 Abstracts}, } @INPROCEEDINGS{PARDELLI_2019_INPROCEEDINGS_PGB_398956, AUTHOR = {Pardelli, G. and Goggi, S. and Boschetti, F.}, TITLE = {Strolling around the dawn of Digital Humanities}, YEAR = {2019}, ABSTRACT = {Nelle ricerche umanistiche l'impiego dell'elaboratore elettronico prende il via nella seconda metà del ventesimo secolo favorendo l'uso di metodi statistici sia nello studio di opere letterarie che nello studio delle lingue, promuovendo un sodalizio interdisciplinare che è arrivato ai giorni nostri senza interruzione. In questo contributo tentiamo di fissare alcuni momenti salienti del processo che ha visto la nascita comune della Linguistica Computazionale e delle Digital Humanities nonché i loro alterni allontanamenti e ricongiungimenti.}, KEYWORDS = {Digital Humanities (DH), Computational Linguistics (CL), History}, PAGES = {261-264}, URL = {http://aiucd2019.uniud.it/book-of-abstracts/}, CONFERENCE_NAME = {8th Annual Conference AIUCD 2019. Teaching and research in Digital Humanities' era}, CONFERENCE_PLACE = {Udine, Dipartimento Di Studi Umanistici e Patrimonio Culturale, Università di Udine}, CONFERENCE_DATE = {23-25 gennaio 2019}, } @INPROCEEDINGS{PICCINI_2019_INPROCEEDINGS_PSAMBAGE_427271, AUTHOR = {Piccini, S. and Abrate, M. and Bellandi, A. 
and Giovannetti, E.}, TITLE = {Rappresentazione e costruzione di risorse terminologiche diacroniche nell'era del web semantico}, YEAR = {2019}, ABSTRACT = {Con il presente contributo proponiamo un modello ed uno strumento volti a rappresentare formalmente, interrogare e visualizzare l'evoluzione diacronica di concetti e termini in un dato dominio, nel quadro del web semantico. Quest'ultimo sta attirando sempre più l'attenzione di lessicografi e terminologi computazionali, in quanto garantisce interoperabilità, facile accesso e riuso delle risorse lessicali/terminologiche all'interno di una comunità scientifica.}, KEYWORDS = {terminologia, terminologia diacronica, web semantico, lemon, lexO}, URL = {http://www.assiterm91.it/wp-content/uploads/2020/03/Piccini.pdf}, CONFERENCE_NAME = {XXIX Convegno Ass. I. Term}, CONFERENCE_PLACE = {Accademia della Crusca, Villa Medicea del Castello, Firenze}, CONFERENCE_DATE = {30-31/05/2019}, } @INPROCEEDINGS{PIRRELLI_2019_INPROCEEDINGS_P_424205, AUTHOR = {Pirrelli, V.}, TITLE = {Investigating inflection as a complex system}, YEAR = {2019}, ABSTRACT = {From a cross-linguistic perspective, different inflection systems appear to apportion word processing costs differently, depending on when and where, in the full form, morpho-lexical and morpho-syntactic information is encoded. The resulting balance is the outcome of an interaction between form frequency and morphological productivity, responding to basic communicative requirements. Big families of stem-sharing inflected forms constitute the productive core of an inflection system. This core is easy to learn, as it requires memorization of one stem only, with all inflected forms being redundantly built upon it. Unsurprisingly, generalizable paradigms are less sensitive to token frequency effects, and tend to be located in the long, low-frequency tail of the Zipfian distribution of word forms. 
In contrast, the head of the Zipfian distribution mostly contains small families of alternating and possibly suppletive stems, which, however shorter, morpho-phonologically simpler and easier to process, require high token frequency to be learned and resist pressure towards regularization.}, KEYWORDS = {Morphological paradigms, Mental Lexicon, Inflectional morphology}, PAGES = {23-24}, URL = {https://publications.cnr.it/doc/424205}, CONFERENCE_NAME = {International Symposium of Morphology (ISMo) 2019}, CONFERENCE_PLACE = {Université de Paris, Paris}, CONFERENCE_DATE = {25/09/2019-27/09/2019}, } @INPROCEEDINGS{SALVATORI_2019_INPROCEEDINGS_SBD_400259, AUTHOR = {Salvatori, E. and Boschetti, F. and Del Grosso, A. M.}, TITLE = {From collaborative transcription to interdisciplinary education: the postcards of the Great War case}, YEAR = {2019}, KEYWORDS = {Digital Public History, Collaborative Philology, Text Encoding, Digital Philology, Web Application, Educational, Digital Textual Scholarship}, PAGES = {211-215}, URL = {http://amsacta.unibo.it/6361/}, DOI = {10.6092/unibo/amsacta/6361}, ISBN = {978-88-942535-3-5}, CONFERENCE_NAME = {Didattica e ricerca al tempo delle Digital Humanities / Teaching and research in Digital Humanities' era}, CONFERENCE_PLACE = {Udine}, CONFERENCE_DATE = {23-25/01/2019}, BOOKTITLE = {Didattica e ricerca al tempo delle Digital Humanities / Teaching and research in Digital Humanities' era. Ottavo Convegno Annuale 8th Annual Conference AIUCD 2019 (Udine, 23-25 gennaio 2019) Book of Abstracts}, EDITOR = {Allegrezza, S.}, } @TECHREPORT{CARDILLO_2019_TECHREPORT_CS_403463, AUTHOR = {Cardillo, F. A. and Straccia, U.}, TITLE = {Towards Ontology-based Explainable Classification of Rare Events}, YEAR = {2019}, ABSTRACT = {Rare events (e.g. major floods, violent conflicts) are events that have potentially widespread and/or disastrous impact on society. The overall goal is to build a framework capable to classify, predict and explain such rare events. 
To do so, we envisage the usage of a mixture of sub-symbolic Machine Learning (ML) and Ontology-based Statistical Relational Learning (OSRL) techniques to generate rare events classifiers and predictors, which additionally may be mapped into natural language to ease human interpretability of the decision process.}, KEYWORDS = {Ontologies Explainable Classification of Rare Events, Statistical Relational Machine Learning}, PAGES = {1-2}, URL = {https://hal.archives-ouvertes.fr/hal-02104520}, } @MISC{BOSCHETTI_2019_MISC_BD_430372, AUTHOR = {Boschetti, F. and Del Grosso, A. M.}, TITLE = {Digital Philology}, YEAR = {2019}, ABSTRACT = {Session outline: 1) Introduction 1a) Computational Linguistics and Digital Philology 1b) Collaboration vs Cooperation 1c) Defining Data Types and APIs for Scholarly Editing 1d) The Hermeneutical circle 2) Digital Ecdotics 2a) Representation of Textual Phenomena by TEI-XML 2b) Representation of Textual Phenomena by Domain-Specific Languages 2c) Visual Presentation of Encoded Data 2d) Stemma Codicum and Alignment of Variants 2e) Querying Encoded Data 3) Digital Hermeneutics 3a) Linguistic and Stylistic Analyses 3b) Thematic Analysis 3c) Interdisciplinary Approaches to Philological Issues 3d) Semantic Querying 4) Conclusion 4a) Putting All Together 4b) Further Perspectives Seminar readings - Boschetti, Federico, e Angelo Mario Del Grosso. 2015. «TeiCoPhiLib: A Library of Components for the Domain of Collaborative Philology». Journal of the Text Encoding Initiative, n. 8. https://doi.org/10.4000/jtei.1285 - Burnard, Lou. 2014. WHAT IS THE TEXT ENCODING INITIATIVE?. OpenEdition Press. http://books.openedition.org/oep/426 - Schmidt, Desmond. 2010. «The inadequacy of embedded markup for cultural heritage texts». Literary and Linguistic Computing 25 (3): 337-56. https://doi.org/10.1093/llc/fqq007 Further reading - Berti, Monica, Bridget Almas, David Dubin, Greta Franzini, Simona Stoyanova, e Gregory Ralph Crane. 2014. 
«The Linked Fragment: TEI and the Encoding of Text Reuses of Lost Authors». JTEI 8. https://doi.org/10.4000/jtei.1218 - Bozzi, Andrea. 2014. «Computer-assisted Scholarly Editing of Manuscript Sources». In New publication cultures in the humanities: exploring the paradigm shift, P. Davidhazi (ed.), 99-115. Amsterdam: Amsterdam University Press. http://www.oapen.org/record/515678 - Driscoll, Matthew James, e Elena Pierazzo, (eds) 2016. Digital Scholarly Editing: Theories and Practices. Vol. 4. Digital Humanities Series. Open Book Publishers. Chapters 2-4. http://www.openbookpublishers.com/product/483/digital-scholarly-editing-theories-and-practices/eec262cdd3121ebd5eb2bf78581594f2}, KEYWORDS = {digital philology, digital humanities}, URL = {https://github.com/SunoikisisDC/SunoikisisDC-2018-2019/wiki/Summer2019-Session2}, } @MISC{CARDAMONE_2019_MISC_CD_430891, AUTHOR = {Cardamone, R. D. and Del Grosso, A. M.}, TITLE = {L'edizione digitale: una risorsa per tutti}, YEAR = {2019}, ABSTRACT = {Presentazione dell'edizione digitale degli statuti quattrocenteschi di Monterosso al Mare completa di immagini, trascrizione e traduzione.}, KEYWORDS = {digital humanities, digital scholarly edition}, URL = {https://www.cfs.unipi.it/2019/11/27/gli-statuti-quattrocenteschi-di-monterosso-restituiti-alla-comunita/}, } @MISC{DELGROSSO_2019_MISC_D_430821, AUTHOR = {Del Grosso, A. M.}, TITLE = {Introduzione sistema git per edizioni collaborative}, YEAR = {2019}, ABSTRACT = {Il sistema più diffuso per il controllo di versione per risorse elettroniche (VCS - Version Control System) e' oggi "git", un sistema open source ad architettura distribuita tra i più utilizzati per lo sviluppo di grandi progetti collaborativi, come ad esempio il kernel di Linux. La prima parte del seminario introdurrà il modello generale e i principi di progettazione che sottendono l'ambiente di versionamento, come ad esempio lo stato delle risorse, il workflow di lavoro, l'ambiente in locale e quello in remoto. 
Verranno anche mostrati i comandi più importanti per un efficace utilizzo dello strumento, come la creazione di un repository, fare commit degli aggiornamenti e salvare le modifiche su un host remoto. L'obiettivo è quello di fornire ai partecipanti una prima panoramica sul funzionamento del sistema git facendo ampio uso dell'interfaccia a riga di comando da terminale. La seconda parte del seminario introdurrà la piattaforma github, uno dei più comuni host per la gestione remota e collaborativa di repository git. Sarà mostrata quindi sia la procedura per creare un account sulla piattaforma sia le modalità di gestione per un semplice progetto collaborativo di codifica di testi.}, KEYWORDS = {git, github, summer school, digital humanities}, URL = {http://digitaltools.labcd.unipi.it/past-editions/program2019/}, } @MISC{DELGROSSO_2019_MISC_D_430828, AUTHOR = {Del Grosso, A. M.}, TITLE = {AliEval-Revisione allineamento di testi paralleli multilingua}, YEAR = {2019}, ABSTRACT = {Sistema per la revisione di testi paralleli allineati parola per parola}, KEYWORDS = {bitext alignment, digital humanities, talmud}, URL = {http://omega.ilc.cnr.it/dev/proofreader/}, } @MISC{DELGROSSO_2019_MISC_D_430903, AUTHOR = {Del Grosso, A. M.}, TITLE = {Visualizzatore immagini con tiling per software evt2js}, YEAR = {2019}, ABSTRACT = {Sviluppo componente web per la visualizzazione ottimizzata delle immagini ad alta risoluzione in seno al progetto di visualizzazione di edizioni digitali scientifiche EVT. Nello specifico il visualizzatore è stato implementato per l'edizione digitale della vita di San Teobaldo. La fonte originale è conservata presso la Diocesi di Alba.}, KEYWORDS = {digital scholarly edition, EVT, viewer}, URL = {https://www.visitmudi.it/EVT/}, } @MISC{DELGROSSO_2019_MISC_DP_430373, AUTHOR = {Del Grosso, A. M. 
and Piccini, S.}, TITLE = {Approcci digitali e computazionali allo studio dei documenti manoscritti della tarda latinità: il caso Clavius}, YEAR = {2019}, ABSTRACT = {Introduzione alle digital humanities e alla lessicografia digitale con esempi e riferimenti tratti dal progetto Clavius on the Web.}, KEYWORDS = {digital humanities, digital philology, digital lexicography}, URL = {https://www.dipartimentidieccellenza-dilef.unifi.it/vp-122-calendario-attivita-didattica-marzo-2019.html}, } @MISC{DELGROSSO_2019_MISC_DS_445759, AUTHOR = {Del Grosso, A. M. and Spampinato, D.}, TITLE = {Edizione digitale delle Lettere di Bellini}, YEAR = {2019}, ABSTRACT = {La codifica dell'edizione scientifica digitale è condotta seguendo le ultime linee guida della Text Encoding Initiative e istanziata su un campione rappresentativo di missive. L'edizione è accessibile via web con il software open source: Edition Visualization Technology; ma è anche stata progettata per essere integrata nel percorso museale interattivo e multimediale in allestimento, senza rinunciare al rigore scientifico della trascrizione delle lettere presente nella recente edizione critica.}, KEYWORDS = {Digital Scholarly Edition, TEI, Vincenzo Bellini, Digital Correspondence}, URL = {http://licodemo.ilc.cnr.it/bellini-in-rete}, } @ARTICLE{CARDILLO_2018_ARTICLE_CFMP_396348, AUTHOR = {Cardillo, F. A. and Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {Deep Learning of Inflection and the Cell-Filling Problem}, YEAR = {2018}, ABSTRACT = {Machine learning offers two basic strategies for morphology induction: lexical segmentation and surface word relation. The first approach assumes that words can be segmented into morphemes. Inferring a novel inflected form requires identification of morphemic constituents and a strategy for their recombination. The second approach dispenses with segmentation: lexical representations form part of a network of associatively related inflected forms. 
Production of a novel form consists in filling in one empty node in the network. Here, we present the results of a task of word inflection by a recurrent LSTM network that learns to fill in paradigm cells of incomplete verb paradigms. Although the task does not require morpheme segmentation, we show that accuracy in carrying out the inflection task is a function of the model's sensitivity to paradigm distribution and morphological structure.}, KEYWORDS = {Deep Learning, LSTM, Cell-Filling Problem}, PAGES = {57-75}, URL = {https://publications.cnr.it/doc/396348}, VOLUME = {4}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{CHIARELLA_2018_ARTICLE_CBBCRZMC_393262, AUTHOR = {Chiarella, D. and Bibuli, M. and Bruzzone, G. and Caccia, M. and Ranieri, A. and Zereik, E. and Marconi, L. and Cutugno, P.}, TITLE = {A Novel Gesture-Based Language for Underwater Human-Robot Interaction}, YEAR = {2018}, ABSTRACT = {The underwater environment is characterized by hazardous conditions that make it difficult to manage and monitor even the simplest human operation. The introduction of a robot companion with the task of supporting and monitoring the divers during their activities and operations underwater can help to solve some of the problems that usually arise in this scenario. In this context, a proper communication between the diver and the robot is imperative for the success of the dive. However, the underwater environment poses a set of technical challenges which are not readily surmountable thus limiting the spectrum from which possibilities can be chosen. This paper presents the design and development of a gesture-based communication language which has been employed for the entire duration of the European project CADDY (Cognitive Autonomous Diving Buddy). 
This language, the Caddian, was built upon consolidated and standardized underwater gestures that are commonly used in recreational and professional diving. Its use and integration during field tests with a remotely operated underwater vehicle (ROV) is also shown.}, KEYWORDS = {marine robotics, underwater human-robot interaction, gesture-based language, field trials}, PAGES = {19}, URL = {https://www.mdpi.com/2077-1312/6/3/91}, VOLUME = {6}, DOI = {10.3390/jmse6030091}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2077-1312}, JOURNAL = {Journal of marine science and engineering}, } @ARTICLE{DISEGNI_2018_ARTICLE_D_395302, AUTHOR = {Di Segni, D. G.}, TITLE = {Il Talmud nella nuova traduzione italiana}, YEAR = {2018}, ABSTRACT = {Origine, struttura e caratteristiche del Talmud. I roghi e la censura del Talmud. La nuova traduzione italiana del Talmud. La rilevanza del Talmud nel mondo contemporaneo.}, KEYWORDS = {Talmud, Istituto Linguistica Computazionale CNR, Traduco}, PAGES = {633-644}, URL = {https://publications.cnr.it/doc/395302}, VOLUME = {XV}, PUBLISHER = {Il Mulino (Bologna, Italia)}, ISSN = {1824-0771}, JOURNAL = {Nuova informazione bibliografica}, } @ARTICLE{FERRO_2018_ARTICLE_FMP_397012, AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {Discriminative word learning is sensitive to inflectional entropy}, YEAR = {2018}, ABSTRACT = {Psycholinguistic evidence based on inflectional and derivational word families has emphasised the combined role of Paradigm Entropy and Inflectional Entropy in human word processing. Although the way frequency distributions affect behavioural evidence is clear in broad outline, we still miss a clear algorithmic model of how such a complex interaction takes place and why. 
The main challenge is to understand how the local interaction of learning and processing principles in morphology can result in global effects that require knowledge of the overall distribution of stems and affixes in word families. We show that principles of discriminative learning can shed light on this issue. We simulate learning of verb inflection with a discriminative recurrent network of specialised processing units, whose level of temporal connectivity reflects the frequency distribution of input symbols in context. We analyse the temporal dynamic with which connection weights are adjusted during discriminative learning, to show that self-organised connections are optimally functional to word processing when the distribution of inflected forms in a paradigm (Paradigm Entropy) and the distribution of their inflectional affixes across paradigms (Inflectional Entropy) diverge minimally.}, KEYWORDS = {discriminative learning, word processing, recurrent neural networks, relative entropy}, PAGES = {307-327}, URL = {https://www.rivisteweb.it/doi/10.1418/91871}, VOLUME = {XVII}, DOI = {10.1418/91871}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{GOGGI_2018_ARTICLE_GPRBM_388612, AUTHOR = {Goggi, S. and Pardelli, G. and Russo, I. and Bartolini, R. and Monachini, M.}, TITLE = {Providing Access to Grey Literature: The CLARIN Infrastructure}, YEAR = {2018}, ABSTRACT = {"In the electronic age, the World Wide Web has played a major role in making scientific information accessible to a wide audience more rapidly and efficiently. 
This democratic approach to information dissemination in science is changing the way science is perceived and implemented in our daily lives" (Weintraub, 2000).}, KEYWORDS = {CLARIN-IT, CLARIN-European Research Infrastructure for Language Resources and Technology, Grey Literature}, PAGES = {87-93}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85048643343\&origin=inward}, VOLUME = {14}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{PECCHIOLI_2018_ARTICLE_PABGM_397525, AUTHOR = {Pecchioli, A. and Albanesi, D. and Bellandi, A. and Giovannetti, E. and Marchi, S.}, TITLE = {Annotazione Linguistica Automatica dell'Ebraico Mishnaico: Esperimenti sul Talmud Babilonese}, YEAR = {2018}, ABSTRACT = {The automatic linguistic analysis of ancient Hebrew represents a new research opportunity in the field of Jewish studies. In fact, very little has been produced, both in terms of linguistic resources and, above all, of tools for the analysis of ancient Hebrew. This article illustrates a work born within the Italian Translation of the Babylonian Talmud Project aimed at the construction of an automatic linguistic annotator of Mishnaic Hebrew.}, KEYWORDS = {Babylonian Talmud, Natural Language Processing, Mishnaic Hebrew}, PAGES = {281-291}, URL = {http://aisg.cise.unipi.it/Materia-giudaica-2018/018-Pecchioli%20pp%20281-292B.pdf}, VOLUME = {XXIII}, PUBLISHER = {Giuntina (Firenze, Italia)}, ISSN = {2282-4499}, JOURNAL = {Materia giudaica Print}, } @BOOK{GARCIAMACHO_2018_BOOK_GS_389832, AUTHOR = {Garcia Macho, M. L. and Sassi, M.}, TITLE = {Léxico del Tratado del esphera y del arte de marear con el regimiento de las alturas, con algunas reglas nuevamente escritas muy necessarias de Francisco de Falero}, YEAR = {2018}, ABSTRACT = {El léxico del Tratado del esphera y del Arte del marear de Francisco Faleiro, forma parte del conjunto lexicográfico del Diccionario de la navegación del Siglo de Oro. 
Para la realización de este diccionario, se ha contado con dos proyectos de investigación: HUM2006, financiado por el Ministerio de Educación y Ciencia de España, y FFI2012-36768, del Ministerio de Economía y Competitividad y cuatro ayudas de movilidad: dos concedidas por el Consiglio Nazionale della Ricerca italiano, CNR [Istituto di Linguistica Computazionale de Italia (2006 y 2007)] y dos por el Ministerio de Ciencia e Innovación de España [Programa de Estancias de Profesores de Universidad e Investigadores del CSIC en Centros de Investigación Extranjeros (2005 y 2010)]. Este volumen contiene la concordancia lematizada, los índices de frecuencia de lemas y formas, los índices de los nombres propios y el diccionario inverso del Tratado.}, KEYWORDS = {Indici vari, Dizionario della Navigazione, Siglo de Oro, Concordanze per lemma}, PAGES = {1-488}, URL = {http://portal.uned.es/portal/page?_pageid=93,62295002\&_dad=portal\&_schema=PORTAL}, ISBN = {978-84-362-7383-0}, } @INCOLLECTION{AGNOLONI_2018_INCOLLECTION_AV_423867, AUTHOR = {Agnoloni, T. and Venturi, G.}, TITLE = {Semantic processing of legal texts}, YEAR = {2018}, ABSTRACT = {The paper provides an overview of the field of semantic processing of legal texts, combining views and perspectives from the computational linguistic and Artificial Intelligence and Law (AI \& Law) communities. The last few years have seen a growing body of research and practice in the field of AI \& Law which addresses a range of topics: semantic and cross-language legal Information Retrieval, document classification, legal drafting, legal knowledge extraction, automated legal argumentation, as well as the construction of legal ontologies and their application. The increasing availability of legal corpora accessible as processable data is making viable their partially automated conversion into legal knowledge bases. 
In this context, it is of paramount importance the use of Natural Language Processing (NLP) techniques and tools that automate the process of knowledge extraction from legal texts. Accordingly, the paper aims at discussing how the two research communities can benefit from the interaction of the different perspectives: the legal artificial intelligence community can gain insight into state-of-the-art linguistic technologies, tools and resources, and the computational linguists can take advantage of the large and often multilingual legal resources (corpora as well as lexicons and ontologies) for training, domain adaptation and evaluation of current NLP technologies and tools. The authors will present an overview on semantic resources for legal texts annotation and processing. Different kind of resources (linguistic, lexical, conceptual, formal) will be introduced and their differences, methodological premises, intended use and possible integration will be highlighted. The peculiarities of the legal domain and legal language will be discussed in relation with the construction and use of legal semantic resources. The issue of multilingualism, multilingual and multi-legal system access to legal information will be also discussed showing how formalized lexical, linguistic and conceptual legal resources can support the task. How NLP tools and techniques can be fruitfully exploited to semantically process collections of legal texts will be introduced in the second part of the paper. 
In particular, the authors will show how they can be used to automatically extract the relevant knowledge contained in legal text corpora, to structure the extracted knowledge in semantic resources (such as domain-specific ontologies or thesauri), and to semantically annotate the texts with the extracted information to pave the way to content-based access and querying.}, KEYWORDS = {Semantic Processing, Natural Language Processing, Ontology Learning, Legal Texts}, PAGES = {109-137}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85061292435\&origin=inward}, DOI = {10.1515/9781614514664-006}, PUBLISHER = {Walter De Gruyter Inc (Boston/Berlin/Munich, USA)}, ISBN = {978-1-61451-669-9}, } @INCOLLECTION{MARZI_2018_INCOLLECTION_M_390949, AUTHOR = {Marzi, C.}, TITLE = {Morpho - phonotactic typicality and second language acquisition and processing}, YEAR = {2018}, ABSTRACT = {According to many accounts of word processing and access, an input word concurrently activates non-target lexical neighbours that become available for further processing stages. Psycholinguistic evidence shows how prediction and competition based on word similarity and lexical redundancy affect speakers' anticipation of incoming stimuli, so as to speed input recognition and improve lexical decision (Luce/Pisoni 1998; Bailey/Hahn 2001; Hahn/Bailey 2005, among others). As observed by Bailey and Hahn (2001), wordlikeness affects both language acquisition and processing. Wordlikeness can be defined in terms of phonotactic/ orthotactic likelihood and lexical density. Both neighbourhood size and frequency distribution of neighbours are known to play a role in word prediction and competition. In this perspective, monitoring this competing behaviour can shed some light on the relationship between phonotactic/orthotactic likelihood and lexical density, and their connection with issues of word recognition and production. 
My goal in this chapter is to provide a computational model of bilingual lexical self-organisation, with language-independent architectural and functional requirements of the lexical store, together with language-specific phonotactic constraints, appearing to control aspects of interaction of first and second language (hereafter L1-L2) and define the propensity to acquire novel words, showing how acquisitional strategies are affected by past knowledge of language and entrenched expectations on incoming stimuli. On the one hand, a strong expectation based on L1 affects the way L2 inputs are perceived. On the other hand, language-independent architectural and functional requirements of the lexical store, such as its highly integrated organisation and language-non-selective access (Dijkstra/van Heuven 2002), appear to control aspects of L1-L2 interaction. Simulations in the neuro-computational framework of Temporal Self-Organising Maps (TSOMs, Ferro et al. 2011; Marzi et al. 2012, 2014a, 2016; Pirrelli et al. 2014, 2015), where word processing and lexical acquisition are implemented as recoding and storage strategies for time-series of symbolic units, will highlight how partially overlapping phonological representations may cause competition in incremental learning, and how weaker connections and recycled memory resources make L2 representations underspecified due to the lack of strong lexical expectations and selective specialisation typical of the L1 representations.}, KEYWORDS = {L1-L2 acquisition, bilingual lexical self-organisation, phonotactic typicality, discriminative recurrent network}, PAGES = {219-232}, URL = {https://www.francoangeli.it/Ricerca/Scheda_Libro.aspx?ID=25216\&Tipo=Libro\&strRicercaTesto=25216\&lingua=it\&titolo=tipologia%2c+acquisizione%2c+grammaticalizzazione.+typology%2c++acquisition%2c+grammaticalization+studies}, VOLUME = {1095. 
79}, PUBLISHER = {Franco Angeli (Milano, ITA)}, ISBN = {978-88-917-7847-5}, BOOKTITLE = {Tipologia, Acquisizione, Grammaticalizzazione-Typology, Acquisition, Grammaticalization studies}, EDITOR = {Chini, M. and Cuzzolin, P.}, } @INCOLLECTION{MONACHINI_2018_INCOLLECTION_MNS_387374, AUTHOR = {Monachini, M. and Nicolosi, A. and Stefanini, A.}, TITLE = {Digital Classics and CLARIN-IT: What Italian Scholars of Ancient Greek Expect from Digital Resources and Technology}, YEAR = {2018}, ABSTRACT = {This paper presents and discusses the findings of a survey carried out to assess the use of digital resources and digital technologies with respect to work in ancient Greek scholarship, with the aim to identify the factors that are likely to constrain its use as well as to elicit needs and requirements of ancient Greek scholars in Italy. The survey is in line with the principles behind the user engagement strategy developed by CLARIN-ERIC and constitutes one of the national efforts undertaken by CLARIN-IT to contribute to the wider impact of CLARIN on Digital Classicists. The survey, as well as other surveys carried out in the sector in the last decade, points out that most of the available resources do not respond to users' requirements. This motivated us to develop a mock-up of a digital editor of Archilochus, which, mostly grounded on previous studies by Nicolosi, draws on the outcomes of the survey. The experiment includes a sample prototype to submit for evaluation by end-users. 
The final aim is to identify good practices and new models to enable new approaches to the study of classical texts and profile a new workbench for scholarly digital edition.}, KEYWORDS = {Digital Classics, User Involvement, User requirements, CLARIN ERIC, CLARIN Infrastructure}, PAGES = {61-74}, URL = {https://ep.liu.se/ecp/147/006/ecp17147006.pdf}, VOLUME = {147}, ISBN = {978-91-7685-273-6}, BOOKTITLE = {Selected papers from the CLARIN Annual Conference 2017, Budapest, 18-20 September 2017}, } @INCOLLECTION{PIRRELLI_2018_INCOLLECTION_P_398877, AUTHOR = {Pirrelli, V.}, TITLE = {Morphological Theory And Computational Linguistics}, YEAR = {2018}, ABSTRACT = {For decades, processing issues have taken centre stage in the debate on the theoretical foundations of linguistic morphology. The present chapter provides a computer-based, algorithmic view on these issues, ranging from the encoding of input data to the structure of output representations, going through the basic operations of word splitting, storage, access, retrieval, and assembly of intermediate representations.}, KEYWORDS = {word processing, word storage, computational morphology, lexical modelling, machine language learning, finite state technology, artificial neural networks}, PAGES = {573-593}, URL = {http://www.oxfordhandbooks.com/view/10.1093/oxfordhb/9780199668984.001.0001/oxfordhb-9780199668984-e-32?rskey=qZuY8Z\&result=9}, DOI = {10.1093/oxfordhb/9780199668984.013.32}, PUBLISHER = {Oxford University Press (Oxford, GBR)}, ISBN = {978-0-19-966898-4}, BOOKTITLE = {The Oxford Handbook of Morphological Theory}, EDITOR = {Audring, J. and Masini, F.}, } @EDITORIAL{ASCOLI_2018_EDITORIAL_AD_395313, AUTHOR = {Ascoli, M. 
and Di Segni, G.}, TITLE = {Talmud Babilonese - Trattato Ta'anìt}, YEAR = {2018}, ABSTRACT = {Traduzione e commento del trattato Ta'anit (Digiuno) del Talmud Babilonese con testo originale a fronte}, KEYWORDS = {Talmud, Traduco, Linguistica computazionale}, PAGES = {332}, URL = {https://www.talmud.it/}, VOLUME = {9}, PUBLISHER = {Giuntina (Firenze, ITA)}, ISBN = {978-88-8057-748-5}, } @EDITORIAL{BRANCO_2018_EDITORIAL_BCC_401835, AUTHOR = {Branco, A. and Calzolari, N. and Choukri, K.}, TITLE = {4REAL 2018 Workshop on Replicability and Reproducibility of Research Results in Science and Technology of Language Proceedings}, YEAR = {2018}, ABSTRACT = {This workshop sought to contribute to the discussion and the advancement on a topic that has been given insufficient attention in the research area of language processing tools and resources and that has been an important topic emerging in other scientific areas, continuing the objectives of the first edition of the 4REAL workshop, at LREC 2016. We invited the submission of articles that present cases, either with positive or negative results, of actual replication or reproduction exercises of previous published results in our area.}, KEYWORDS = {Reproduction, Replication, Validation}, PAGES = {1-36}, URL = {http://4real2018.di.fc.ul.pt/wp-content/uploads/2018/05/lrec2018_workshop_proceedings_4REAL.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-21-4}, } @EDITORIAL{CALZOLARI_2018_EDITORIAL_CCCDGHIMMMMOPT_401744, AUTHOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. 
and Tokunaga, T.}, TITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC-2018)}, YEAR = {2018}, ABSTRACT = {It is the LREC 20th Anniversary and LREC has become one of the most successful conferences of the field. Data are pervasive in Natural Language Processing and Language Technology: we call our data Language Resources (LR). But when LREC was started by ELRA, in 1998 in Granada, from an idea of Antonio Zampolli and Joseph Mariani, it was really a new adventure and a challenge. There were well established big conferences but he thought that the new emerging field of Language Resources deserved its own dedicated forum. In the keynote talk I gave at LREC1998 I could say: "the infrastructural role of Language Resources as the necessary common platform on which new technologies and applications can be based is nowadays widely recognised." This could not have been said only few years before. I had the pleasure and the honour of being involved in LREC from the beginning, first as member of the Program Committee and since 2004 as Conference Chair.}, KEYWORDS = {Language Resources, Language Technology}, PAGES = {1-4628}, URL = {https://www.aclweb.org/anthology/L18-1}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, } @EDITORIAL{SORIA_2018_EDITORIAL_SBP_387365, AUTHOR = {Soria, C. and Besacier, L. and Pretorius, L.}, TITLE = {Proceedings of CCURL 2018-Sustaining knowledge diversity in the digital age}, YEAR = {2018}, ABSTRACT = {Proceedings of the CCURL 2018 workshop}, KEYWORDS = {knowledge diversity, digital age, language resources, language technologies}, PAGES = {i-75}, URL = {http://lrec-conf.org/workshops/lrec2018/W26/pdf/book_of_proceedings.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-22-1}, } @EDITORIAL{BERNHARD_2018_EDITORIAL_BS_443019, AUTHOR = {Bernhard, D. 
and Soria, C.}, TITLE = {Automatic processing of under-resourced languages|Traitement automatique des langues peu dotées}, YEAR = {2018}, KEYWORDS = {less-resourced languages, NLP}, PAGES = {7-14}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85063404297\&origin=inward}, VOLUME = {59}, PUBLISHER = {TAL (Saint-Cloud, Francia)}, ISSN = {1248-9433}, BOOKTITLE = {TAL. Traitement automatique des langues}, } @INPROCEEDINGS{ADORNI_2018_INPROCEEDINGS_ADKTV_385339, AUTHOR = {Adorni, G. and Dell'Orletta, F. and Koceva, F. and Torre, I. and Venturi, G.}, TITLE = {Extracting dependency relations from digital learning content}, YEAR = {2018}, ABSTRACT = {Digital Libraries present tremendous potential for developing e-learning applications, such as text comprehension and question-answering tools. A way to build this kind of tools is structuring the digital content into relevant concepts and dependency relations among them. While the literature offers several approaches for the former, the identification of dependencies, and specifically of prerequisite relations, is still an open issue. We present an approach to manage this task.}, KEYWORDS = {Prerequisite relationship, Concept extraction, Graph mining}, PAGES = {114-119}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85041860435\&origin=inward}, VOLUME = {806}, DOI = {10.1007/978-3-319-73165-0_11}, PUBLISHER = {Springer (Heidelberg, Germania)}, ISSN = {1865-0929}, CONFERENCE_NAME = {14th Italian Research Conference on Digital Libraries (IRCDL 2018)}, CONFERENCE_PLACE = {Udine}, CONFERENCE_DATE = {25-26 gennaio 2018}, BOOKTITLE = {Communications in computer and information science (Print)}, } @INPROCEEDINGS{ALZETTA_2018_INPROCEEDINGS_ADMSV_391617, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Simi, M. and Venturi, G.}, TITLE = {Assessing the Impact of Iterative Error Detection and Correction. 
A Case Study on the Italian Universal Dependency Treebank}, YEAR = {2018}, ABSTRACT = {Detection and correction of errors and inconsistencies in "gold treebanks" are becoming more and more central topics of corpus annotation. The paper illustrates a new incremental method for enhancing treebanks, with particular emphasis on the extension of error patterns across different textual genres and registers. Impact and role of corrections have been assessed in a dependency parsing experiment carried out with four different parsers, whose results are promising. For both evaluation datasets, the performance of parsers increases, in terms of the standard LAS and UAS measures and of a more focused measure taking into account only relations involved in error patterns, and at the level of individual dependencies.}, KEYWORDS = {Error Detection, Universal Dependency Treebanks, Syntactic parsing}, PAGES = {1-7}, URL = {http://universaldependencies.org/udw18/PDFs/39_Paper.pdf}, ISBN = {978-1-948087-84-1}, CONFERENCE_NAME = {Universal Dependencies Workshop 2018 (UDW 2018)}, CONFERENCE_PLACE = {Brussels}, CONFERENCE_DATE = {01/11/2018}, } @INPROCEEDINGS{ALZETTA_2018_INPROCEEDINGS_ADMV_382333, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Dangerous Relations in Dependency Treebanks}, YEAR = {2018}, ABSTRACT = {The paper illustrates an effective and innovative method for detecting erroneously annotated arcs in gold dependency treebanks based on an algorithm originally developed to measure the reliability of automatically produced dependency relations. The method permits to significantly restrict the error search space and, more importantly, to reliably identify patterns of systematic recurrent errors which represent dangerous evidence to a parser which tendentially will replicate them. 
Achieved results demonstrate effectiveness and reliability of the method.}, KEYWORDS = {Dependency treebanks, Error Detection, Linguistic Annotation}, PAGES = {201-210}, URL = {http://aclweb.org/anthology/W/W17/W17-7624.pdf}, ISBN = {978-80-88132-04-2}, CONFERENCE_NAME = {16th International Workshop on Treebanks and Linguistic Theories}, CONFERENCE_PLACE = {Praga}, CONFERENCE_DATE = {23-24 gennaio 2018}, } @INPROCEEDINGS{ALZETTA_2018_INPROCEEDINGS_ADMV_385342, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Universal Dependencies and Quantitative Typological Trends. A Case Study on Word Order}, YEAR = {2018}, ABSTRACT = {The paper presents a new methodology aimed at acquiring typological evidence from "gold" treebanks for different languages. In particular, it investigates whether and to what extent algorithms developed for assessing the plausibility of automatically produced syntactic annotations could contribute to shed light on key issues of the linguistic typological literature. It reports the first and promising results of a case study focusing on word order patterns carried out on three different languages (English, Italian and Spanish).}, KEYWORDS = {Linguistic Knowledge Extraction, Dependency Treebanks, Linguistic Typology}, PAGES = {4540-4549}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/1109.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Proceedings of the 11th Edition of the Language Resources and Evaluation Conference (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki (Japan)}, CONFERENCE_DATE = {7-12 maggio 2018}, } @INPROCEEDINGS{BARTOLINI_2018_INPROCEEDINGS_BGMP_387159, AUTHOR = {Bartolini, R. and Goggi, S. and Monachini, M. 
and Pardelli, G.}, TITLE = {The LREC Workshops Map}, YEAR = {2018}, ABSTRACT = {The aim of this work is to present an overview of the research presented at the LREC workshops over the years 1998-2016 with the aim to shed light on the community represented by workshop participants in terms of country of origin, type of affiliation, gender. There has been also an effort towards the identification of the major topics dealt with as well as of the terminological variations noticed in this time span. Data has been retrieved from the portal of the European Language Resources Association (ELRA) which organizes the conference and the resulting corpus made up of workshops titles and of the related presentations has then been processed using a term extraction tool developed at ILC-CNR.}, KEYWORDS = {corpus creation, terminology, LREC}, PAGES = {557-562}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/summaries/639.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{BOSCO_2018_INPROCEEDINGS_BSDPT_398987, AUTHOR = {Bosco, C. and Sanguinetti, M. and Dell'Orletta, F. and Poletto, F. 
and Tesconi, M.}, TITLE = {Overview of the EVALITA 2018 hate speech detection task}, YEAR = {2018}, ABSTRACT = {The Hate Speech Detection (HaSpeeDe) task is a shared task on Italian social media (Facebook and Twitter) for the detection of hateful content, and it has been proposed for the first time at EVALITA 2018. Providing two datasets from two different online social platforms differently featured from the linguistic and communicative point of view, we organized the task in three tasks where systems must be trained and tested on the same resource or using one in training and the other in testing: HaSpeeDe-FB, HaSpeeDe-TW and Cross-HaSpeeDe (further subdivided into Cross-HaSpeeDe FB and Cross-HaSpeeDe TW sub-tasks). Overall, 9 teams participated in the task, and the best system achieved a macro F1-score of 0.8288 for HaSpeeDe-FB, 0.7993 for HaSpeeDe-TW, 0.6541 for Cross-HaSpeeDe FB and 0.6985 for Cross-HaSpeeDe TW. In this report, we describe the datasets released and the evaluation measures, and we discuss results.}, KEYWORDS = {Hate Speech Detection, Social Media Analysis}, PAGES = {9}, URL = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85058647605\&partnerID=q2rCbXpz}, VOLUME = {2263}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {EVALITA 2018-Sixth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian}, CONFERENCE_PLACE = {Torino, Italia}, CONFERENCE_DATE = {10-12/12/2018}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{BRUNATO_2018_INPROCEEDINGS_BDDIV_391619, AUTHOR = {Brunato, D. and De Mattei, L. and Dell'Orletta, F. and Iavarone, B. and Venturi, G.}, TITLE = {Is this sentence difficult? Do you agree?}, YEAR = {2018}, ABSTRACT = {In this paper, we present a crowdsourcing-based approach to model the human perception of sentence complexity. 
We collect a large corpus of sentences rated with judgments of complexity for two typologically-different languages, Italian and English. We test our approach in two experimental scenarios aimed to investigate the contribution of a wide set of lexical, morpho-syntactic and syntactic phenomena in predicting i) the degree of agreement among annotators independently from the assigned judgment and ii) the perception of sentence complexity.}, KEYWORDS = {Linguistic complexity, Crowdsourcing, Human perception}, PAGES = {1-10}, URL = {https://www.aclweb.org/anthology/D18-1289/}, DOI = {10.18653/v1/D18-1289}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-948087-84-1}, CONFERENCE_NAME = {Conference on Empirical Methods in Natural Language Processing (EMNLP)}, CONFERENCE_PLACE = {Brussels}, CONFERENCE_DATE = {31/10/2018-04/11/2018}, } @INPROCEEDINGS{CALZOLARI_2018_INPROCEEDINGS_C_401831, AUTHOR = {Calzolari, N.}, TITLE = {Introduction to LREC 2018 by Nicoletta Calzolari Chair of the 11th edition of LREC ELRA Honorary President}, YEAR = {2018}, ABSTRACT = {It is the LREC 20th Anniversary and LREC has become one of the most successful conferences of the field. Data are pervasive in Natural Language Processing and Language Technology: we call our data Language Resources (LR).}, KEYWORDS = {Language Resources, Language Technology}, PAGES = {1-6}, URL = {https://www.aclweb.org/anthology/L18-1}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A.
and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{CHIRIATTI_2018_INPROCEEDINGS_CDDMPSV_423871, AUTHOR = {Chiriatti, G. and Della Gala, V. and Dell'Orletta, F. and Montemagni, S. and Pettenati, M. C. and Sagri, M. T. and Venturi, G.}, TITLE = {A NLP-based analysis of reflective writings by Italian teachers}, YEAR = {2018}, ABSTRACT = {This paper reports first results of a wider study devoted to exploit the potentialities of a NLP-based approach to the analysis of a corpus of reflective writings on teaching activities. We investigate how a wide set of linguistic features allows reconstructing the linguistic profile of the texts written by the Italian teachers and predicting whether are reflective.}, KEYWORDS = {Natural Language Processing, Reflective Writings, Linguistic Profiling, Document Classification}, PAGES = {1-7}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85057733802\&origin=inward}, VOLUME = {2253}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {5th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {10-12/12/2018}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{CIMINO_2018_INPROCEEDINGS_CDBV_423870, AUTHOR = {Cimino, A. and Dell'Orletta, F. and Brunato, D. and Venturi, G.}, TITLE = {Sentences and documents in native language identification}, YEAR = {2018}, ABSTRACT = {Starting from a wide set of linguistic features, we present the first in depth feature analysis in two different Native Language Identification (NLI) scenarios. We compare the results obtained in a traditional NLI document classification task and in a newly introduced sentence classification task, investigating the different role played by the considered features. 
Finally, we study the impact of a set of selected features extracted from the sentence classifier in document classification.}, KEYWORDS = {Natural Language Processing, Native Language Identification}, PAGES = {1-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85057749754\&origin=inward}, VOLUME = {2253}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {5th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {10-12/12/2018}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{COCCIU_2018_INPROCEEDINGS_CBVD_423873, AUTHOR = {Cocciu, E. and Brunato, D. and Venturi, G. and Dell'Orletta, F.}, TITLE = {Gender and Genre Linguistic profiling: A case study on female and male journalistic and diary prose}, YEAR = {2018}, ABSTRACT = {This paper intends to investigate the linguistic profile of male- and female-authored texts belonging to two very different textual genres: newspaper articles and diary prose. By using a wide set of linguistic features automatically extracted from text and spanning across different levels of linguistic description, from lexicon to syntax, our analysis highlights the peculiarities of the two examined genres and how the genre dimension is influenced by variation depending on author's gender (and vice versa).}, KEYWORDS = {Natural Language Processing, Genre Classification, Linguistic Profiling}, PAGES = {1-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85057759773\&origin=inward}, VOLUME = {2253}, PUBLISHER = {M. 
Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {5th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {10-12/12/2018}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{DEFELICE_2018_INPROCEEDINGS_DDVLM_423872, AUTHOR = {De Felice, I. and Dell'Orletta, F. and Venturi, G. and Lenci, A. and Montemagni, S.}, TITLE = {Italian in the Trenches: Linguistic annotation and analysis of texts of the great war}, YEAR = {2018}, ABSTRACT = {The paper illustrates the design and development of a textual corpus representative of the historical variants of Italian during the Great War, which was enriched with linguistic (lemmatization and pos-tagging) and meta-linguistic annotation. The corpus, after a manual revision of the linguistic annotation, was used for specializing existing NLP tools to process historical texts with promising results.}, KEYWORDS = {Natural Language Processing, Automatic Linguistic Annotation}, PAGES = {1-5}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85057734451\&origin=inward}, VOLUME = {2253}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {5th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {10-12/12/2018}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{DELGRATTA_2018_INPROCEEDINGS_DGPC_387155, AUTHOR = {Del Gratta, R. and Goggi, S. and Pardelli, G. and Calzolari, N.}, TITLE = {LREMap, a Song of Resources and Evaluation}, YEAR = {2018}, ABSTRACT = {After 8 years we revisit the LRE Map of Language Resources, introduced at LREC 2010, to try to get a picture of the field and its evolution as reflected by the creation and use of Language Resources.
The purpose of the Map was in fact "to shed light on the vast amount of resources that represent the background of the research presented at LREC". It also aimed at a "change of culture in the field, actively engaging each researcher in the documentation task about resources". The data analysed here have been provided by the authors of several conferences during the phase of submission of papers, and contain information about ca. 7500 resources. We analysed the LRE Map data from many different viewpoints and the paper reports on the global picture, on different trends emerging from the diachronic perspective and finally on some comparisons between the 2 major conferences present in the Map: LREC and COLING.}, KEYWORDS = {LR Infrastructure, Metadata, LR Documentation}, PAGES = {1275-1281}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/summaries/300.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{DELGROSSO_2018_INPROCEEDINGS_DBGMN_390296, AUTHOR = {Del Grosso, A. M. and Bellandi, A. and Giovannetti, E. and Marchi, S. and Nahli, O.}, TITLE = {Scanning is Just the Beginning: Exploiting Text and Language Technologies to Enhance the Value of Historical Manuscripts}, YEAR = {2018}, ABSTRACT = {In this paper we present a digital process for the explicitation of the textual, linguistic and semantic content of historical manuscripts.
The proposed workflow is composed of a sequence of incremental steps, each of which is described both on a methodological and practical perspective. The steps are: 1) visualization and structuring of metadata, 2) transcription, 3) structural encoding, 4) annotation, 5) lexical and conceptual structuring.}, KEYWORDS = {Computational Lexica, Digital Scholarly Editing, Digital Humanities, al-Qamus al-Muhit}, PAGES = {214-219}, URL = {https://publications.cnr.it/doc/390296}, DOI = {10.1109/CIST.2018.8596373}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-5386-4385-3}, CONFERENCE_NAME = {CIST 2018 WH-MNLP}, CONFERENCE_PLACE = {MARRAKECH, MOROCCO}, CONFERENCE_DATE = {21-27/10/2018}, BOOKTITLE = {Colloquium in Information Science and Technology, CIST}, EDITOR = {Al Achhab, M. and El Mohajir, M. and Jellouli, I. and El Mohajir, B. E.}, } @INPROCEEDINGS{FERRO_2018_INPROCEEDINGS_FCGMNCP_390504, AUTHOR = {Ferro, M. and Cappa, C. and Giulivi, S. and Marzi, C. and Nahli, O. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {ReadLet: Reading for Understanding}, YEAR = {2018}, ABSTRACT = {This paper focuses on motivation, objectives, design issues and preliminary results of ReadLet, an ICT platform for assessing reading efficiency in primary school children. Test data are discussed on a sample of 200 early graders, reading French, Italian and Standard Modern Arabic (SMA).}, KEYWORDS = {Reading, text comprehension, Specific Learning Disorders, multimodal signal processing, cloud computing, portable assistive technology}, PAGES = {404-409}, URL = {https://publications.cnr.it/doc/390504}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-5386-4385-3}, CONFERENCE_NAME = {IEEE-CIST2018 LED-ICT}, CONFERENCE_PLACE = {Marrakech, Morocco}, CONFERENCE_DATE = {21-27/10/2018}, } @INPROCEEDINGS{GOGGI_2018_INPROCEEDINGS_GPRBM_385571, AUTHOR = {Goggi, S. and Pardelli, G. and Russo, I. and Bartolini, R. 
and Monachini, M.}, TITLE = {Providing Access to Grey Literature: The CLARIN Infrastructure}, YEAR = {2018}, ABSTRACT = {This work will provide a map of the documentation archived in the CLARIN infrastructure, whose purpose is to share language resources produced and managed in the various European countries but finally merged into the CLARIN data centers for allowing access, interoperability, reuse and preservation of scientific documentation as well as Grey Literature.}, KEYWORDS = {CLARIN Infrastructure, Language Resources, Grey Literature}, PAGES = {93-99}, URL = {http://greyguide.isti.cnr.it/wp-content/uploads/2018/03/GL19_Conference_Proceedings.pdf}, VOLUME = {19}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-31-9}, CONFERENCE_NAME = {Nineteenth International Conference on Grey Literature, GL19}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {October 23-24, 2017}, BOOKTITLE = {Nineteenth International Conference on Grey Literature "Public Awareness and Access to Grey Literature"}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{KHAN_2018_INPROCEEDINGS_KBFM_387178, AUTHOR = {Khan, F. and Bellandi, A. and Frontini, F. and Monachini, M.}, TITLE = {One Language to rule them all: modelling Morphological Patterns in a Large Scale Italian Lexicon with SWRL}, YEAR = {2018}, ABSTRACT = {We present an application of Semantic Web Technologies to computational lexicography. More precisely we describe the publication of the morphological layer of the Italian Parole Simple Clips lexicon (PSC-M) as linked open data. The novelty of our work is in the use of the Semantic Web Rule Language (SWRL) to encode morphological patterns, thereby allowing the automatic derivation of the inflectional variants of the entries in the lexicon. 
By doing so we make these patterns available in a form that is human readable and that therefore gives a comprehensive morphological description of a large number of Italian words.}, KEYWORDS = {Morphology, Linked Open Data, Italian Lexicon, SWRL, SQWRL}, PAGES = {4385-4389}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/844.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{MARZI_2018_INPROCEEDINGS_MFNBBP_388016, AUTHOR = {Marzi, C. and Ferro, M. and Nahli, O. and Belik, P. and Bompolas, S. and Pirrelli, V.}, TITLE = {Evaluating Inflectional Complexity Crosslinguistically: a Processing Perspective}, YEAR = {2018}, ABSTRACT = {The paper provides a cognitively motivated method for evaluating the inflectional complexity of a language, based on a sample of "raw" inflected word forms processed and learned by a recurrent self-organising neural network with fixed parameter setting. Training items contain no information about either morphological content or structure. This makes the proposed method independent of both meta-linguistic issues (e.g. format and expressive power of descriptive rules, manual or automated segmentation of input forms, number of inflectional classes etc.) and language-specific typological aspects (e.g. word-based, stem-based or template-based morphology). 
Results are illustrated by contrasting Arabic, English, German, Greek, Italian and Spanish.}, KEYWORDS = {paradigm-based morphology, inflectional complexity, prediction-based processing, recurrent self-organising networks, Statistical And Machine Learning Methods, Language Modelling}, PAGES = {3860-3866}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/summaries/745.html}, VOLUME = {2018}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{MONACHINI_2018_INPROCEEDINGS_MK_387203, AUTHOR = {Monachini, M. and Khan, A. F.}, TITLE = {Towards the Construction of a Lexical Data and Technology Ecosystem: The Experience of ILC-CNR}, YEAR = {2018}, ABSTRACT = {This paper describes the activities and projects being carried on at the "A. 
Zampolli" Institute for Computational Linguistics (ILC) at the crossroads between computational lexicography and e-lexicography and that are intended to assist in the creation of a queryable and interconnected ecosystem of standardised lexicographic datasets and technologies.}, KEYWORDS = {e-lexicography, computational lexicography, lexical resources, standards, LOD}, PAGES = {52-54}, URL = {https://globalex.link/globalex2018/wp-content/uploads/2018/03/Globalex-2018_proceedings.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-28-3}, CONFERENCE_NAME = {LREC 2018 Workshop "Globalex 2018-Lexicography \& WordNets"}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the LREC 2018 Workshop "Globalex 2018-Lexicography \& WordNets"}, EDITOR = {Kernerman, I. and Krek, S.}, } @INPROCEEDINGS{NAHLI_2018_INPROCEEDINGS_N_390405, AUTHOR = {Nahli, O.}, TITLE = {Arabic Language Alignment with English Ontologies-Some Ontological Reflections}, YEAR = {2018}, ABSTRACT = {There have been several attempts to build lexico-conceptual resources by extension of the English WordNet, i.e. by means of translation of English synsets. However, the extension approach is arguable because it assumes that the target resource is isomorphic to English WordNet. Yet, some languages, such as English and Arabic, can be very different. The problem would be to know, first, whether they conceptualize reality in the same way; and if not, to identify different concepts types. The mapping of a lexical resource of a different language onto Princeton WordNet of English (PWN) answers these questions. 
The experiment, in this article, describes results obtained from mapping the Arabic dictionary, al=qāmūs al=muḥīṭ, onto English WordNet and SUMO (Standard Upper Merged Ontology), also developed for the English language.}, KEYWORDS = {Ontology, concept, Arabic, PWN, SUMO, al=qāmūs al=muḥīṭ}, PAGES = {7}, URL = {https://publications.cnr.it/doc/390405}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-5386-4385-3}, CONFERENCE_NAME = {CIST 2018 WH-MNLP}, CONFERENCE_PLACE = {MARRAKECH, MOROCCO}, CONFERENCE_DATE = {21-27/10/2018}, } @INPROCEEDINGS{NICOLAS_2018_INPROCEEDINGS_NKMDCAEBQS_387361, AUTHOR = {Nicolas, L. and König, A. and Monachini, M. and Del Gratta, R. and Calamai, S. and Abel, A. and Enea, A. and Biliotti, F. and Quochi, V. and Stella, F. V.}, TITLE = {CLARIN-IT: State of Affairs, Challenges and Opportunities}, YEAR = {2018}, ABSTRACT = {This paper gives an overview on the Italian national CLARIN consortium as it currently stands two years after its creation at the end of 2015. It thus discusses the current state of affairs of the consortium on several aspects, especially with regards to members. It also discusses the events and initiatives that have been undertaken, as well as the ones that are planned in the close future. It finally outlines the conclusions of a user survey performed to understand the expectations of a targeted user population and provides indications regarding the next steps planned.}, KEYWORDS = {CLARIN-IT Consortium Pisa Bolzano Siena}, PAGES = {1-14}, URL = {http://www.ep.liu.se/ecp/contents.asp?issue=147}, VOLUME = {147}, ISBN = {978-91-7685-273-6}, CONFERENCE_NAME = {CLARIN Annual Conference 2017}, CONFERENCE_PLACE = {Budapest, Hungary}, CONFERENCE_DATE = {18-20 September, 2017}, BOOKTITLE = {Selected papers from the CLARIN Annual Conference 2017, Budapest, 18-20 September 2017}, } @INPROCEEDINGS{SORIA_2018_INPROCEEDINGS_SQR_387362, AUTHOR = {Soria, C. and Quochi, V. 
and Russo, I.}, TITLE = {The DLDP Survey on Digital Use and Usability of EU Regional and Minority Languages}, YEAR = {2018}, ABSTRACT = {This paper reports about the design, the results and the key findings of a survey launched by the Digital Language Diversity Project about the digital use and usability of regional and minority languages. The aim of the survey - the first of this kind - was to investigate the real needs and expectations of European minority language speakers regarding digital opportunities. The focus on four languages (Basque, Breton, Karelian and Sardinian) at different stages of digital development offers a starting point to develop strategies for assessing digital vitality of these languages and overcoming specific difficulties.}, KEYWORDS = {minority languages, digital survival, electronic communication}, PAGES = {4155-4160}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/684.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{BELLANDI_2018_INPROCEEDINGS_BGP_385403, AUTHOR = {Bellandi, A. and Giovannetti, E. and Piccini, S.}, TITLE = {Collaborative Editing of Lexical and Termino-ontological Resources: a Quick Introduction to LexO}, YEAR = {2018}, ABSTRACT = {We here present LexO, a web collaborative editor of lexical and termino-ontological resources. 
As the underlying lexical model we adopted lemon, which appeared to be perfect for our purposes, in particular regarding the separation between the conceptual and linguistic dimensions.}, KEYWORDS = {lemon model, lexo, collaborative editor, termino-ontological resource}, PAGES = {23-27}, URL = {http://euralex2018.cjvt.si/wp-content/uploads/sites/6/2018/12/Euralex2018_book_of_abstracts_FINAL.pdf}, CONFERENCE_NAME = {XVIII EURALEX International Congress}, CONFERENCE_PLACE = {Ljubljana, Slovenia}, CONFERENCE_DATE = {17-21/07/2018}, BOOKTITLE = {The XVIII EURALEX International Congress: Lexicography in Global Contexts-Book of Abstracts}, EDITOR = {Čibej, J. and Gorjanc, V. and Kosem, I. and Krek, S.}, } @INPROCEEDINGS{CAPPA_2018_INPROCEEDINGS_CFGMNCP_396593, AUTHOR = {Cappa, C. and Ferro, M. and Giulivi, S. and Marzi, C. and Nahli, O. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {ReadLet: piattaforma ICT per valutare l'efficienza di lettura}, YEAR = {2018}, ABSTRACT = {ReadLet è una piattaforma ICT pensata per valutare accuratamente l'efficienza di lettura nei bambini della scuola primaria. Combina tecnologia ICT portatile e cloud-computing con una serie di moduli software, specifici per modalità di somministrazione. Questi, implementati come servizi web, includono: i) valutazione dell'elaborazione del testo e della leggibilità; ii) valutazione della velocità di lettura (ad alta voce e silente) e delle sue fluttuazioni; iii) valutazione della correttezza della decodifica ad alta voce; iv) valutazione della comprensione del testo (in lettura silente e da ascolto). Un prototipo della tecnologia ReadLet è stato sperimentato su circa 200 alunni (8-11 anni), che variano per stato socio-economico, lingua (italiana, francese, araba) e area geografica (Italia, Svizzera, Marocco). L'utilizzo del tablet per la lettura è stato percepito dai bambini come un'esperienza coinvolgente e piacevole. 
Gli insegnanti hanno trovato lo strumento facile da utilizzare e in grado di fornire maggiori informazioni rispetto agli strumenti tradizionali.}, KEYWORDS = {leggere per capire, disturbi del linguaggio, screening}, URL = {https://www.airipa.it/congresso/pluginfile.php/2781/mod_resource/content/1/Programma%20Congresso%20AIRIPA_Arezzo_dettagliato-3.pdf}, CONFERENCE_NAME = {XXVII Congresso Nazionale AIRIPA}, CONFERENCE_PLACE = {Arezzo (Italy)}, CONFERENCE_DATE = {28-29/09/2018}, } @INPROCEEDINGS{DELGROSSO_2018_INPROCEEDINGS_D_390305, AUTHOR = {Del Grosso, A. M.}, TITLE = {Verso la definizione e l'implementazione di un processo per la gestione dell'informazione in ambito bibliografico e archivistico}, YEAR = {2018}, ABSTRACT = {L'intervento ripercorre alcune iniziative svolte negli anni passati in collaborazione con il liceo classico Medi-Livatino. Si introduce un processo di digitalizzazione e di analisi di documenti testuali volto alla gestione e allo studio dell'informazione testuale in ambito filologico. In particolare si sottolineano gli aspetti che accomunano il lavoro ingegneristico-tecnologico con quelli maggiormente bibliografici e archivistici relativi alla conservazione e alla fruizione di risorse testuali. Si evidenziano gli sviluppi di attività dedicate alla cultura digitale e all'applicazione di strumenti computazionali per l'analisi e lo studio di testi storici nell'ambito della didattica. In perfetta sintonia quindi con le linee guida del Piano Nazionale Scuola Digitale.}, KEYWORDS = {PNSD, liceo classico, biblioteche innovative, archivi digitali}, URL = {https://publications.cnr.it/doc/390305}, CONFERENCE_NAME = {Cultura Digitale: a scuola di innovazione}, CONFERENCE_PLACE = {San Marco dei Cavoti (Benevento)}, CONFERENCE_DATE = {17-18/5/2018}, } @INPROCEEDINGS{DELGROSSO_2018_INPROCEEDINGS_DCCCDR_390989, AUTHOR = {Del Grosso, A. M. and Cacioli, G. and Cavallero, C. and Cioffi, R. and Di Pietro, C. 
and Rosselli Del Turco, R.}, TITLE = {Encoding and publishing the Life of San Teobaldo using EVT: challenges and rewards}, YEAR = {2018}, ABSTRACT = {This contribution aims at illustrating both the scholarly work and the development outcomes that have been achieved while working towards a digital edition of the Life of San Teobaldo (an hagiography of the patron saint of the city of Alba, Italy). The text, physically embodied in an ancient palimpsest scroll, has been encoded using the TEI-XML standard and published by means of the Edition Visualization Technology tool. EVT has been appropriately customized and extended with new features concerning image visualization, diplomatic edition display and textual search. A work-in-progress demo is available at < http://licodemo.ilc.cnr.it/evt-rotulo >.}, KEYWORDS = {digital philology, evt, computational philology, Rotulo vita San Teobaldo}, URL = {https://drive.google.com/file/d/19SQqvy4vwG_-irpelu7ro3Q1QdZjcsZJ/view?usp=sharing}, CONFERENCE_NAME = {EADH 2018: "Data in Digital Humanities"}, CONFERENCE_DATE = {7-9/12/2018}, } @INPROCEEDINGS{DELGROSSO_2018_INPROCEEDINGS_DCDGMSS_384781, AUTHOR = {Del Grosso, A. M. and Cristofaro, S. and De Luca, M. R. and Giovannetti, E. and Marchi, S. and Seminara, G. and Spampinato, D.}, TITLE = {Le lettere di Bellini: dalla Carta al Web}, YEAR = {2018}, ABSTRACT = {Nel contesto del progetto "Museo virtuale della Musica BellinInRete" sarà reso fruibile, attraverso un processo di acquisizione, codifica e pubblicazione digitale, un corpus di lettere di Vincenzo Bellini, compositore catanese del XIX secolo. 
L'edizione digitale delle lettere belliniane sarà consultabile in rete e, inoltre, sarà integrata in un percorso museale interattivo in allestimento presso il Museo Civico Belliniano di Catania.}, KEYWORDS = {Digital Edition, Digital Scholarly Platform}, PAGES = {60-64}, URL = {http://www.aiucd2018.uniba.it/content/AIUCD2018-BoA.pdf}, DOI = {10.6092/unibo/amsacta/5997}, ISBN = {9788894253528}, CONFERENCE_NAME = {AIUCD 2018 Conference}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {31/01/2018-02/02/2018}, BOOKTITLE = {AIUCD 2018-Book of abstracts}, EDITOR = {Spampinato, D.}, } @INPROCEEDINGS{FERRO_2018_INPROCEEDINGS_FCGMCP_396591, AUTHOR = {Ferro, M. and Cappa, C. and Giulivi, S. and Marzi, C. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {ReadLet: an ICT platform for the assessment of reading efficiency in early graders}, YEAR = {2018}, ABSTRACT = {Reading is not just word decoding, but the joint product of decoding and deep linguistic comprehension [ 1 , 2 ]. Effective linguistic comprehension relies on language skills such as semantic and syntactic awareness. Both decoding and linguistic comprehension are necessary for reading comprehension, and neither is by itself sufficient [ 2 ]. However, current protocols for reading assessment measure decoding (reading accuracy and speed) and reading comprehension separately [ 3 , 4 , 5 ]. This does not allow evaluation of reading efficiency [ 6 ], defined as the ability to fully understand connected texts by minimising reading time, a cognitive ability that lies at the roots of students' academic achievement [ 8 , 7 ]. ReadLet is an ICT platform specifically designed to provide accurate, evidence-based assessment of reading efficiency in early grade children, by offering an ecological, non-invasive protocol for extensive data elicitation, storage and analysis. With ReadLet, early graders at school can read a one or two page text displayed on a tablet touchscreen, either silently or aloud. 
Children are asked to slide their finger across the words as they read, to guide directional tracking. After reading, the child is prompted with a few multiple-answer questions on text content presented one at a time, while the text remains displayed on the screen for the child to be able to retrieve relevant information. In the process, the tablet keeps track of time-aligned multimodal data: voice recording, finger sliding time, time of reading, time of question answering, and number of correct answers. Data are recorded, stored locally, sent to the ReadLet server through an internet connection, and processed remotely by a battery of cloud-based services, analysing data automatically to produce a detailed quantitative signature of each reading session. A server-based database aggregates anonymised data to make them available for specialists. Also individual's longitudinal profiles are stored, for them to be queried and inspected upon authorised access. The platform combines portable ICT technology and cloud computing with a number of modality-specific software modules, implemented as web services including: i) a text processing and readability assessment service, consisting in a battery of tools for automated linguistic annotation of written texts and a machine-learning component assigning a readability score to annotated texts [ 9 ]; ii) a finger touch processing service aligning the child's finger sliding with the written text and measuring speed fluctuations; iii) a speech processing and decoding assessment service, aligning the acoustic record of child's reading with the written text and assessing correctness of recoding [ 10 ]. At the time of writing, the platform includes the first two modules only. 
Preliminary testing of a prototype version of ReadLet technology with a population of about 200 pupils aged 8 to 11, both male and female, varying for socio-economic status, language (Italian, French and Arabic) and geographical area (Italy and Morocco), showed that children are extremely responsive to using a tablet for reading, and very easy to engage in what they perceive as an enjoyable experience. We expect online databases of automatically classified cross-sectional and longitudinal data, accurate statistical modelling and developmental trends of reading literacy to help education professionals and clinical specialists assess the level of reading skills reached by the child, and decide which intervention programmes and measures are most appropriate. While information technology cannot and should not supplant the role and professional judgement of teachers and therapists, the project intends to provide portable tools, models and data for timely screening and daily management of reading difficulties and disorders.}, KEYWORDS = {reading efficiency, decoding, comprehension, language specific disorders}, PAGES = {61-61}, URL = {https://mentallexicon2018.ca/}, CONFERENCE_NAME = {11th International Conference on the Mental Lexicon}, CONFERENCE_PLACE = {Edmonton, Alberta (Canada)}, CONFERENCE_DATE = {25-28/09/2018}, } @INPROCEEDINGS{GIOVANNETTI_2018_INPROCEEDINGS_GABDDPP_385407, AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Dattilo, D. and Dollinar, M. and Pecchioli, A. and Piperno, C.}, TITLE = {Il Progetto Traduzione del Talmud Babilonese: il Ruolo della Tecnologia e della Linguistica Computazionale}, YEAR = {2018}, ABSTRACT = {L'obiettivo principale del Progetto Traduzione del Talmud Babilonese è produrre la traduzione del Talmud in italiano. 
La traduzione, affidata ad un team di circa 80 studiosi, è condotta con l'aiuto di Traduco, un software preposto ad agevolare tutte le fasi di lavoro previste dal progetto, dall'attribuzione degli utenti alle sezioni da tradurre, fino al supporto all'impaginazione finale. La presenza di una piattaforma collaborativa digitale che già, di per sé, costituisce una innovazione nell'ambito dei grandi progetti di traduzione, è arricchita da algoritmi per il trattamento automatico del testo e della lingua, in costante evoluzione, attraverso i quali il traduttore, il revisore o lo studioso possono contare su funzionalità sempre più avanzate.}, KEYWORDS = {Linguistica Computazionale, Traduzione di Testi Religiosi, Traduzione Assistita dal Calcolatore, Traduzione Collaborativa}, PAGES = {144-146}, URL = {http://amsacta.unibo.it/5997/}, DOI = {10.6092/unibo/amsacta/5997}, ISBN = {9788894253528}, CONFERENCE_NAME = {AIUCD 2018 Conference}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {31/01/2018-02/02/2018}, BOOKTITLE = {AIUCD 2018-Book of abstracts}, EDITOR = {Spampinato, D.}, } @INPROCEEDINGS{GOGGI_2018_INPROCEEDINGS_GPBMBC_395584, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Monachini, M. and Biagioni, S. and Carlesi, C.}, TITLE = {Semantic query analysis from the global science gateway}, YEAR = {2018}, ABSTRACT = {We focused on building a corpus constituted by the query logs registered by the GreyGuide: Repository and Portal to Good Practices and Resources in Grey Literature and received by the WorldWideScience.org (The Global Science Gateway) portal: the aim is to retrieve information related to social media which as of today represent a considerable source of data more and more widely used for research ends. This project includes eight months of query logs registered between July 2017 and February 2018 for a total of 445,827 queries. 
The analysis mainly concentrates on the semantics of the queries received from the portal clients: it is a process of information retrieval from a rich digital catalogue whose language is dynamic, is evolving and follows - as well as reflects - the cultural changes of our modern society.}, KEYWORDS = {Global Science Gateway, Semantic Query Analysis, Terminology}, PAGES = {93-95}, URL = {http://greyguide.isti.cnr.it/wp-content/uploads/2018/12/GL20_ProgramBook.pdf}, VOLUME = {20}, ISBN = {978-90-77484-34-0}, CONFERENCE_NAME = {Twentieth International Conference on Grey Literature "Research Data Fuels and Sustains Grey Literature"}, CONFERENCE_PLACE = {New Orleans, USA (Loyola University)}, CONFERENCE_DATE = {December 3-4, 2018}, BOOKTITLE = {Research Data Fuels and Sustains Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{LEONI_2018_INPROCEEDINGS_LMCFG_396592, AUTHOR = {Leoni, F. and Muzio, C. and Cappa, C. and Ferro, M. and Giulivi, S.}, TITLE = {Il progetto AEREST: primi risultati in Italia e in Canton Ticino}, YEAR = {2018}, ABSTRACT = {Il progetto AEREST, per una valutazione ecologica dell'efficienza di lettura, è attualmente in corso presso alcune classi di scuola primaria di istituti italiani e ticinesi. Si presentano qui i risultati ottenuti a seguito della prima sessione di raccolta dati, che si è svolta nell'A.A. 2017-18 su circa 160 bambini italofoni di età compresa tra 8 e 11 anni. Lo scopo di questa prima fase sperimentale è stato triplice: 1. ottenere indicazioni sull'efficacia, ai fini della valutazione dell'efficienza di lettura, dei testi utilizzati nelle prove di cui si compone il test AEREST; 2. ottenere indicazioni sulla fattibilità dell'implementazione dello screening su tablet, in termini di facilità di somministrazione e di gradimento da parte dei soggetti; 3. 
esplorare e confrontare le performance di lettura nel campione italiano e ticinese, al fine di individuare strategie didattiche volte a potenziare le eventuali abilità carenti.}, KEYWORDS = {efficienza di lettura, screening}, URL = {https://www.airipa.it/congresso/pluginfile.php/2781/mod_resource/content/1/Programma%20Congresso%20AIRIPA_Arezzo_dettagliato-3.pdf}, CONFERENCE_NAME = {XXVII Congresso Nazionale AIRIPA}, CONFERENCE_PLACE = {Arezzo (Italy)}, CONFERENCE_DATE = {28-29/09/2018}, } @INPROCEEDINGS{MARZI_2018_INPROCEEDINGS_MFP_396356, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Is inflectional irregularity dysfunctional to human processing?}, YEAR = {2018}, ABSTRACT = {Regularly inflected verb forms are classically associated with the formal transparency and predictability of their internal constituents [ 1 , 2 , 3 ]. Transparency ensures that full forms can be segmented uniquely into their internal constituents: as in walk-s/walk-ed. Predictability allows for a speaker to fill in an empty paradigm cell, using information from other known forms of the same lexical paradigm and its inflection macro-class. From this perspective, irregulars appear to be dysfunctional to the human processing system, as they make it hard to infer - say - bought from buy , or segment bought appropriately into its constituent parts. Likewise, an influential psycholinguistic tradition relegates irregulars to the lexical store, whereas regulars are segmented by rules into their simpler constituents [ 4 , 5 ]. Here, we offer a few reasons for questioning this view. First, transparency and predictability are not dichotomous notions. Secondly, their influence on processing is not unidirectional. Unpredictable stems in irregularly inflected forms of complex inflectional systems provide a lot of processing information, by dynamically constraining the number of possible alternative endings during serial processing. 
Thirdly, acquisition of word inflection does not consist in associating co-occurring cues and outcomes, but in discriminating between multiple cues that are constantly in competition for their predictive value for a given outcome. We present the results of a few computer simulations with Self-organising Recurrent Neural Networks (TSOMs, [ 8 , 9 ]) that learn how to inflect high-frequency verb paradigms in 6 languages: English, German, Italian, Modern Greek, Modern Standard Arabic and Spanish. After training, each TSOM was tested on a word recognition (serial recoding) and a word production (serial recall) task, and results were analysed with generalised regression models. Processing uncertainty is differently apportioned on regulars and irregulars, depending on the nature of the processing task. While irregulars are harder to produce when they are unknown because they typically have fewer neighbours than regulars have, they are readily accessed once they are acquired, for exactly the same reason. Our data are in line with psycholinguistic evidence [ 10 , 11 ] that lexical processing is paced by two types of uniqueness point: Marslen-Wilson's Uniqueness Point (UP), distinguishing unrelated onset-overlapping words [ 12 ], and the Complex Uniqueness Point (CUP), distinguishing paradigmatically-related words [ 11 ]. Late UPs are inhibitory and elicit prolonged reaction times in acoustic word recognition, explaining an early delay in word recognition of irregular stems. Similarly, late CUPs are inhibitory, and this accounts for a slowdown in the processing advantage of regulars, compared to irregulars, after UP. These structural factors interact in a variety of ways and concurrently affect human processing, to show that irregularly-inflected forms may in fact reflect communicative and processing constraints of the word processor. 
They provide strong evidence against a processing architecture that assumes compartmentalized, independent processing routes for some specific combinations of these factors (e.g. a rule-based route for a combination of transparency and predictability, and a memory-based route for all other combinations). In addition, they seem incompatible with Bayesian approaches to auditory word comprehension ignoring a word's internal structure [ 13 ]. We suggest that a different design of the human language processor, based on a computational architecture integrating memory and processing as two different dynamics of the same underlying mechanism, can shed light on the complexity of inflection, and vindicate the role of irregular inflection in the system.}, KEYWORDS = {inflectional processing, temporal self organizing maps, letter prediction, morpheme boundary}, PAGES = {60-60}, URL = {https://mentallexicon2018.ca/}, CONFERENCE_NAME = {11th International Conference on the Mental Lexicon}, CONFERENCE_PLACE = {Edmonton, Alberta (Canada)}, CONFERENCE_DATE = {25-28/09/2018}, } @INPROCEEDINGS{PICCINI_2018_INPROCEEDINGS_PBG_385401, AUTHOR = {Piccini, S. and Bellandi, A. and Giovannetti, E.}, TITLE = {A Semantic Web Approach to Modelling and Building a Bilingual Chinese-Italian Termino-ontological Resource}, YEAR = {2018}, ABSTRACT = {This paper introduces a bilingual Chinese-Italian onto-terminological resource, devoted to modelling the Chinese terminology of Matteo Ricci's World Map (1602), together with the Italian translation by Pasquale D'Elia (1835) [3]. The Map was created in collaboration with the Chinese mathematician and astronomer Li Zhizao, and is entitled 坤輿萬國全圖 Kunyu Wanguo Quantu (literally "Map of the Ten Thousand Countries of the Earth"). Its publication in China was significant as it was the first map to show the Americas, and to represent the world as a sphere. 
Its large number of cartouches provide information about the geography, history and customs of the world at that time as well as cosmological and cosmographic data. The map had a revolutionary impact from a linguistic standpoint as well: a large number of neologisms were introduced by Ricci, many of which have survived until today.}, KEYWORDS = {termino-ontological resource, classical chinese, lemon model}, PAGES = {87-90}, URL = {http://euralex2018.cjvt.si/wp-content/uploads/sites/6/2018/12/Euralex2018_book_of_abstracts_FINAL.pdf}, CONFERENCE_NAME = {XVIII EURALEX International Congress}, CONFERENCE_PLACE = {Ljubljana, Slovenia}, CONFERENCE_DATE = {17-21/07/2018}, BOOKTITLE = {The XVIII EURALEX International Congress: Lexicography in Global Contexts-Book of Abstracts}, EDITOR = {Čibej, J. and Gorjanc, V. and Kosem, I. and Krek, S.}, } @INPROCEEDINGS{PIRRELLI_2018_INPROCEEDINGS_P_399032, AUTHOR = {Pirrelli, V.}, TITLE = {NLP-based assessment of reading efficiency in early grade children}, YEAR = {2018}, ABSTRACT = {Assessing reading skills is a laborious and time-consuming task, which requires monitoring a variety of interlocked abilities, ranging from accurate word rendering, reading fluency and lexical access, to linguistic comprehension, and interpretation, management and inference of complex events in working memory. No existing software, to our knowledge, is able to cover and integrate reading performance monitoring, instant feedback, personalised potentiation and intelligent decision support to teachers and speech therapists, assessment of response to intervention. 
NLP and ICT technologies can make such an ambitious platform an achievable target.}, KEYWORDS = {NLP-based methods, reading efficiency, early graders}, PAGES = {5-6}, URL = {http://dcl.bas.bg/clib/wp-content/uploads/2018/07/CLIB_2018_Proceedings_v2_final.pdf}, CONFERENCE_NAME = {Computational Linguistics in Bulgaria}, CONFERENCE_PLACE = {Sofia, Bulgaria}, CONFERENCE_DATE = {27-29/05/2018}, } @INPROCEEDINGS{PIRRELLI_2018_INPROCEEDINGS_PFMGSM_396353, AUTHOR = {Pirrelli, V. and Ferro, M. and Marzi, C. and Gagné, C. and Spalding, T. and Marelli, M.}, TITLE = {Processing compounds: what frequency (alone) cannot explain}, YEAR = {2018}, ABSTRACT = {Observed elevation in typing latency for the initial letter of the second constituent of an English compound, compared with the typing time of the final letter of the first constituent (Gagné \& Spalding 2016), suggests that both compounds ( snowball ) and pseudo-compounds ( carpet ) are decomposed but also that full form representations are available in the lexical store. To gain further insight into the lexical representations underlying typing, we used computational modelling. In particular, we used superpositional models of word memory, based on Self-Organising Recurrent Maps (TSOMs) (Ferro et al. 2016; Marzi et al. 2016), where both simple and compound words are processed (and stored) using the same pool of processing (and memory) resources, to model the elevation in typing time at the constituent boundary and the rate of typing. In addition, we also considered models based in the Compositional Distributional Semantics framework (CAOSS, Marelli et al. 2017), to simulate independent effects of semantic transparency on compound typing (Gagné \& Spalding 2016). 
Due to co-activation and competition between compounds and their constituent words in TSOMs, levels of activation of processing nodes per letter positions appear to reflect degrees of context-sensitive predictability: the higher the level, the more expected the letter in that position. In English compounds, activation levels appeared to exhibit a characteristically U-shaped pattern, with min values centred on the constituent boundary. A similar pattern was found for pseudo-compounds, which nonetheless present a less pronounced U-shaped pattern and a higher activation value at the morpheme boundary than compounds do. The difference is in line with the higher speed-up rate in typing pseudo-compounds than compounds reported in Gagné and Spalding (2016). TSOMs were trained on letter-based representations, so computer experiments could simulate peripheral effects of serial processing of compound structure before lexical access. To investigate post-lexical issues, we also tested computational models of generation of the meanings of novel compounds based on CAOSS, which proved to be able to account for well-established relational effects in compound processing (Gagné 2001; Gagné \& Shoben 1997) with an unsupervised data-driven framework (Marelli et al. 2017). We ran a mixed-effects regression analysis of the data in Gagné and Spalding (2016) using vector-semantics estimates and TSOM activation levels to predict typing time for the initial letter of the second constituent. There was a negative effect of TSOM letter activation levels: i.e. the more active a letter node is, the faster a subject is at typing the letter ( t =-2.7, p =.007). Also, there was a positive effect of CAOSS-based compositionality estimates: i.e. the more easily a compound's lexicalized meaning can be obtained through compositional operations on single constituent vectors, the slower participants were at typing the first letter of the second constituent ( t =2.4, p =.017). 
These results have interesting implications for an integrative computational architecture accounting for the whole range of experimental evidence reported by Gagné and Spalding (2016). In particular we will focus on evidence of a stronger competition (and longer typing time) in Transparent-Transparent and Transparent-Opaque compounds, vs. Opaque-Transparent compounds, which gives an indication of a non-trivial interaction between semantic compositionality and serial processing effects.}, KEYWORDS = {compound processing, Temporal Self-organizing Map, letter production latency, constituent boundary}, PAGES = {60}, URL = {https://mentallexicon2018.ca/}, CONFERENCE_NAME = {11th International Conference on the Mental Lexicon}, CONFERENCE_PLACE = {Edmonton (Canada)}, CONFERENCE_DATE = {25-28/09/2018}, } @INPROCEEDINGS{STEFANINI_2018_INPROCEEDINGS_SNM_385585, AUTHOR = {Stefanini, A. and Nicolosi, A. and Monachini, M.}, TITLE = {An experiment on the development of a digital edition for ancient Greek fragmentary poetry: A case study on Archilochus of Paros}, YEAR = {2018}, ABSTRACT = {This paper overviews ongoing experiments on a digital edition of Archilochus which is based on the readings, translations and comments by Nicolosi [1] and also integrates feedback and requirements from the Digital Classics community. The experiment encompasses a few fragments of the poet of Paros, so as to provide a mock-up of the prototype for evaluation by its intended end-users, in view of developing a fully fledged digital edition. 
The mock-up provides the philologist with a set of resources and tools that ease a critical appraisal of the text.}, KEYWORDS = {Digital methods in the humanities, Interfaces and user-friendly data presentation}, PAGES = {86--89}, URL = {http://amsacta.unibo.it/5997/1/AIUCD-2018-BoA-rev.pdf}, DOI = {10.6092/unibo/amsacta/5997}, ISBN = {9788894253528}, CONFERENCE_NAME = {Settimo Convegno Annuale AIUCD 2018}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {31/01/2018-2/02/2018}, BOOKTITLE = {Settimo Convegno Annuale AIUCD 2018. Patrimoni culturali nell'era digitale. Memorie, culture umanistiche e tecnologia. Book of Abstracts}, EDITOR = {Daria, S.}, }
@TECHREPORT{BARONI_2018_TECHREPORT_BQRSCGHKSS_483257,
  AUTHOR = {Baroni, P. and Quochi, V. and Russo, I. and Soria, C. and Ceberio, B. K. and Gurrutxaga, H. A. and Hicks, D. and Kruse, E. and Salonen, T. and Sarhimaa, A.},
  TITLE = {Kit per la sopravvivenza digitale della lingua sarda-Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua sarda},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua sarda (versione italiana)},
  KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Sardinian},
  PAGES = {12},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Sardinian_IT.pdf},
}
@TECHREPORT{CARLINO_2018_TECHREPORT_C_483693,
  AUTHOR = {Carlino, M.},
  TITLE = {Rapporto annuale 2017 del CNR-ILC},
  YEAR = {2018},
  ABSTRACT = {Rapporto Annuale 2017 del Cnr-Istituto di Linguistica Computazionale "Antonio Zampolli" (CNR-ILC)},
  KEYWORDS = {CNR-ILC, ILC, Annual Report, Rapporto Annuale, Istituto di Linguistica Computazionale, Zampolli, Activity report},
  PAGES = {1--64},
  URL = {https://publications.cnr.it/doc/483693},
}
@TECHREPORT{CEBERIO_2018_TECHREPORT_CGBHKQRSSS_443050, AUTHOR = {Ceberio, B. K. and Gurrutxaga, H. A. and Baroni, P. and Hicks, D. and Kruse, E. 
and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {Euskarak Mundu Digitalean Bizirauteko Kita-DLDPren gomendioak, euskararen bizitasun digitala hobetu dadin}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale della lingua basca (versione basca)}, KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Basque}, PAGES = {27}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Basque_EU.pdf}, }
@TECHREPORT{CEBERIO_2018_TECHREPORT_CGBHKQRSSS_443051,
  AUTHOR = {Ceberio, B. K. and Gurrutxaga, H. A. and Baroni, P. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {Kit de Supervivencia Lingüística Digital del Euskera-Recomendaciones del DLDP para mejorar la Vitalidad Digital del euskera},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del basco (versione spagnola)},
  KEYWORDS = {digital diversity, digital vitality, recommendations, Basque, digital survival},
  PAGES = {28},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Basque_ES.pdf},
}
@TECHREPORT{CEBERIO_2018_TECHREPORT_CGBHKQRSSS_443020, AUTHOR = {Ceberio, B. K. and Gurrutxaga, H. A. and Baroni, P. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. 
and Soria, C.}, TITLE = {The DLDP Digital Language Survival Kit}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale delle lingue (versione inglese integrale)}, KEYWORDS = {sopravvivenza digitale, lingue minoritarie, less-resourced languages}, PAGES = {38}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit.pdf}, }
@TECHREPORT{CININI_2018_TECHREPORT_C_390082,
  AUTHOR = {Cinini, A.},
  TITLE = {LigurArch900: Itinerari di architettura contemporanea in Liguria},
  YEAR = {2018},
  ABSTRACT = {Realizzazione di un prototipo di applicazione per sistema operativo Android per la divulgazione dei risultati di un Progetto di ricerca "Censimento e schedatura di complessi di architettura moderna e contemporanea in Liguria". L'attività finalizzata allo "Studio e realizzazione di moduli software per accedere, gestire ed estrarre informazioni sulle architetture del Novecento in Liguria", è stata svolta nell'ambito della collaborazione con il Dipartimento dell'Università di Architettura di Genova (DSA-UNIGE). L'applicazione affianca la consultazione delle architetture censite con visualizzazione su mappa, a quella dei contenuti descrittivi per le architetture di maggior rilievo.},
  KEYWORDS = {Android, Mobile, Term extraction},
  PAGES = {1--9},
  URL = {http://dbtvm1.ilc.cnr.it/Download/app-release_20170318.zip},
}
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_443047,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {The DLDP Roadmap},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione inglese integrale)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {19},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Roadmap.pdf},
}
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483247, AUTHOR = {Hicks, D. 
and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {The DLDP Roadmap-Policy Recommendations \& Timeline}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione inglese sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_EN.pdf}, }
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483251,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {DLDP etenemissuunnitelma-Toimenpidesuunnitelmat ja aikajana},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione finlandese sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_FI.pdf},
}
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483254,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {La DLDP Hoja de Ruta-Políticas recomendadas \& Cronograma},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione spagnola sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_ES.pdf},
}
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483255, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. 
and Soria, C.}, TITLE = {Diversità Linguistica Digitale: la Roadmap-Raccomandazioni strategiche \& Sequenza}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione italiana sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_IT.pdf}, }
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483256,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {DLDP Bide Orria-Gomendatutako politikak \& Kronograma},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione basca sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_EU.pdf},
}
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483262,
  AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.},
  TITLE = {Die DLDP Roadmap-Strategieempfehlungen \& Zeitplan},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione tedesca sintetica)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  PAGES = {6},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_DE.pdf},
}
@TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483263, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. 
and Soria, C.}, TITLE = {La Roadmap DLDP-Recommandations de politique et calendrier}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione francese sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_FR.pdf}, }
@TECHREPORT{HICKS_2018_TECHREPORT_HSBCGKQRSS_443354,
  AUTHOR = {Hicks, D. and Soria, C. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.},
  TITLE = {Pak treuzveviñ ar Brezhoneg niverel-Erbedoù an DLDP evit gwellaat buhezegezh niverel ar brezhoneg},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del bretone (versione bretone)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Breton_BR.pdf},
}
@TECHREPORT{HICKS_2018_TECHREPORT_HSBCGKQRSS_443359,
  AUTHOR = {Hicks, D. and Soria, C. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.},
  TITLE = {Kit de survie numerique pour la langue bretonne-Les recommandations du DLDP pour améliorer la vitalité numérique du Breton},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del bretone (versione francese)},
  KEYWORDS = {digital vitality, digital diversity, recommendations},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Breton_FR.pdf},
}
@TECHREPORT{SALONEN_2018_TECHREPORT_SBCGHKQRSS_443365, AUTHOR = {Salonen, T. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Sarhimaa, A. 
and Soria, C.}, TITLE = {Karjalan digitaalinen kielenselviytymispakkaus-DLDP-suositukset karjalan kielen digitaalisen elinvoimaisuuden parantamiseksi}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale della lingua careliana (versione finlandese)}, KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Karelian}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Karelian_FI.pdf}, }
@TECHREPORT{SALONEN_2018_TECHREPORT_SBCGHKQRSS_483261,
  AUTHOR = {Salonen, T. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Sarhimaa, A. and Soria, C.},
  TITLE = {Karjalan digitualine hengihjiämispakkavus-DLDP-rekomendatsiet karjalan kielen digitualizen elinvoimazuon kohendamizeh},
  YEAR = {2018},
  ABSTRACT = {Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua careliana (versione careliana)},
  KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Karelian},
  PAGES = {12},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Karelian_KRL.pdf},
}
@THESIS{NAHLI_2018_THESIS_N_390506, AUTHOR = {Nahli, O.}, TITLE = {Vers une ontologie de la culture arabo-musulmane}, YEAR = {2018}, ABSTRACT = {Le projet vise à décrire les méthodologies permettant de développer un réseau de connaissance pour la culture arabo-islamique sur la base d'un processus d'extractions automatiques de données à partir du lexique arabe al=qamus al=muHiT (qamus). Le choix de qamus est justifié par le fait qu'il a un statut d'autorité dans le monde arabe, au point que la parole qamus [océan] a supplanté la parole mungid 'dictionnaire'. Le projet prévoit divers étapes de travail et, avant tout, l'acquisition d'une version numérique de qamus. 
La mise au point d'algorithmes pour la codification partielle et automatique de la macrostructure lexicale et la conversion du lexique en format XML. D'autres algorithmes permettent l'identification de la microstructure lexicale et, l'annotation de chaque partie constituante de l'entrée lexicale, entre autres, le lemme, sa nature morphologique, ses définitions, etc. En utilisant deux dictionnaires bilingues arabe-anglais, un système de recherche permet de trouver, de manière automatique et quand c'est possible, la traduction de chaque lemme, ce qui permet de le lier au synset correspondant dans PWN et au concept de SUMO à qui il pourrait faire référence. Une autre étape serait l'analyse de divers échantillons de lemmes pour détecter la validité des résultats.}, KEYWORDS = {al qamus al muHiyT, ontologie, langue arabe, Wordnet, PWN, SUMO (The Suggested Upper Merged Ontology)}, PAGES = {317}, URL = {https://publications.cnr.it/doc/390506}, }
@MISC{BOSCHETTI_2018_MISC_BD_390656,
  AUTHOR = {Boschetti, F. and Del Grosso, A. M.},
  TITLE = {Euporia: Piattaforma digitale per l'annotazione tramite Domain Specific Languages di testi multilingui disposti in parallelo},
  YEAR = {2018},
  ABSTRACT = {Piattaforma digitale per l'annotazione tramite Domain Specific Languages di testi multilingui disposti in parallelo},
  KEYWORDS = {digital humanities, computational philology, digital philology},
  URL = {https://github.com/CoPhi/euporia},
}
@MISC{CEBERIO_2018_MISC_CGSRQ_440548, AUTHOR = {Ceberio, K. and Gurrutxaga, A. and Soria, C. and Russo, I. and Quochi, V.}, TITLE = {How to Use the Digital Language Vitality Scale}, YEAR = {2018}, ABSTRACT = {The Digital Language Vitality Scale is an instrument developed within the framework of the Digital Language Diversity Project (www.dldp.eu) for estimating the degree of digital vitality of any given language. 
It aims to be an instrument for self-assessment of the digital vitality of any language, although it is aimed in particular at identifying current gaps, needs and requirements regarding the extent to which a language community is active/vital on digital media and devices so that adequate digital language planning can be done. This document instructs prospective adopters on how to best use it.}, KEYWORDS = {Diversità Linguistica, BLARK, Sopravvivenza linguistica digitale}, PAGES = {18}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Vitality-Scale.pdf}, }
@MISC{CRISTOFARO_2018_MISC_CDS_445805,
  AUTHOR = {Cristofaro, S. and Del Grosso, A. M. and Spampinato, D.},
  TITLE = {Chiosco Voci di Pietra},
  YEAR = {2018},
  ABSTRACT = {Il chiosco multimediale Voci di Pietra è stato installato come postazione in locale all'interno del percorso museale in occasione della omonima mostra. Il totem touch screen permette la navigazione tra le informazioni raccolte nelle schede, nei video e nelle immagini digitalizzate, sia in italiano che in inglese, per favorire l'approfondimento della visita.},
  KEYWORDS = {Museum, Epigraphy, EpiDoc, TEI},
  URL = {https://publications.cnr.it/doc/445805},
}
@MISC{DELGROSSO_2018_MISC_DM_484667,
  AUTHOR = {Del Grosso, A. M. and Marchi, S.},
  TITLE = {Edizione digitale del Rotulo di San Teobaldo},
  YEAR = {2018},
  ABSTRACT = {Applicazione web per la consultazione dell'edizione digitale del Rotulo di San Teobaldo proprietà della Diocesi di Alba.},
  KEYWORDS = {digital philology, web application, evt, enhanced visualization},
  URL = {https://www.visitmudi.it/rotulo-di-san-teobaldo/},
}
@MISC{DELGROSSO_2018_MISC_DM_484669, AUTHOR = {Del Grosso, A. M. 
and Marchi, S.}, TITLE = {Bellininrete Web Application}, YEAR = {2018}, ABSTRACT = {Applicazione web sviluppata in seno al progetto Bellininrete per lo studio e la consultazione della corrispondenza del maestro catanese Vincenzo Bellini.}, KEYWORDS = {Digital Edition, Digital Scholarly Platform, web application}, URL = {http://bellinicorrespondence.cnr.it/evt}, }
@MISC{DELGROSSO_2018_MISC_DMA_390394,
  AUTHOR = {Del Grosso, A. M. and Marchi, S. and Albanesi, D.},
  TITLE = {Omega Project: Omega: Piattaforma Multi-modulare per lo studio scientifico del testo},
  YEAR = {2018},
  ABSTRACT = {Piattaforma per lo studio del testo con prospettiva scientifico-filologica.},
  KEYWORDS = {digital humanities, computational philology, software engineering},
  URL = {https://github.com/literarycomputinglab/OmegaProject},
}
@ARTICLE{BARTOLINI_2017_ARTICLE_BPGGB_369103, AUTHOR = {Bartolini, R. and Pardelli, G. and Goggi, S. and Giannini, S. and Biagioni, S.}, TITLE = {A terminological "journey" in the Grey Literature domain}, YEAR = {2017}, ABSTRACT = {"It is by means of terms that the expert usually transfer their knowledge and again through terms scientific communication reaches the highest effectiveness. Therefore we can assert that terminology - in the sense of a set of representative and domain-specific units - is necessary for representing and connecting specialized fields as well as any attempt to represent and/or transfer scientific knowledge requires, more or less extensively, the use of terminology." (Cabré, 2000). "When we read the articles or papers of a particular domain, we can recognize some lexical items in the texts as technical terms. In a domain where new knowledge is generated, new terms are constantly created to fulfill the needs of the domain, while others become obsolete. In addition, existing terms may undergo changes of meaning..." (Kageura K., 1998/1999). 
Specialized lexicons are made up of the terms which are specific to each field of knowledge, «a subset which is distinct but not separated from the common language» (Cassese, 1992): it is usually difficult to extract the relevant domain-specific terminology, meaning to discern terms which belong to a specialized glossary from those belonging to the common dictionary. The interest in the study of terminology and the "truth" contained in the above definitions has led us to make a "journey" in the Grey Literature (GL) domain in order to offer an overall vision on the terms used and the links between them. Within this scenario, the work analyzes a corpus constituted of the entire amount of full research papers published in the GL conference series over a time-span of more than one decade (2003-2014) with the aim of creating a terminological map of relevant words in the various GL research topics. "... corpora used to extract terminological units can be further investigated to find semantic and conceptual information on terms or to represent conceptual relationships between terms. (Bourigault D. et al., 2001). Another interesting inquiry is the terminology used in the GL conferences for describing the types of documents which can be detected (Pejšová P. et al., 2012).}, KEYWORDS = {Grey Literature, Information Extraction IE, Terminology}, PAGES = {41--53}, URL = {http://www.greynet.org/thegreyjournal/currentissue.html}, VOLUME = {13}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{BIZZONI_2017_ARTICLE_BRD_382152, AUTHOR = {Bizzoni, Y. and Reboul, M. 
and Del Grosso, A.}, TITLE = {Diachronic trends in Homeric translations}, YEAR = {2017}, ABSTRACT = {In this paper we intend to present a tool we developed for translation studies and diachronically compare various French translations of the Odyssey.}, KEYWORDS = {Digital Humanities, Diachronic translations, Software design and engineering, Natural Language Processing}, PAGES = {26}, URL = {http://www.digitalhumanities.org/dhq/vol/11/2/000297/000297.html}, VOLUME = {11}, PUBLISHER = {Alliance of Digital Humanities Organizations ([Providence, RI?], Stati Uniti d'America)}, ISSN = {1938-4122}, JOURNAL = {Digital humanities quarterly}, } @ARTICLE{BOMPOLAS_2017_ARTICLE_BFMCP_380237, AUTHOR = {Bompolas, S. and Ferro, M. and Marzi, C. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {For a performance-oriented notion of regularity in inflection: the case of Modern Greek conjugation}, YEAR = {2017}, ABSTRACT = {Paradigm-based approaches to word processing/learning assume that word forms are not acquired in isolation, but through associative relations linking members of the same word family (e.g. a paradigm, or a set of forms filling the same paradigm cell). Principles of correlative learning offer a set of equations that are key to modelling this complex dynamic at a considerable level of detail. We use these equations to simulate acquisition of Modern Greek conjugation, and we compare the results with evidence from German and Italian. Simulations show that different Greek verb classes are processed and acquired differentially, as a function of their degrees of formal transparency and predictability. 
We relate these results to psycholinguistic evidence of Modern Greek word processing, and interpret our findings as supporting a view of the mental lexicon as an emergent integrative system.}, KEYWORDS = {paradigm-based morphology, gradient (ir)regularity, recurrent self-organising networks}, PAGES = {77--92}, URL = {http://www.ai-lc.it/IJCoL/v3n1/IJCOL_3_1_5_bompolas_et_al.pdf?v=2a47ad90f2ae}, VOLUME = {3}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{CONNOR_2017_ARTICLE_CCVR_363052, AUTHOR = {Connor, R. and Cardillo, F. A. and Vadicamo, L. and Rabitti, F.}, TITLE = {Hilbert exclusion: improved metric search through finite isometric embeddings}, YEAR = {2017}, ABSTRACT = {Most research into similarity search in metric spaces relies on the triangle inequality property. This property allows the space to be arranged according to relative distances to avoid searching some subspaces. We show that many common metric spaces, notably including those using Euclidean and Jensen-Shannon distances, also have a stronger property, sometimes called the four-point property: In essence, these spaces allow an isometric embedding of any four points in three-dimensional Euclidean space, as well as any three points in two-dimensional Euclidean space. In fact, we show that any space that is isometrically embeddable in Hilbert space has the stronger property. This property gives stronger geometric guarantees, and one in particular, which we name the Hilbert Exclusion property, allows any indexing mechanism which uses hyperplane partitioning to perform better. One outcome of this observation is that a number of state-of-the-art indexing mechanisms over high-dimensional spaces can be easily refined to give a significant increase in performance; furthermore, the improvement given is greater in higher dimensions. 
This therefore leads to a significant improvement in the cost of metric search in these spaces.}, KEYWORDS = {Similarity search, Metric space, Metric indexing, Four-point property, Hilbert embedding, H. Information systems. Data structures, H. Information systems. Multidimensional range search, H. Information systems. Proximity search, H. Information systems. Database query processing, H. Information systems. Retrieval models and ranking, Information systems. Retrieval efficiency, H. Information systems. Multimedia information systems, F. Theory of computation. Random projections and metric embeddings}, PAGES = {17--27}, URL = {http://doi.acm.org/10.1145/3001583}, VOLUME = {35}, DOI = {10.1145/3001583}, PUBLISHER = {Association for Computing Machinery (New York, NY, Stati Uniti d'America)}, ISSN = {1046-8188}, JOURNAL = {ACM transactions on information systems}, }
@ARTICLE{FERRARI_2017_ARTICLE_FDEGG_382166,
  AUTHOR = {Ferrari, A. and Dell'Orletta, F. and Esuli, A. and Gervasi, V. and Gnesi, S.},
  TITLE = {Natural language requirements processing: a 4D vision},
  YEAR = {2017},
  ABSTRACT = {Natural language processing (NLP) and requirements engineering (RE) have had a long relationship, yet their combined use isn't well established in industrial practice. This situation should soon change. The future evolution of the application of NLP technologies in RE can be viewed from four dimensions: discipline, dynamism, domain knowledge, and datasets.},
  KEYWORDS = {Natural Language Processing, Requirement Processing},
  PAGES = {28--35},
  URL = {http://ieeexplore.ieee.org/abstract/document/8106888/},
  VOLUME = {34},
  DOI = {10.1109/MS.2017.4121207},
  PUBLISHER = {IEEE Computer Society (Los Alamitos, CA, Stati Uniti d'America)},
  ISSN = {0740-7459},
  JOURNAL = {IEEE software},
}
@ARTICLE{GIANNINI_2017_ARTICLE_GBGP_369104, AUTHOR = {Giannini, S. and Biagioni, S. and Goggi, S. 
and Pardelli, G.}, TITLE = {Grey Literature Citations in the age of Digital Repositories and Open Access}, YEAR = {2017}, ABSTRACT = {The work measures grey citations in the years 2012, 2013 and 2014 and then describes the features of GL documents cited in different areas of knowledge: Computational Linguistics, Computer Science and Engineering. With the aim of surveying a wide and varied range of resources, we selected a sample data based on the bibliographical references of articles contained in four journals - all indexed by Scopus Citation Database and ISI Web of Science, with an Impact Factor (IF) over the last three years - and two proceedings of international conferences held in 2012 and 2014.}, KEYWORDS = {Grey Literature, Citations}, PAGES = {23--31}, URL = {http://www.greynet.org/thegreyjournal/currentissue.html}, VOLUME = {13}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{GIOVANNETTI_2017_ARTICLE_GABB_364947, AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Benotto, G.}, TITLE = {Traduco: A collaborative web-based CAT environment for the interpretation and translation of texts}, YEAR = {2017}, ABSTRACT = {Traduco is a web-based collaborative tool aimed at supporting the translation of texts that pose particular challenging interpretative issues. Nowadays, Computer-Assisted Translation (CAT) tools are mainly applied to the translation of technical manuals or legislative texts and are aimed at speeding up the translation process. Traduco extends most of the standard components of a traditional CAT tool with specific features necessary to support the interpretation and translation of complex texts (like the Babylonian Talmud, that we here present as a case study), which pose particular comprehension issues. Traduco goes beyond the translation and its printing: it includes features for the addition of notes and annotations and the creation of glossaries. 
Translators, editors, supervisors, and end-users accessing Traduco are able to use components that can ease the translation process through the use of CAT technologies, the supervision and managing of the whole process of translation and publishing, the exporting of translations and notes in standard formats for desktop publishing software and TEI format, and, soon, the possibility to perform automatic linguistic analysis of the text. Moreover, Traduco allows the users to insert notes, comments, annotations, and bibliographical references. The design and development of Traduco required the adoption of a multidisciplinary approach, leveraging on advances in software engineering, computational linguistics, knowledge engineering, and publishing.}, KEYWORDS = {computer-assisted translation, talmud, progetto traduzione del talmud babilonese}, PAGES = {47--62}, URL = {http://dsh.oxfordjournals.org/content/early/2016/10/26/llc.fqw054}, VOLUME = {32}, DOI = {10.1093/llc/fqw054}, PUBLISHER = {Oxford University Press (Oxford, UK, Regno Unito)}, ISSN = {2055-7671}, JOURNAL = {Digital Scholarship in the Humanities}, } @ARTICLE{MARZI_2017_ARTICLE_MFN_363116, AUTHOR = {Marzi, C. and Ferro, M. and Nahli, O.}, TITLE = {Arabic word processing and morphology induction through adaptive memory self-organisation strategies}, YEAR = {2017}, ABSTRACT = {Aim of the present study is to model the human mental lexicon, by focussing on storage and processing dynamics, as lexical organisation relies on the process of input recoding and adaptive strategies for long-term memory organisation. A fundamental issue in word processing is represented by the emergence of the morphological organisation level in the lexicon, based on paradigmatic relations between fully-stored word forms. 
Morphology induction can be defined as the task of perceiving and identifying morphological formatives within morphologically complex word forms, as a function of the dynamic interaction between lexical representations and distribution and degrees of regularity in lexical data. In the computational framework we propose here (TSOMs), based on Self-Organising Maps with Hebbian connections defined over a temporal layer, the identification/perception of surface morphological relations involves the alignment of recoded representations of morphologically-related input words. Facing a non-concatenative morphology such as the Arabic inflectional system prompts a reappraisal of morphology induction through adaptive organisation strategies, which affect both lexical representations and long-term storage. We will show how a strongly adaptive self-organisation during training is conducive to emergent relations between word forms, which are concurrently, redundantly and competitively stored in human mental lexicon, and to generalising knowledge of stored words to unknown forms.}, KEYWORDS = {Non-concatenative morphological structure, Lexical storage and access, Topological alignment, Synchronisation, Self-Organising Maps}, PAGES = {179-188}, URL = {http://www.sciencedirect.com/science/article/pii/S1319157816301148}, VOLUME = {29}, DOI = {10.1016/j.jksuci.2016.11.006}, PUBLISHER = {Elsevier (Amsterdam, Paesi Bassi)}, ISSN = {2213-1248}, JOURNAL = {Journal of King Saud University. Computer and information sciences (Online)}, } @ARTICLE{ROBERTSON_2017_ARTICLE_RB_381705, AUTHOR = {Robertson, B. and Boschetti, F.}, TITLE = {Large-Scale Optical Character Recognition of Ancient Greek}, YEAR = {2017}, ABSTRACT = {This paper documents our campaign to undertake the large-scale optical character recognition of ancient, or polytonic, Greek. 
Building upon the Gamera OCR engine and developing a suite of post-processing tools, including automatic spellcheck, we processed 1,200 volumes comprising 329,002,271 Greek words. A sample of 10 pages is studied in detail; they demonstrate the degree to which each step of post-processing improved the results, and with which source documents. These pages attain an average character accuracy of about 96%. These results will provide a basis for further improvements, including the training of other open-source OCR engines.}, KEYWORDS = {OCR, Ancient Greek}, PAGES = {341-359}, URL = {https://doi.org/10.3138/mous.14.3-3}, VOLUME = {14 (III series)}, PUBLISHER = {University of Calgary Press, for the Classical Association of Canada (Calgary, Canada)}, ISSN = {1496-9343}, JOURNAL = {Mouseion (Calg.)}, } @ARTICLE{VENTURI_2017_ARTICLE_VDMFB_382249, AUTHOR = {Venturi, G. and Dell'Orletta, F. and Montemagni, S. and Flore, E. and Bellandi, T.}, TITLE = {La qualità dei consensi informati. Un'analisi linguistico-computazionale della leggibilità dei testi}, YEAR = {2017}, ABSTRACT = {La leggibilità dei testi delle informative di consenso per le procedure diagnostico-terapeutiche è un requisito fondamentale, per offrire alle persone assistite l'accesso alle informazioni necessarie a una scelta consapevole delle opzioni disponibili per curare i diversi problemi di salute. La disponibilità di un testo leggibile è inoltre un aiuto per i medici responsabili della comunicazione e della raccolta del consenso, che possono impiegarlo come un ausilio alle informazioni presentate in forma verbale durante il colloquio, in modo tale da poter condividere una base di conoscenze minime da condividere con il paziente e i suoi familiari. 
Seppure le evidenze siano limitate in merito alla relazione tra la qualità del consenso e l'attitudine al contenzioso da parte dei pazienti in caso di trattamenti che esitano in un danno attribuibile alle cure (Durand et al., 2015), si tratta di un ambito di ricerca di crescente interesse nella letteratura sulla sicurezza (Wu et al., 2005; Manta et al., 2017). Nella casistica regionale della Toscana sulle richieste di risarcimento, solo l'1% dei sinistri include problemi di consenso informato (dati Centro GRC), probabilmente anche a causa di una sottovalutazione del diritto all'informazione da parte dei cittadini che si sottopongono a interventi programmati, connessa con una limitata consapevolezza del potere di scegliere le proprie cure che ogni persona dovrebbe poter esercitare posta di fronte alle opzioni terapeutiche disponibili per i propri problemi di salute.}, KEYWORDS = {Consenso informato, valutazione automatica della leggibilità, Trattamento Automatico del Linguaggio}, PAGES = {35-39}, URL = {http://www.formas.toscana.it/rivistadellasalute/fileadmin/files/fascicoli/2017/212/SeT_fascicolo_212.pdf}, VOLUME = {212}, PUBLISHER = {ETS (Pisa, Italia)}, ISSN = {0392-4505}, JOURNAL = {Salute e territorio}, } @INCOLLECTION{BOSCHETTI_2017_INCOLLECTION_BDD_382022, AUTHOR = {Boschetti, F. and Del Gratta, R. and Del Grosso, A. M.}, TITLE = {The role of digital scholarly editors in the design of components for cooperative philology}, YEAR = {2017}, ABSTRACT = {This contribution is focused on the role of the digital scholarly editor in the continuous process of analysis, development and evaluation of libraries of components for cooperative philology.}, KEYWORDS = {digital editions, collaborative and cooperative philology, digital philology}, PAGES = {249-253}, URL = {https://www.sidestone.com/books/advances-in-digital-scholarly-editing}, ISBN = {978-90-8890-484-4}, BOOKTITLE = {Advances in Digital Scholarly Editing}, EDITOR = {Boot, P. and Cappellotto, A. and Dillen, W. 
and Fischer, F. and Kelly, A. and Mertgens, A. and Sichani, A. and Spadini, E. and Van Hulle, D.}, } @INCOLLECTION{COPPOLA_2017_INCOLLECTION_CMRT_371344, AUTHOR = {Coppola, D. and Moretti, R. and Russo, I. and Tranchida, F.}, TITLE = {In quante lingue mangi? Tecniche glottodidattiche e language testing in classi plurilingui e ad abilità differenziata}, YEAR = {2017}, ABSTRACT = {La diversità linguistica e culturale costituisce oggi una caratteristica strutturale delle nostre scuole. La necessità, caldeggiata dai principali documenti nazionali ed europei sull'educazione, di considerare le lingue "immigrate", assieme a tutte le altre presenti in classe, come un'importante risorsa linguistica e culturale per gli alunni, impone la ricerca di metodologie d'insegnamento idonee a valorizzare l'intero repertorio linguistico dei ragazzi e di prove di verifica atte a testare in modo più adeguato le loro competenze linguistico-comunicative, senza trascurare gli importanti aspetti culturali veicolati dalle diverse lingue. Il presente contributo riporta i primi dati di un'indagine che si colloca nell'ambito di una più ampia sperimentazione, avviata nel gennaio 2015, in alcune classi del quinto anno della scuola primaria e del primo anno della secondaria di primo grado della provincia di Firenze, con l'obiettivo di implementare tecniche glottodidattiche cooperative supportate dalla tecnologia in classi plurilingui e ad abilità differenziata.}, KEYWORDS = {language testing, glottodidattica, plurilinguismo}, PAGES = {199-231}, URL = {https://publications.cnr.it/doc/371344}, PUBLISHER = {Pisa University Press (Pisa, ITA)}, ISBN = {978-88-6741-789-6}, BOOKTITLE = {Strutture linguistiche e dati empirici in diacronia e sincronia}, EDITOR = {Marotta, G. and Lievers, F. 
S.}, } @INCOLLECTION{DELGRATTA_2017_INCOLLECTION_D_483914, AUTHOR = {Del Gratta, R.}, TITLE = {WordNets per lingue classiche}, YEAR = {2017}, ABSTRACT = {The Wordnet for Ancient Greek (AGWN) is presented and illustrated both as a stand-alone semantic net and as a participant in a more complex net of wordnets for historical and modern languages. Two applications of AGWN carried out within the Memorata Poetis project are described: in the first application, the AGWN is used to investigate multilingual synonyms; in the second one, AGWN is used to extract Greek synonyms in order to classify epigrams in terms of similar content.}, KEYWORDS = {WordNet, Perseus project, Ancient Greek Wordnet, oNLP, Classical Languages}, PAGES = {117-122}, URL = {https://publications.cnr.it/doc/483914}, VOLUME = {14/3}, DOI = {10.14277/6969-182-9/ANT-14-9}, PUBLISHER = {Edizioni Ca' Foscari (Venezia, ITA)}, ISBN = {978-88-6969-183-6}, BOOKTITLE = {Strumenti digitali e collaborativi per le Scienze dell'antichità}, EDITOR = {Mastandrea, P.}, } @INCOLLECTION{MANZELLA_2017_INCOLLECTION_MBBDDFMMMNS_368363, AUTHOR = {Manzella, G. M. R. and Bartolini, R. and Bustaffa, F. and D'Angelo, P. and De Mattei, M. and Frontini, F. and Maltese, M. and Medone, D. and Monachini, M. and Novellino, A. and Spada, A.}, TITLE = {Semantic Search Engine for Data Management and Sustainable Development: Marine Planning Service Platform}, YEAR = {2017}, ABSTRACT = {This chapter presents a computer platform supporting a Marine Information and Knowledge System based on a repository that gathers, classify and structures marine scientific literature and data, guaranteeing their accessibility by means of standard protocols. This requires the access to quality controlled data and to information that is provided in grey literature and/or in relevant scientific literature. There exist efforts to develop search engines to find author's contributions to scientific literature or publications. 
This implies the use of persistent identifiers. However very few efforts are dedicated to link publications to data that was used, or cited in them or that can be of importance for the published studies. Full-text technologies are often unsuccessful since they assume the presence of specific keywords in the text; to fix this problem, it is suggested to use different semantic technologies for retrieving the text and data and thus getting much more complying results.}, KEYWORDS = {Marine Information and Knowledge System}, PAGES = {127-154}, URL = {http://www.igi-global.com/chapter/semantic-search-engine-for-data-management-and-sustainable-development/166839#}, VOLUME = {Volume 7}, DOI = {10.4018/978-1-5225-0700-0.ch006}, PUBLISHER = {IGI Global (Hershey, USA)}, BOOKTITLE = {Oceanographic and Marine Cross-Domain Data Management for Sustainable Development}, EDITOR = {Diviacco, P. and Leadbetter, A. and Glaves, H.}, } @INCOLLECTION{MARCONI_2017_INCOLLECTION_MMC_406349, AUTHOR = {Marconi, L. and Miyares, L. R. and Cutugno, P.}, TITLE = {Características distributivas del español de Cuba a partir del análisis de dos estudios lingüísticos}, YEAR = {2017}, ABSTRACT = {El objetivo de este artículo es realizar un estudio sobre las características distributivas de un subconjunto del español de Cuba; se refiere a la información recogida en el Diccionario Ortográfico del Español -con la exclusión de las lexías complejas y de las locuciones-, o sea de los 7927 lemas que han sido generados a partir de las 93759 palabras flexionadas con sus categorías gramaticales; también se analizarán los datos del Léxico Activo-Funcional del Escolar Cubano para hacer un mapeo de las palabras utilizadas por los escolares. El artículo contiene también una serie de comparaciones de algunas propiedades distributivas derivadas del tratamiento de los datos del DOE con los resultados obtenidos mediante el análisis del Léxico Activo-Funcional del Escolar Cubano. 
Cuando se habla de propiedades distributivas de una lengua, estas pueden ser consideradas en relación con los lemas, con las diversas palabras flexionadas o con el número de apariciones en un corpus específico. En este artículo nos referiremos a una muestra de lemas y flexiones derivada del Diccionario Ortográfico del Español y solamente a las flexiones del Léxico Activo-Funcional del Escolar Cubano.}, KEYWORDS = {Léxico, escolares, Cuba, propiedades distributivas}, PAGES = {87-111}, URL = {https://publications.cnr.it/doc/406349}, PUBLISHER = {Centro de Lingüística Aplicada, Ministero de Ciencia, Tecnología y Medio Ambiente (Santiago de Cuba, CUB)}, ISBN = {978-959-7174-34-9}, BOOKTITLE = {Estudios de Lexicología y Lexicografía Homenaje a Eloína Miyares Bermúdez}, EDITOR = {Miyares, L. R.}, } @INCOLLECTION{MONTEMAGNI_2017_INCOLLECTION_MW_367892, AUTHOR = {Montemagni, S. and Wieling, M.}, TITLE = {Exploring the role of extra-linguistic factors in defining dialectal variation patterns through cluster comparison}, YEAR = {2017}, ABSTRACT = {This paper contributes to two open issues in the dialectometric literature, i.e. i) whether and how patterns of linguistic variation are influenced by extra-linguistic features such as the geomorphology of the area, or cultural, administrative and political boundaries, and ii) whether and how the influence of extra-linguistic factors remains stable across linguistically-grounded partitions of data. To investigate these issues, a case study focusing on lexical variation has been carried out on a regional lexical atlas of Tuscan dialects. 
A variety of extra-linguistic features was taken into account, whose impact and role has been evaluated with respect to both the whole dialectal dataset and across different semantic fields.}, KEYWORDS = {dialectometric literature, dialectology, linguistic variation, dialect, Tuscan, lexical atlas}, PAGES = {241-251}, URL = {http://www.let.rug.nl/festschriftnerbonne/25.%20Montemagni%20\&%20Wieling.pdf}, VOLUME = {Tributes 32}, BOOKTITLE = {From Semantics to Dialectometry. Festschrift in honor of John Nerbonne}, EDITOR = {Wieling, M. and Kroon, M. and Van Noord, G. and Bouma, G.}, } @INCOLLECTION{SASSI_2017_INCOLLECTION_S_382198, AUTHOR = {Sassi, M.}, TITLE = {Cuestiones pertinentes e impertinentes de los Diccionarios Temáticos}, YEAR = {2017}, ABSTRACT = {The Dictionaries in general, and in particular the thematic ones, have represented the thread of my career in the Istituto di Linguistica Computazionale of Pisa from and before its origins (in the years 1965-1978 it was called Divisione Linguistica of the CNUCE and later ILC- CNR). In the '60 -'70 we worked on the DMI (Italian Machine Dictionary) under the auspices of the Italian Parliament. In the 1980s, the first studies of dictionaries organized in lexical fields began, starting from the intuitions of Julio Casares and the same principles were applied to Italian. From these distant experiences, over 50 years, different studies, applications, corpora and databases have been developed in relation to several disciplines: Literature, Philology, Law, Justice, Administration, Tourism, Medicine, etc. There have been compilations of several Authors in Spanish language: Bolivar, Carpentier, Cervantes, Encina, Machado, Marquez, Neruda, Salinas, Teresa de Avila, Unamuno, Vallejo, that will be described in this presentation, with examples of online search with the DBT-Web interface. 
It is also discussed to preserve this data for the future through its maintenance for consultation on the network.}, KEYWORDS = {Computational Linguistics, Thematic Dictionaries, Corpora, Preservation and Reuse of data, Historical overview of Text Processing}, PAGES = {37-49}, URL = {https://publications.cnr.it/doc/382198}, VOLUME = {VII}, ISBN = {978-84-617-4512-8}, BOOKTITLE = {El diccionario en la encrucijada: de la sintaxis y la cultura al desafío digital}, EDITOR = {López, S. and Cuadrado, I. G. and Escribano, J. G. and Cecilio}, } @INCOLLECTION{SORIA_2017_INCOLLECTION_S_382104, AUTHOR = {Soria, C.}, TITLE = {What is Digital Language Diversity and why should we care?}, YEAR = {2017}, ABSTRACT = {The relationship between language and the Internet is a growing area of policy interest and academic study, see for instance (MAAYA 2012), (Paolillo et al. 2005), (Pimienta 2001), (Kornai 2013), (Pimienta et al. 2009), (Rehm and Uszkoreit 2012). The emerging picture is one where language profoundly affects a person's experience of the Internet. It determines how much - if any - information you can access on Wikipedia. It orients a person's choices and decisions by shaping the results of a search engine, depending on the language used. It determines the range of services that can be available over the Internet, and therefore the amount of everyday tasks (such as buying a ticket, reviewing opinions about hotel and restaurants, purchasing books or other goods, etc.) that can be carried out virtually. Far from infinite, the Internet, it seems, is only as big as one's language. Should this hold true, it would be at odds with the original spirit of the Internet, which - according to the words of Tim Berners-Lee - would be a place "to cross barriers and connect cultures". But it is safe to argue that the extent to which a language can be used over the Internet not only affects a person's experience and choice of opportunities; it also affects the language itself. 
If a language is poorly or not supported to be used over digital devices, for instance if the keyboard of the PC is not equipped with the characters and diacritics necessary to write in the language, or if there is no spell checker for a language, then its usability becomes severely affected, and it might never be used online. The language could become "digitally endangered", and its value and profile could be lessened, especially in the eyes of the new generations. These considerations call for closer examination of a number of related issues. First, the "digital language diversity", i.e. the linguistic diversity of the Internet. Second, it is important to reflect on the conditions that make it possible for a language to be used over digital devices, and about what can be done in order to grant this possibility to languages other than so-called "major" ones.}, KEYWORDS = {digital language diversity}, PAGES = {13-28}, URL = {http://www.linguapax.org/wp-content/uploads/2015/03/LinguapaxReview2016web.pdf}, } @EDITORIAL{DISEGNI_2017_EDITORIAL_D_383158, AUTHOR = {Di Segni, D. G.}, TITLE = {Talmud Babilonese - Trattato Berakhòt}, YEAR = {2017}, ABSTRACT = {Curatela della traduzione italiana commentata del trattato del Talmud Babilonese "Berakhòt" in due volumi}, KEYWORDS = {Talmud, Traduco, Linguistica computazionale}, PAGES = {932}, URL = {https://publications.cnr.it/doc/383158}, VOLUME = {1*, 1**}, PUBLISHER = {Giuntina (Firenze, ITA)}, ISBN = {978-88-8057-668-6}, } @EDITORIAL{PRETORIUS_2017_EDITORIAL_PS_382299, AUTHOR = {Pretorius, L. and Soria, C.}, TITLE = {Language Resources and Evaluation. 
Special Issue: Collaboration and Computing for Under-resourced Languages}, YEAR = {2017}, ABSTRACT = {Special issue of the journal "Language Resources and Evaluation", dedicated to under-resourced languages}, KEYWORDS = {under-resourced languages, language resources, minority languages, endangered languages, small languages}, PAGES = {891-1084}, URL = {https://link.springer.com/journal/10579/51/4/page/1}, VOLUME = {51}, DOI = {10.1007/s10579-017-9405-8}, PUBLISHER = {Springer (Berlin, DEU)}, } @EDITORIAL{SORIA_2017_EDITORIAL_SRQ_382301, AUTHOR = {Soria, C. and Russo, I. and Quochi, V.}, TITLE = {Reports on Digital Language Diversity in Europe}, YEAR = {2017}, ABSTRACT = {In these reports we present the results of the first survey about the actual needs of European minority languages speakers in terms of digital opportunities}, KEYWORDS = {regional languages, minority languages, digital vitality, digital use}, URL = {http://www.dldp.eu/content/reports-digital-language-diversity-europe}, } @EDITORIAL{BRANCO_2017_EDITORIAL_BCVIC_383337, AUTHOR = {Branco, A. and Cohen, K. B. and Vossen, P. and Ide, N. and Calzolari, N.}, TITLE = {Replicability and reproducibility of research results for human language technology: introducing an LRE special section}, YEAR = {2017}, KEYWORDS = {Human Language Technology (HLT), Language Resources (LR)}, PAGES = {1-5}, URL = {https://link.springer.com/article/10.1007/s10579-017-9380-0}, VOLUME = {51}, DOI = {10.1007/s10579-017-9380-0}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, BOOKTITLE = {Language resources and evaluation (Print)}, } @EDITORIAL{PIRRELLI_2017_EDITORIAL_PZ_381161, AUTHOR = {Pirrelli, V. 
and Zarghili, A.}, TITLE = {Arabic Natural Language Processing: Models, systems and applications}, YEAR = {2017}, KEYWORDS = {Natural Language Processing, Standard Modern Arabic}, PAGES = {A1-A3}, URL = {https://www.sciencedirect.com/science/article/pii/S1319157817301155}, VOLUME = {29}, DOI = {10.1016/j.jksuci.2017.04.004}, PUBLISHER = {Elsevier (Amsterdam, Paesi Bassi)}, ISSN = {2213-1248}, BOOKTITLE = {Journal of King Saud University. Computer and information sciences (Online)}, } @EDITORIAL{PRETORIUS_2017_EDITORIAL_PS_382062, AUTHOR = {Pretorius, L. and Soria, C.}, TITLE = {Introduction to the Special Issue}, YEAR = {2017}, KEYWORDS = {language resources, under-resourced languages, minority languages}, PAGES = {891-895}, URL = {https://link.springer.com/article/10.1007%2Fs10579-017-9405-8}, VOLUME = {51}, DOI = {10.1007/s10579-017-9405-8}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, BOOKTITLE = {Language resources and evaluation (Print)}, } @INPROCEEDINGS{BARTOLINI_2017_INPROCEEDINGS_BPGGB_368487, AUTHOR = {Bartolini, R. and Pardelli, G. and Goggi, S. and Giannini, S. and Biagioni, S.}, TITLE = {A terminological "journey" in the Grey Literature domain}, YEAR = {2017}, ABSTRACT = {The work analyzes a corpus constituted of the entire amount of full research papers published in the GL conference series over a time-span of more than one decade (2003-2014) with the aim of creating a terminological map of relevant words in the various GL research topics. "... corpora used to extract terminological units can be further investigated to find semantic and conceptual information on terms or to represent conceptual relationships between terms." (Bourigault D. et al., 2001). Another interesting inquiry is the terminology used in the GL conferences for describing the types of documents which can be detected (Pejšová P. 
et al., 2012).}, KEYWORDS = {Grey Literature, Information Extraction IE, Terminology}, PAGES = {117-130}, URL = {https://publications.cnr.it/doc/368487}, VOLUME = {18}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-30-2}, CONFERENCE_NAME = {Eighteenth International Conference on Grey Literature (GL18): Leveraging Diversity in Grey Literature}, CONFERENCE_PLACE = {New York, US}, CONFERENCE_DATE = {November 28-29, 2016}, BOOKTITLE = {Proceedings of the Eighteenth International Conference on Grey Literature (GL18): Leveraging Diversity in Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{BELLANDI_2017_INPROCEEDINGS_BGPW_377421, AUTHOR = {Bellandi, A. and Giovannetti, E. and Piccini, S. and Weingart, A.}, TITLE = {Developing LexO: a Collaborative Editor of Multilingual Lexica and Termino-ontological Resources in the Humanities}, YEAR = {2017}, ABSTRACT = {In this paper we present a first version of LexO, a collaborative editor of multilingual lexica and termino-ontological resources. It is based on the lemon model, and aims at supporting lexicographers and terminologists in their work. Although the development of LexO is still ongoing, the editor is already being used within two research projects in the field of Computational Linguistics applied to Humanities: DiTMAO and Totus Mundus. 
This allowed to test the functionalities of LexO, and prove its high degree of flexibility according to the different extensions of the lemon model needed to fulfill the needs of the involved scholars.}, KEYWORDS = {lexo, multi-lingual termino-ontological resources, DiTMAO, Totus Mundus}, URL = {http://www.aclweb.org/anthology/W17-7000}, CONFERENCE_NAME = {Workshop on Language, Ontology, Terminology and Knowledge Structures-LOTKS 2017}, CONFERENCE_PLACE = {Montpellier}, CONFERENCE_DATE = {19/09/2017}, BOOKTITLE = {Proceedings of Language, Ontology, Terminology and Knowledge Structures Workshop (LOTKS 2017)}, } @INPROCEEDINGS{BRUNATO_2017_INPROCEEDINGS_BD_382461, AUTHOR = {Brunato, D. and Dell'Orletta, F.}, TITLE = {On the order of words in Italian: a study on genre vs complexity}, YEAR = {2017}, ABSTRACT = {In this paper we present a cross-genre study on word order variation in Italian based on automatically dependency-parsed corpora. A comparative analysis focused on dependency direction and dependency distance for major constituents in the sentence is carried out in order to assess the influence of both textual genre and linguistic complexity on the distribution of phenomena of syntactic markedness.}, KEYWORDS = {word order, syntactic analysis, linguistic complexity, natural language processing}, PAGES = {25-31}, URL = {https://publications.cnr.it/doc/382461}, CONFERENCE_NAME = {International Conference on Dependency Linguistics (Depling 2017)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {18-20/09/2017}, } @INPROCEEDINGS{CARDILLO_2017_INPROCEEDINGS_CFMP_381090, AUTHOR = {Cardillo, F. A. and Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {How "deep" is learning word inflection?}, YEAR = {2017}, ABSTRACT = {Machine learning offers two basic strategies for morphology induction: lexical segmentation and surface word relation. The first one assumes that words can be segmented into morphemes. 
Inducing a novel inflected form requires identification of morphemic constituents and a strategy for their recombination. The second approach dispenses with segmentation: lexical representations form part of a network of associatively related inflected forms. Production of a novel form consists in filling in one empty node in the network. Here, we present the results of a recurrent LSTM network that learns to fill in paradigm cells of incomplete verb paradigms. Although the process is not based on morpheme segmentation, the model shows sensitivity to stem selection and stem-ending boundaries.}, KEYWORDS = {LSTM, Morphology induction, Cognitive modelling}, PAGES = {77-82}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85037368972\&origin=inward}, VOLUME = {2006}, DOI = {10.4000/books.aaccademia.2314}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {978-88-99982-76-8}, CONFERENCE_NAME = {Fourth Italian Conference on Computational Linguistics}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {11-13/12/2017}, BOOKTITLE = {Proceedings of the Fourth Italian Conference on Computational Linguistics (CLiC-it 2017)}, EDITOR = {Basili, R. and Nissim, M. and Satta, G.}, } @INPROCEEDINGS{CIMINO_2017_INPROCEEDINGS_CWDMV_382252, AUTHOR = {Cimino, A. and Wieling, M. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Identifying predictive features for textual genre classification: The key role of syntax}, YEAR = {2017}, ABSTRACT = {The paper investigates impact and role of different feature types for the specific task of Automatic Genre Classification with the final aim of identifying the most predictive ones. The goal was pursued by carrying out incremental feature selection through Grafting using different sets of linguistic features. 
Achieved results for discriminating among four traditional textual genres show the key role played by syntactic features, whose impact turned out to vary across genres.}, KEYWORDS = {Textual Genre Classification, Feature Selection, Syntactic Features}, PAGES = {1-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85037370866\&origin=inward}, VOLUME = {2006}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {11-12 dicembre 2017}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{DELVIGNA_2017_INPROCEEDINGS_DCDPT_369760, AUTHOR = {Del Vigna, F. and Cimino, A. and Dell'Orletta, F. and Petrocchi, M. and Tesconi, M.}, TITLE = {Hate me, hate me not: Hate speech detection on Facebook}, YEAR = {2017}, ABSTRACT = {While favouring communications and easing information sharing, Social Network Sites are also used to launch harmful campaigns against specific groups and individuals. Cyberbullism, incitement to self-harm practices, sexual predation are just some of the severe effects of massive online offensives. Moreover, attacks can be carried out against groups of victims and can degenerate in physical violence. In this work, we aim at containing and preventing the alarming diffusion of such hate campaigns. Using Facebook as a benchmark, we consider the textual content of comments appeared on a set of public Italian pages. We first propose a variety of hate categories to distinguish the kind of hate. Crawled comments are then annotated by up to five distinct human annotators, according to the defined taxonomy. 
Leveraging morpho-syntactical features, sentiment polarity and word embedding lexicons, we design and implement two classifiers for the Italian language, based on different learning algorithms: the first based on Support Vector Machines (SVM) and the second on a particular Recurrent Neural Network named Long Short Term Memory (LSTM). We test these two learning algorithms in order to verify their classification performances on the task of hate speech recognition. The results show the effectiveness of the two classification approaches tested over the first manually annotated Italian Hate Speech Corpus of social media text.}, KEYWORDS = {Hate speech, NLP, Social Networks}, PAGES = {86-95}, URL = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85017337270\&partnerID=q2rCbXpz}, VOLUME = {1816}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {ITA-SEC 17}, CONFERENCE_PLACE = {Venezia, Italia}, CONFERENCE_DATE = {17-20/01/2017}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{GIOVANNETTI_2017_INPROCEEDINGS_GABDD_377423, AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Dattilo, D. and Dell'Orletta, F.}, TITLE = {Stylometry in Computer-Assisted Translation: Experiments on the Babylonian Talmud}, YEAR = {2017}, ABSTRACT = {The purpose of this research is to experiment the application of stylometric techniques in the area of Computer-Assisted Translation to reduce the revision effort in the context of a collaborative, large scale translation project. 
The obtained results show a correlation between the editing extent and the compliance to some specific linguistic features, proving that supporting translators in writing translations following a desired style can actually reduce the number of following necessary interventions (and, consequently, save time) by revisors, editors and curators.}, KEYWORDS = {traduco, babylonian talmud, computer-assisted translation, stylometry, readability}, PAGES = {177-182}, URL = {https://publications.cnr.it/doc/377423}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {9788899982942}, CONFERENCE_NAME = {Fourth Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {11-13/12/2017}, BOOKTITLE = {Proceedings of 4th Italian Conference on Computational Linguistics (CLiC-it)}, EDITOR = {Basili, R. and Nissim, M. and Satta, G.}, } @INPROCEEDINGS{MORGAVI_2017_INPROCEEDINGS_MMFMCLC_370916, AUTHOR = {Morgavi, G. and Morando, M. and Ferretti, M. and Marconi, L. and Cutugno, P. and Lucentini, R. and Chiarella, D.}, TITLE = {Active aging: a user centred approach for designing a virtual village network architecture}, YEAR = {2017}, ABSTRACT = {Currently many solutions for domotic housing have been created to provide a better autonomous life for older people and to reduce health and caregiving costs. Unfortunately, assistive technology is often not accepted by old users. This may be due to the technology-oriented approach adopted in designing such systems, which does not consider enough the users' needs. This paper proposes a procedure to design an AAL (Ambient Assisted Living) system to support active aging based on the User-Centred Design approach. A virtual network architecture integrating different solutions has been designed involving final users from the very beginning of the planning stage. 
The result is a high usable and flexible platform that allows creating user-friendly products as well as services and realizing also high-level functions by integrating data from completely different contexts.}, KEYWORDS = {Virtual Village Network, Active aging, Ambient Assisted Living, User-Centred Design Approach}, PAGES = {5}, URL = {https://publications.cnr.it/doc/370916}, PUBLISHER = {Centro de Lingüística Aplicada, Ministero de Ciencia, Tecnología y Medio Ambiente (Santiago de Cuba, CUB)}, ISBN = {9789597174325}, CONFERENCE_NAME = {XV° Simposio Internacional de Comunicación Social, Eloína Miyares in memoriam}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {23-27/01/2017}, } @INPROCEEDINGS{PARDELLI_2017_INPROCEEDINGS_PGBRM_367782, AUTHOR = {Pardelli, G. and Goggi, S. and Bartolini, R. and Russo, I. and Monachini, M.}, TITLE = {A Geographical Visualization of GL Communities: A Snapshot}, YEAR = {2017}, ABSTRACT = {This quotation stresses the important role of the several international organizations in producing and disseminating knowledge in the field of Grey Literature (GL): the paper aims to provide a first snapshot of the geographical distribution of GL organizations and their participation to the annual International Conference on Grey Literature over the time (in the period from 2003 to 2015. See List of Conferences on Table 2 ). Nowadays a visual representation of data is often associated with the traditional statistical graphs, in particular for representing complex phenomena by means of maps and diagrams, which allow a deeper and more focused analysis of the data. 
In our case the geographical representation of stakeholders in government, academics, business and industry aims at visualizing the GL community across the globe: it concerns 674 organizations which over the years have contributed to the development of a common vision on the most pressing issues of the field by using new paradigms such as Open Access and the social networks.}, KEYWORDS = {Geographical Visualization, Grey Literature Communities}, PAGES = {109-113}, URL = {http://greyguide.isti.cnr.it/wp-content/uploads/2017/04/GL18_Conference_Proceedings.pdf}, VOLUME = {18}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-30-2}, CONFERENCE_NAME = {Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, CONFERENCE_PLACE = {Washington}, CONFERENCE_DATE = {November 28-29, 2016}, BOOKTITLE = {GL18 Conference Proceedings Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{PIRRELLI_2017_INPROCEEDINGS_P_398875, AUTHOR = {Pirrelli, V.}, TITLE = {Co-activation and competition effects in lexical storage and processing}, YEAR = {2017}, ABSTRACT = {According to traditional wisdom in Linguistics, morphologically simple words reside in the mental lexicon, a kind of brain dictionary that contains unpredictable mappings between lexical features. Here I illustrate some of the defining features of an alternative view of the language architecture, where computation and storage are just the short-term and long-term dynamics of the same underlying process. 
Empirical results of a computational model of this view are reported and general implications for a theory of the lexicon are discussed.}, KEYWORDS = {Mental Lexicon, Morphology, Human Language Processing, artificial neural networks, lexical self-organization}, PAGES = {1-21}, URL = {https://picgl4.files.wordpress.com/2015/11/4-paper_1_pirrelli.pdf}, CONFERENCE_NAME = {4th Patras International Conference of Graduate Students in Linguistics}, CONFERENCE_PLACE = {Patras, Greece}, CONFERENCE_DATE = {20-22/05/2016}, } @INPROCEEDINGS{SASSOLINI_2017_INPROCEEDINGS_SCC_382393, AUTHOR = {Sassolini, E. and Cucurullo, S. and Cinini, A.}, TITLE = {I corpora digitali: dall'obsolescenza tecnologica, alla salvaguardia e alla condivisione}, YEAR = {2017}, ABSTRACT = {Studio e implementazione di un protocollo di recupero, conservazione e valorizzazione di testi e corpora digitali interessati da problemi di obsolescenza tecnologica. Le strategie di salvaguardia adottate si spingono oltre il salvataggio dei testi e la conservazione in un formato di rappresentazione in linea con gli standard internazionali (XML TEI), si pongono come obiettivo la valorizzazione di questo patrimonio attraverso nuove modalità di fruizione dei contenuti. Lo scopo è affiancare le funzionalità classiche di analisi testuale, che da sempre caratterizzano le nostre attività di ricerca, a nuove modalità grafiche e visuali di fruizione dei dati e, in alcuni casi, migrare verso dispositivi mobili e tecnologie App. In questo articolo, oltre al protocollo di recupero, presentiamo due sperimentazioni di valorizzazione di contenuti testuali. Nel primo caso proponiamo tecniche di visual analytics applicate ad un corpus testuale semi strutturato riguardante corrispondenza redatta in lingua italiana del 1600.
Nel secondo caso abbiamo realizzato un'applicazione per sistema Android finalizzata all'interrogazione di dati testuali relativi ad un progetto di censimento di architetture moderne della regione Liguria.}, KEYWORDS = {Testi digitali, Analisi testuale, Preservazione dei dati, Diffusione dei risultati}, PAGES = {31-35}, URL = {https://www.garr.it/it/documenti/3529-conferenza-2016-selected-papers-sassolini-et-al/file}, DOI = {10.26314/GARR-Conf16-proceeedings-06}, PUBLISHER = {Consortium GARR (Roma, ITA)}, ISBN = {978-88-905077-6-2}, CONFERENCE_NAME = {Conferenza GARR 2016-The CreActive Network}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {30/11/2016-02/12/2016}, } @INPROCEEDINGS{VADICAMO_2017_INPROCEEDINGS_VCFCDCT_375139, AUTHOR = {Vadicamo, L. and Carrara, F. and Falchi, F. and Cimino, A. and Dell'Orletta, F. and Cresci, S. and Tesconi, M.}, TITLE = {Cross-media learning for image sentiment analysis in the wild}, YEAR = {2017}, ABSTRACT = {Much progress has been made in the field of sentiment analysis in the past years. Researchers relied on textual data for this task, while only recently they have started investigating approaches to predict sentiments from multimedia content. With the increasing amount of data shared on social media, there is also a rapidly growing interest in approaches that work "in the wild", i.e. that are able to deal with uncontrolled conditions. In this work, we faced the challenge of training a visual sentiment classifier starting from a large set of user-generated and unlabeled contents. In particular, we collected more than 3 million tweets containing both text and images, and we leveraged on the sentiment polarity of the textual contents to train a visual sentiment classifier. To the best of our knowledge, this is the first time that a cross-media learning approach is proposed and tested in this context. 
We assessed the validity of our model by conducting comparative studies and evaluations on a benchmark for visual sentiment analysis. Our empirical study shows that although the text associated to each image is often noisy and weakly correlated with the image content, it can be profitably exploited to train a deep Convolutional Neural Network that effectively predicts the sentiment polarity of previously unseen images.}, KEYWORDS = {Big data, Data Mining, Sentiment Analysis, Social Media Analysis}, PAGES = {10}, URL = {https://ieeexplore.ieee.org/document/8265255}, DOI = {10.1109/ICCVW.2017.45}, ISBN = {978-1-5386-1034-3}, CONFERENCE_NAME = {ICCV 2017 IEEE International Conference on Computer Vision Workshops}, CONFERENCE_PLACE = {Venezia, Italy}, CONFERENCE_DATE = {22-29 October 2017}, } @INPROCEEDINGS{BARTOLINI_2017_INPROCEEDINGS_BGPRFF_377073, AUTHOR = {Bartolini, R. and Goggi, S. and Pardelli, G. and Russo, I. and Farace, D. and Frantzen, J.}, TITLE = {Data Visualization of a Grey Literature Community: A Cooperative Project}, YEAR = {2017}, ABSTRACT = {The expected outcome of this project will not only produce a revised and updated publication of International Directory of Organizations in Grey Literature, IDGL, but will also provide a visual overview of GreyNet as an international organization serving diverse communities with shared interests in grey literature. 
It would be a demonstration of GreyNet's commitment to research, publication, open access, education, and public awareness in this field of library and information science.}, KEYWORDS = {International Directory of Organizations in Grey Literature, Data Visualization}, PAGES = {63-63}, URL = {https://publications.cnr.it/doc/377073}, VOLUME = {19}, ISBN = {978-90-77484-32-6}, CONFERENCE_NAME = {Nineteenth International Conference on Grey Literature, GL19}, CONFERENCE_PLACE = {Rome, National Research Council, CNR}, CONFERENCE_DATE = {October 23-24, 2017}, BOOKTITLE = {Nineteenth International Conference on Grey Literature Public Awareness and Access to Grey Literature. Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{BELLANDI_2017_INPROCEEDINGS_BBKDM_366612, AUTHOR = {Bellandi, A. and Boschetti, F. and Khan, A. F. and Del Grosso, A. M. and Monachini, M.}, TITLE = {Provando e riprovando modelli di dizionario storico digitale: collegare voci, citazioni, interpretazioni}, YEAR = {2017}, ABSTRACT = {Il dizionario storico è il luogo d'incontro privilegiato di linguistica e lessicografia e filologia e critica letteraria. Nella prima parte prendiamo in considerazione un caso di studio piuttosto noto, relativo all'espressione "provando e riprovando", per mostrare come perfino i luoghi citati nei dizionari, che sono introdotti con lo scopo di disambiguare i termini in contesto, non siano privi di controversie interpretative. 
Nella seconda parte, molto più dettagliata e più tecnica, tentiamo di aggiungere ai modelli lessicali e citazionali già esistenti ed aperti soluzioni minime che ci permettano di collegare voci, citazioni e interpretazioni all'interno dell'universo dei Linked Open Data.}, KEYWORDS = {Linked Open Data LOD}, PAGES = {119-125}, URL = {http://aiucd2017.aiucd.it/wp-content/uploads/2017/01/book-of-abstract-AIUCD-2017.pdf}, CONFERENCE_NAME = {AIUCD 2017 Conference \& 3rd EADH Day}, CONFERENCE_PLACE = {Roma, Università "Sapienza"}, CONFERENCE_DATE = {24-28 January 2017}, BOOKTITLE = {AIUCD 2017 Conference}, } @INPROCEEDINGS{BOMPOLAS_2017_INPROCEEDINGS_BMFCPR_381125, AUTHOR = {Bompolas, S. and Marzi, C. and Ferro, M. and Cardillo, F. A. and Pirrelli, V. and Ralli, A.}, TITLE = {Transparency and predictability in Modern Greek conjugation: Implications for models of word processing}, YEAR = {2017}, ABSTRACT = {We argue that the Greek evidence calls for a substantial revision of the clear-cut interaction between transparency/predictability and regularity, to make room for a more process-oriented notion of regularity. 
According to this view, regularity is no longer an epiphenomenon of the design of the human language faculty and the purported dualism between rule-based and memory-based routes, but the graded result of the varying interaction of several structural factors concurrently affecting the human word processor.}, KEYWORDS = {Inflectional regularity, Word Processing, Modern Greek Conjugation}, PAGES = {17-19}, URL = {http://www.lilec.it/mmm/wp/wp-content/uploads/2017/02/Book-of-abstracts_MMM11_Final.pdf}, CONFERENCE_NAME = {MMM 11: 11th Mediterranean Morphology Meeting}, CONFERENCE_PLACE = {Cyprus}, CONFERENCE_DATE = {22-25/06/2017}, } @INPROCEEDINGS{DELGRATTA_2017_INPROCEEDINGS_D_382031, AUTHOR = {Del Gratta, R.}, TITLE = {(Re)Using OpeNER and PANACEA Web Services in the CLARIN Research Infrastructure}, YEAR = {2017}, ABSTRACT = {We describe the implications of (re)using the OpeNer and PANACEA Web Services into the CLARIN Research Infrastructure. The analyzed tools are of great interest for specific communities such as academic and small business focused on sentiment/opinion analysis and on Machine Translation along with related technologies, but their outcomes may be of great importance for the CLARIN audience as well. In fact, the Virtual Language Observatory shows a lot of lexical resources for sentiment but a few tool, while a lot of lexical resources and tools are available for Machine Translation. This means that the latter community is already in CLARIN, while the former should be poked. If community-related challenges are on the political side, issues related to interoperability are definitely on the technical one. The initiative is carried out at the ILC4CLARIN center in Pisa, the leading one of the CLARIN-IT national Consortium. The least common multiple between those two projects is neither limited to tools and Web Services nor to the creation of annotated corpora and lexicons; neither to the focus they have on specific communities. 
They also are based on (and strongly pursue and suggest) the concept of interoperability. This is clear from the use of the Kyoto Annotation Format in OpeNer, of Graph Annotation Format in PANACEA and of the Lexical Markup Framework in both. Data and tools interoperability is also a key asset in both CLARIN (https://www.clarin.eu/event/2017/clarin-workshop-towards-interoperability-lexico-semantic-resources) and EUDAT (https://eudat.eu/communities/an-eudat-based-fair-data-approach-for-data-interoperability). Within CLARIN, initiatives such as the Language Resource Switchboard and openly go towards methodologies and "systems" to address the interoperability issues. From a technical point of view the main issues are briefly reported below: 1. Many tools in OpeNer and PANACEA are command line ones; 2. OpeNer offers both POST and GET API; 3. PANACEA built its Web Services using Soaplab and offers SOAP Web Services; 4. KAF, LMF and GrAF guarantee the interoperability among data and services; 5. Simple pipelines are available in OpeNer, while a workflow engine has been used in PANACEA. Tools are already wrapped, but to fully meet the requirements of both LRS and WebLicht we have to build a new shell around the command line tools so that REST APIs can accept both POST and GET requests and accept/produce different formats. Indeed if Language Resource Switchboard accepts tools with their output format but requires to read data from URL in plain text, WebLicht accepts tools which read and write the TCF format. While OpeNer requires that the core (the command line) be wrapped into a REST shell, Web Services in PANACEA need REST APIs around a SOAP core.
In the final paper, we will finalize the technical aspects and describe how the User Involvement group can play an important role in poking the sentiment/opinion community in CLARIN.}, KEYWORDS = {Web Services, Clarin, Research Infrastructures}, URL = {https://indico.egi.eu/indico/event/3455/contribution/139}, CONFERENCE_NAME = {Digital Infrastructures for Research 2017}, CONFERENCE_PLACE = {Brussels, The Square Meeting Centre}, CONFERENCE_DATE = {30/11/2017, 1/12/2017}, } @INPROCEEDINGS{DELGROSSO_2017_INPROCEEDINGS_D_384783, AUTHOR = {Del Grosso, A. M.}, TITLE = {Domain Driven Design and Domain Specific Modelling for Digital Textual Scholarship}, YEAR = {2017}, ABSTRACT = {Over the last years, the digital turn and the world wide web have led historical studies towards an automatic processing of their own data and consequently towards new forms of scholarly editing and publications. In this framework, scholars have adopted digital models, electronic elements and computational features in their work, but these new instruments are generally derived from other disciplines. For example, they exploit optical character recognition from image processing, corpora annotation and natural language processing from computational linguistics, text alignment from bioinformatics, text meaning from knowledge engineering, text presentation from data visualization. However, these latter research areas do not cover entirely the specificity of the fundamental requirements of the scholarly domain (for instance, treebank data models do not provide the adequate abstractions to manage multiple variant readings and multiple text interpretations). To exceed these issues, it is essential to adopt correct design approaches devoted to analyze the problem space of the historical source editing field. This rigorous and formal analysis will shape suitable architectures, design patterns, data abstractions and procedural abstractions for the constitutive features of the digital scholarly editions. 
Moreover, this modelling process will produce generic, flexible, maintainable and reusable digital models and modular textual scholarly environments. This contribution aims at discussing software engineering approaches, within an object-oriented paradigm, towards the definition of domain specific abstractions (DS-ADTs). In this way, it will be possible to accommodate domain needs by formally defining core "unities of concerns" which actually adhere to both the traditional and the digital editorial domain.}, KEYWORDS = {Domain Driven Design, Digital Scholarly Editing, Computational Philology, Digital Philology}, URL = {http://atlasfontium.pl/edition2.0/Home-and-News.php}, CONFERENCE_NAME = {Historical Source Edition 2.0}, CONFERENCE_PLACE = {Warsaw, Poland}, CONFERENCE_DATE = {6/10/2017-7/10/2017}, } @INPROCEEDINGS{DELGROSSO_2017_INPROCEEDINGS_D_390293, AUTHOR = {Del Grosso, A. M.}, TITLE = {Digital Textual Scholarship Tools: From Digitizing Historical Archives To Digital Scholarly Editing Models}, YEAR = {2017}, ABSTRACT = {The contribution illustrates fundamental aspects concerning the digitization of a historical archive for scholarly studies. During the first part of the talk I briefly introduce the standard reference model for digital archives, thereafter, I will show some features and technologies about the production of digital facsimiles from original primary sources. Beside this topic the metadata issues will be pointed out. Afterwards, digital transcription tools and text recognition tasks will be highlighted. Highlights on digital textual scholarship will introduce the text encoding and the annotation topics.
This talk ends with an overview of tools for visualizing, indexing and searching textual content.}, KEYWORDS = {historical archive, digital textual scholarship, digital humanities, digital libraries, computational philology, software engineering}, URL = {https://publications.cnr.it/doc/390293}, CONFERENCE_NAME = {International Workshop on Machine Learning and Natural Language Processing}, CONFERENCE_PLACE = {Fez, Marocco}, CONFERENCE_DATE = {24-25/11/2017}, } @INPROCEEDINGS{DELGROSSO_2017_INPROCEEDINGS_DGM_377409, AUTHOR = {Del Grosso, A. M. and Giovannetti, E. and Marchi, S.}, TITLE = {Il modello a microkernel di Omega nello sviluppo di strumenti per lo studio dei testi: dagli ADT alle API}, YEAR = {2017}, KEYWORDS = {microkernel, studio del testo, Omega, ADT, API}, PAGES = {199-205}, URL = {https://publications.cnr.it/doc/377409}, ISBN = {978-88-942535-1-1}, CONFERENCE_NAME = {AIUCD 2017 Conference}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {24-28/01/2017}, } @INPROCEEDINGS{DELGROSSO_2017_INPROCEEDINGS_DGM_377413, AUTHOR = {Del Grosso, A. M. and Giovannetti, E. and Marchi, S.}, TITLE = {Thinking like the "Modern Operating Systems": The Omega architecture and the Clavius on the Web project}, YEAR = {2017}, ABSTRACT = {The current digital turn in studying and analyzing historical documents results in both having machine actionable cultural data and providing software able to process them. However, these data and services often lack in integration strategies among them in order to be reused in other contexts different from the original ones. As pointed out by Franz Fischer in a worthy of note article: "There is no out-of-the-box software available for creating truly critical and truly digital editions at the same time" [1]. Likewise, Monica Berti stated that is now important to "build a model for representing quotations and text reuses of lost works in a digital environment" [2]. 
In this vision Bridget Almas is in charge of developing an integrated platform for collaboratively transcribing, editing, and translating historical documents and texts. She claimed that through this platform, called Perseids, students and scholars are able to create open source digital scholarly editions [3]. A number of interesting projects are currently under development to realize general models, digital services, and online tools that can be adopted as part of a long-term infrastructure for managing digital editions. Among Perseids and others, we cite as reference systems (a) the Textual Community project led by P. Robinson and B. Bordalejo, (b) the AustESE project led by the Australian eResearch group, (c) the Tagore Online Variorum "Bichitra" project led by Sukanta Chaudhuri, (d) Homer Multitext led by Neel Smith and Christopher Blackwell, (e) Sharing Ancient Wisdoms founded by the HERA network.}, KEYWORDS = {Omega, object-oriented design, digital scholarly editing, clavius on the web}, URL = {https://publications.cnr.it/doc/377413}, CONFERENCE_NAME = {Global Philology Open Conference}, CONFERENCE_PLACE = {Leipzig}, CONFERENCE_DATE = {20-23/02/2017}, } @INPROCEEDINGS{GOGGI_2017_INPROCEEDINGS_GPRBM_377070, AUTHOR = {Goggi, S. and Pardelli, G. and Russo, I. and Bartolini, R. 
and Monachini, M.}, TITLE = {Providing Access to Grey Literature: The CLARIN Infrastructure}, YEAR = {2017}, ABSTRACT = {This work will provide a map of the documentation archived in the CLARIN infrastructure, whose purpose is to share language resources produced and managed in the various European countries but finally merged into the CLARIN data centers for allowing access, interoperability, reuse and preservation of scientific documentation as well as Grey Literature.}, KEYWORDS = {CLARIN ERIC, Terminological Resources, Grey Literature}, PAGES = {60-62}, URL = {https://publications.cnr.it/doc/377070}, VOLUME = {19}, ISBN = {978-90-77484-32-6}, CONFERENCE_NAME = {Nineteenth International Conference on Grey Literature, GL19}, CONFERENCE_PLACE = {Rome, National Research Council, CNR}, CONFERENCE_DATE = {October 23-24, 2017}, BOOKTITLE = {Nineteenth International Conference on Grey Literature Public Awareness and Access to Grey Literature. Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_M_382175, AUTHOR = {Monachini, M.}, TITLE = {Discipline umanistiche: vantaggi, opportunità e benefici dell'Infrastruttura di Ricerca CLARIN e del nodo nazionale CLARIN-IT per la comunità italiana}, YEAR = {2017}, ABSTRACT = {L'interesse da parte delle scienze umane e sociali per le tecnologie del linguaggio non è mai stato così attuale come in questo momento storico. Le principali conferenze di Digital Humanities vedono sempre più la partecipazione di linguisti computazionali, mentre nelle conferenze di Trattamento Automatico del Linguaggio (TAL), l'applicazione di soluzioni TAL alle scienze umane e sociali costituisce una tematica che si affianca a quella delle ricadute industriali. Il bisogno di rispondere alle esigenze di una platea di utenti diversa apre nuove prospettive e offre una sfida rilevante per il settore delle tecnologie del linguaggio. 
I testi da trattare in ambito umanistico possono essere spesso eterogenei per genere, per periodo storico, per tipologia e nuovi tipi di analisi testuale acquistano particolare rilevanza. I software di analisi devono permettere una elaborazione automatica affidabile di tipologie di dati diversi da quelli che comunemente vengono usati nel TAL. La qualità delle risorse, in particolare la qualità dei vari livelli di annotazione acquista maggiore importanza quando queste devono essere usate per fare ricerca. Diventa cruciale sviluppare strumenti facilmente usabili e adattabili a diverse tipologie di contenuto e fornire soluzioni volte facilitare il reperimento e la condivisione di risorse e di tecnologie. E' proprio per rispondere a queste esigenze e per far incontrare chi produce e sviluppa risorse e tecnologie linguistiche con chi le usa, che è stata creata CLARIN (Common Language Resources Infrastructure for Social Sciences and Humanities), l'infrastruttura di ricerca europea per le risorse linguistiche al servizio delle scienze umane e sociali. CLARIN favorisce lo sviluppo di soluzioni tecnologiche volte a rendere le risorse e le tecnologie linguistiche visibili e disponibili per studiosi, ricercatori, studenti e cittadini, attraverso una modalità unificata e standardizzata di accesso. Tale innovazione consente di adottare nuovi e diversi approcci alla disciplina tradizionale determinando, in prospettiva, nuove consuetudini di studio che, sulla base delle buone pratiche lasciate in eredità dalla tradizione precedente, permettono lo sviluppo di una diversa e più attuale metodologia di ricerca e di prassi didattica.}, KEYWORDS = {Digital Humanities, CLARIN-IT}, URL = {https://apps.unive.it/server/eventi/13818/master%202017-2018%2011-2017-1.pdf}, CONFERENCE_NAME = {Università Ca' Foscari. 
Cerimonia conclusiva Master Digital Humanities}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {3/11/2017}, } @INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_M_382188, AUTHOR = {Monachini, M.}, TITLE = {Infrastrutture di Ricerca e Studi Classici. CLARIN-IT: opportunità e prospettive}, YEAR = {2017}, ABSTRACT = {L'interesse da parte delle scienze umane e sociali per le tecnologie del linguaggio non è mai stato così attuale come in questo momento storico. Le principali conferenze di Digital Humanities vedono sempre più la partecipazione di linguisti computazionali, mentre nelle conferenze di Trattamento Automatico del Linguaggio (TAL), l'applicazione di soluzioni TAL alle scienze umane e sociali costituisce una tematica che si affianca a quella delle ricadute industriali. Il bisogno di rispondere alle esigenze di una platea di utenti diversa apre nuove prospettive e offre una sfida rilevante per il settore delle tecnologie del linguaggio. I testi da trattare in ambito umanistico possono essere spesso eterogenei per genere, per periodo storico, per tipologia e nuovi tipi di analisi testuale acquistano particolare rilevanza. I software di analisi devono permettere una elaborazione automatica affidabile di tipologie di dati diversi da quelli che comunemente vengono usati nel TAL. La qualità delle risorse, in particolare la qualità dei vari livelli di annotazione acquista maggiore importanza quando queste devono essere usate per fare ricerca. Diventa cruciale sviluppare strumenti facilmente usabili e adattabili a diverse tipologie di contenuto e fornire soluzioni volte facilitare il reperimento e la condivisione di risorse e di tecnologie. 
E' proprio per rispondere a queste esigenze e per far incontrare chi produce e sviluppa risorse e tecnologie linguistiche con chi le usa, che è stata creata CLARIN (Common Language Resources Infrastructure for Social Sciences and Humanities), l'infrastruttura di ricerca europea per le risorse linguistiche al servizio delle scienze umane e sociali. CLARIN favorisce lo sviluppo di soluzioni tecnologiche volte a rendere le risorse e le tecnologie linguistiche visibili e disponibili per studiosi, ricercatori, studenti e cittadini, attraverso una modalità unificata e standardizzata di accesso. Tale innovazione consente di adottare nuovi e diversi approcci alla disciplina tradizionale determinando, in prospettiva, nuove consuetudini di studio che, sulla base delle buone pratiche lasciate in eredità dalla tradizione precedente, permettono lo sviluppo di una diversa e più attuale metodologia di ricerca e di prassi didattica.}, KEYWORDS = {Digital Humanities, CLARIN-IT}, URL = {http://www.clarin-it.it/sites/default/files/documents/UniParma_Workshop_2017_Locandina.pdf}, CONFERENCE_NAME = {DIGITAL HUMANITIES E FILOLOGIA GRECA: risorse e infrastrutture di ricerca applicate allo studio del greco antico}, CONFERENCE_PLACE = {Parma}, CONFERENCE_DATE = {20/11/2017}, } @INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_M_382191, AUTHOR = {Monachini, M.}, TITLE = {Nuove tecnologie e nuovi sviluppi di indagine: CLARIN-IT e alcuni esempi di applicazione allo studio del greco antico}, YEAR = {2017}, ABSTRACT = {Il lavoro tradizionale del filologo necessita oggi di una disponibilità sempre più ampia di dati e di testi (letteratura secondaria, bibliografia specifica, fonti primarie), il lavoro del singolo studioso sembra oramai accompagnarsi alla necessità di un team di ricerca che collabori su progetti di ampia scale, quali le edizioni dei testi.
Molte delle informazioni indispensabili per il filologo sono oggi (o potrebbero essere) disponibili e maggiormente accessibili grazie all'utilizzo di strumenti informatici, ma spesso si tratta di materiali dispersi e poco connessi tra loro; talora la loro esistenza è persino ignota agli studiosi tradizionali. 2 Il trend dei dati che si registra nella disciplina, grazie alla diffusione del web, con la circolazione di risorse utili per l'analisi e la ricostruzione del testo, fa ripensare al rapporto tra filologia - in ogni suo aspetto - e nuove tecnologie e lascia ampio spazio alle riflessioni metodologiche sui procedimenti d'indagine. Si tratta di far dialogare questi dati e implementarli. Il primo passo riguarda l'individuazione delle opportunità offerte dal settore delle DH in relazione a ogni singola disciplina nella sua specificità e, d'altro lato definire quali siano le esigenze di ciascun singolo settore. Nel fare questo è necessario mantenere alto lo standard sia dello strumento sia del tipo di dati inseriti. Come tratta il testo lo studioso affiancato dall'ausilio delle nuove tecnologie? Cosa trova online? Che bisogni emergono nelle pratiche di uso odierne? Si tenterà di dare una risposta a queste domande con esempi pratici di metodo applicato allo studio - ad esempio - di un autore specifico. Nel contesto verranno inoltre presentate le attività sino ad ora svolte dal gruppo di ricerca. 1) Tramite Survey si sono identificati la pratica d'uso oggi, e i punti di forza e mancanze degli strumenti esistenti. 2) Si sono così definite le esigenze di una comunità specifica e le relative richieste e aspettative. 3) Si è definito un prototipo di strumento che risponda alle esigenze individuate, ora in fase di valutazione. 
4) realizzazione, in prospettiva, di uno strumento che possa offrire una piattaforma collaborativa che metta a disposizione i dati (testo, apparato, commento, analisi a diversi livelli, etc.), variamente fruibili, assieme alla possibilità di accedere facilmente a tutti i dati relativi disponibili in rete.}, KEYWORDS = {Digital Humanities, Computational Philology}, URL = {http://www.clarin-it.it/sites/default/files/documents/UniParma_Workshop_2017_Locandina.pdf}, CONFERENCE_NAME = {2° Workshop di Studio Insegnamenti di Storia della Lingua Greca (LT) e Filologia Greca (LM)}, CONFERENCE_PLACE = {Parma}, CONFERENCE_DATE = {1/12/2017}, } @INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_M_429407, AUTHOR = {Monachini, M.}, TITLE = {Digital Humanities and Research Infrastructures: CLARIN and CLARIN-IT}, YEAR = {2017}, ABSTRACT = {La lezione al Corso "Digital Humanities: Web Resources, Tools and Infrastructures" Venice International University (a.a. 2017-2018) ha lo scopo di dimostrare i vantaggi, i benefici e le opportunità offerte da una infrastruttura di ricerca come CLARIN-ERIC per rispondere ai quesiti di ricerca e le sfide nel settore delle Digital Humanities.}, KEYWORDS = {digital humanities, research infrastructures, data deluge}, URL = {https://publications.cnr.it/doc/429407}, CONFERENCE_NAME = {Course "Digital Humanities: Web Resources, Tools and Infrastructures" Venice International University}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {4/12/2017}, } @INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_MNS_375982, AUTHOR = {Monachini, M. and Nicolosi, A. 
and Stefanini, A.}, TITLE = {Digital Classics: A Survey of the Needs of Ancient Greek Scholars in Italy}, YEAR = {2017}, ABSTRACT = {This paper presents and discusses the findings of a survey carried out in order to assess the use of digital resources and digital technologies with respect to work in Ancient Greek scholarship, as well as to identify the factors that are likely to constrain its use and to elicit needs and requirements of Ancient Greek scholars in Italy. The survey is in line with the principles behind the recent user engagement strategy developed by CLARIN-ERIC and constitutes one of the national efforts undertaken by CLARIN-IT to contribute to the wider impact of CLARIN on Digital Classicists.}, KEYWORDS = {CLARIN-ERIC, CLARIN-IT, CLARIN on Digital Classicists}, PAGES = {5}, URL = {https://www.clarin.eu/event/2017/clarin-annual-conference-2017-budapest-hungary}, CONFERENCE_NAME = {CLARIN Annual Conference 2017}, CONFERENCE_PLACE = {Budapest, Hungary}, CONFERENCE_DATE = {18-20 September, 2017}, } @INPROCEEDINGS{NICOLAS_2017_INPROCEEDINGS_NKMDCAEBQ_375984, AUTHOR = {Nicolas, L. and Konig, A. and Monachini, M. and Del Gratta, R. and Calamai, S. and Abel, A. and Enea, A. and Biliotti, F. and Quochi, V.}, TITLE = {CLARIN-IT: State of Affairs, Challenges and Opportunities}, YEAR = {2017}, ABSTRACT = {This paper provides an overview on the Italian national CLARIN consortium and the status of CLARIN-IT in general. 
It thus discusses the current state of affairs of the consortium and provides information on the members, especially with regards to what they offer to CLARIN in terms of resources, services and expertise, and what CLARIN offers them to further their own research.}, KEYWORDS = {Italian CLARIN consortium, CLARIN-IT}, PAGES = {4}, URL = {https://www.clarin.eu/event/2017/clarin-annual-conference-2017-budapest-hungary}, CONFERENCE_NAME = {CLARIN Annual Conference 2017}, CONFERENCE_PLACE = {Budapest, Hungary}, CONFERENCE_DATE = {18-20 September, 2017}, } @INPROCEEDINGS{PARDELLI_2017_INPROCEEDINGS_PGBD_366597, AUTHOR = {Pardelli, G. and Giannini, S. and Boschetti, F. and Del Gratta, R.}, TITLE = {AIUCD e CLiC-it: citazioni bibliografiche a confronto}, YEAR = {2017}, ABSTRACT = {Il lavoro propone l'analisi e il confronto dei riferimenti bibliografici delle cinque edizioni annuali della Conferenza dell'Associazione per l'Informatica Umanistica e la Cultura Digitale (AIUCD) e del primo biennio (2014-2015) della Conferenza Italiana di Linguistica Computazionale (CLiC-it) per misurare la direzione in cui si muove il trend citazionale. L'analisi muove dal principio di rilevanza della citazione nella trasmissione della conoscenza in un periodo di grandi cambiamenti socioculturali e di importanti evoluzioni nelle modalità di produzione e diffusione dei risultati della ricerca scientifica. Lo scopo dello studio è quello di misurare la gamma delle risorse citate in questa area del sapere mediante l'uso di indicatori volti a comprendere la loro ampiezza, l'estensione temporale, la varietà, le relazioni con il mondo editoriale e i modelli di riferimento. L'osservazione dei risultati consente di classificare i documenti citati, di descriverne le caratteristiche e di valutare eventuali trasformazioni rispetto alle modalità di citazione tradizionali. 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- The paper suggests the analysis of the bibliographic references - enclosed in the proceedings of the Italian Conference on Computational Linguistics - CLICit in 2014-2015 biennium and of five AIUCD Conference editions 2012-2016 . The analysis moves from the principle of relevance of citation in the transmission of knowledge in a period of great socio-cultural changes and important developments in the production and dissemination of the results in the scientific research. The purpose of the study is to measure the range of resources cited in this area of knowledge by the use of indicators aimed in understanding their wideness, the time extension, the variety, the relations with the publishing world and the reference models. The observation of the results allows to classify the cited document, to describe its characteristics and to assess any changes compared to the traditional citation mode.}, KEYWORDS = {Corpus bibliografico, Analisi di metadati}, PAGES = {38-50}, URL = {http://aiucd2017.aiucd.it/wp-content/uploads/2017/01/book-of-abstract-AIUCD-2017.pdf}, CONFERENCE_NAME = {AIUCD 2017 Conference \& 3rd EADH Day}, CONFERENCE_PLACE = {Roma, Università "Sapienza"}, CONFERENCE_DATE = {24-28 January 2017}, BOOKTITLE = {AIUCD 2017 Conference}, } @INPROCEEDINGS{PICCINI_2017_INPROCEEDINGS_PMG_378393, AUTHOR = {Piccini, S. and Marchi, S. 
and Giovannetti, E.}, TITLE = {Étudier le structuralisme par le structuralisme: expériences de sémantique distributionnelle dans la construction d'un lexique électronique de la terminologie saussurienne}, YEAR = {2017}, ABSTRACT = {En 2010-2011, le premier lexique électronique dédié à la terminologie linguistique saussurienne a été créé [1] dans le cadre d'un projet de recherche intitulé « Pour une édition numérique des manuscrits de Ferdinand de Saussure », projet coordonné par le Professeur Gambarara. La première étape de construction de la ressource lexicale a consisté en l'identification manuelle dans les textes des termes clés du vocabulaire saussurien et de leurs propriétés sémantiques. L'informatique n'est intervenue que dans la phase de formalisation des données extraites. C'est pourquoi nous nous proposons d'illustrer ici la possibilité de recourir à des techniques automatiques et, en particulier, à des algorithmes de sémantique distributionnelle [2] pour identifier les relations que les termes entretiennent entre eux dans le texte. La méthodologie sous-jacente est basée sur l'hypothèse distributionnelle selon laquelle plus deux mots sont sémantiquement proches, plus ils ont tendance à se produire dans des contextes similaires. Le lexique d'un texte est considéré comme un espace métrique où chaque mot peut être représenté comme un vecteur à n dimensions, chacune d'elles enregistrant le nombre de fois que ce mot apparaît dans un contexte donné. La proximité spatiale entre deux vecteurs indique la similarité sémantique entre deux mots. Elle est calculée par le cosinus de l'angle compris entre les deux vecteurs : plus la valeur du cosinus est grande, plus les termes sont, en principe, sémantiquement similaires. 
Les techniques computationnelles ont été appliquées aux mêmes textes à partir desquels le lexique électronique a été construit : le Cours de linguistique générale [3], les Écrits de linguistique générale [4] et le Recueil des publications scientifiques [5]. Bien qu'au stade préliminaire, l'expérience a permis d'obtenir des résultats intéressants. À titre d'exemple, nous présentons ci-dessous (Tableau) les valeurs de similitude obtenues par l'algorithme entre le terme signe et d'autres mots dans les textes. Si l'on compare les résultats avec l'entrée du lexique signe, on peut remarquer que l'algorithme est en mesure de détecter un grand nombre de liens explicités dans la ressource et de suggérer, en outre, des relations possibles avec d'autres termes comme valeur, rapport, idée. L'application de ces techniques computationnelles au corpus saussurien peut donc constituer une aide précieuse non seulement pour les lexicographes mais également pour les experts du domaine en faisant émerger des connections qui n'apparaissent pas immédiatement de manière explicite et en suggérant ainsi des parcours alternatifs d'analyse de la pensée de l'auteur.}, KEYWORDS = {structuralisme, sémantique distributionnelle, terminologie saussurienne, lexique électronique}, URL = {https://publications.cnr.it/doc/378393}, CONFERENCE_NAME = {Atelier "Les manuscrits de Saussure, parmi d'autres. Problèmes, stratégies et solutions d'édition pour les archives numériques"}, CONFERENCE_PLACE = {Geneve}, CONFERENCE_DATE = {09-14/01/2017}, } @INPROCEEDINGS{PIRRELLI_2017_INPROCEEDINGS_P_381136, AUTHOR = {Pirrelli, V.}, TITLE = {Storage vs. Processing in Models of Word Inflection. A Neuro-computational Hebbian Perspective}, YEAR = {2017}, ABSTRACT = {The advent of connectionism in the 80's popularised the idea that the lexical processor consists of a network of parallel processing units selectively firing in response to sensory stimuli. 
In the light of these assumptions, the most important contribution of connectionism to the theoretical debate on lexical modelling at the time was the utter rejection of the widely accepted idea that word recognition and production require a dichotomous choice between storage and processing. However, in spite of the prima facie psycho-computational allure of this view of the lexicon, early connectionist models also embraced a number of unsatisfactory assumptions about word learning and processing. More recently, a growing number of approaches to inflection in both Psycholinguistics and Theoretical Linguistics developed the view that surface word relations represent a fundamental domain of morphological competence. Learning the morphology of a language amounts to acquiring relations between fully stored lexical forms, which are concurrently available in the speaker's mental lexicon and jointly facilitate processing of morphologically related forms through patterns of emergent self-organisation. This novel view presupposes an integrative language architecture, where storage and processing, far from being conceived of as insulated and poorly interacting modules, are the short-term and the long-term dynamics of the same underlying process of adaptive specialisation of synaptic connections. This view, upheld by recent evidence of the neuro-anatomical bases of short-term and long-term memory processes, crucially hinges on Hebbian principles of synaptic plasticity, which are, in turn, in keeping with mathematical models of discriminative learning. 
I contend that integrative computer models of Hebbian language learning represent an exciting way forward in current neuro-computational research on word processing, and a persistently fertile legacy of the connectionist revolution.}, KEYWORDS = {Hebbian Learning, Recurrent Neural Networks, Word Inflection}, PAGES = {19-19}, URL = {https://indico.sissa.it/event/12/abstract-book.pdf}, CONFERENCE_NAME = {International Morphological Processing Conference (MoProc)}, CONFERENCE_PLACE = {Trieste}, CONFERENCE_DATE = {22-24/06/2017}, } @INPROCEEDINGS{PIRRELLI_2017_INPROCEEDINGS_PMFC_381117, AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M. and Cardillo, F. A.}, TITLE = {Paradigm Relative Entropy and Discriminative Learning}, YEAR = {2017}, ABSTRACT = {In the present contribution, we show that principles of discriminative learning of symbolic time series go a long way in accounting for these effects, thus making an important contribution to our understanding of the human lexical processor and its sensitivity to word distributions both within and across paradigms.}, KEYWORDS = {Paradigm Entropy, Discriminative Learning, Mental Lexicon, Verb Inflection}, PAGES = {5}, URL = {http://w3.erss.univ-tlse2.fr/ParadigMo2017/program.html}, CONFERENCE_NAME = {ParadigMo 2017: First Workshop on Paradigmatic Word Formation Modeling}, CONFERENCE_PLACE = {Toulouse}, CONFERENCE_DATE = {19-20/06/2017}, } @INPROCEEDINGS{RUSSO_2017_INPROCEEDINGS_RS_382094, AUTHOR = {Russo, I. and Soria, C.}, TITLE = {Digital Language Diversity on New Media: the DLDP Survey about European Minority Languages Speakers}, YEAR = {2017}, ABSTRACT = {How does the linguistic diversity of Europe reflect in the New Media? Do regional and minority languages contribute to EU digital language diversity? In this paper we will present the results of the first survey about actual needs of European minority languages speakers regarding digital opportunities. 
The survey is part of the work carried out by the Digital Language Diversity Project (DLDP), a three-year Erasmus+ project started in September 2015. The goal of DLDP is helping minority languages speakers in the acquisition of intellectual and practical skills to create, share, and reuse online digital content, at the same time defining general guidelines and best practices for the promotion of minority languages with poor digital representation, a fact that further prevents their usability on digital media and devices. The focus of the project is on four European minority languages at different stages of digital developments (Basque, Breton, Karelian and Sardinian), and this will enable a comparison about the role of the availability of digital content for promotion of digital usage of these languages and development of language-based digital applications. With the aim of understanding the specific needs and the peculiar behaviours of speakers of these languages, during Spring 2016 we conducted a survey focused on gathering information about their personal digital use of the language and about any known digital resource and services that make use of the language. We received feedback from almost 2000 speakers and we are now in the position of analysing results for future actions. In particular, taking into account media user typology elaborated by Brandtzæg (2010) (e.g. 
entertainment, instrumental and advanced users) we aim to profile speakers' answers according to these different classes, in order to better understand how to make speakers aware of the opportunities new media offer for preservation and revitalisation of minority languages.}, KEYWORDS = {minority languages, regional languages, new media, digital language diversity, digital language development}, URL = {https://minoritylanguagesnewmedia2017.files.wordpress.com/2017/03/final_abstracts-baal-cup-seminar-on-minority-languages-in-new-media.pdf}, CONFERENCE_NAME = {BAAL-Cambridge University Press Seminar on Minority Languages in New Media}, CONFERENCE_DATE = {27-28/4/2017}, } @INPROCEEDINGS{SASSOLINI_2017_INPROCEEDINGS_SC_382418, AUTHOR = {Sassolini, E. and Cinini, A.}, TITLE = {Approcci grafici all'analisi di corpora testuali}, YEAR = {2017}, ABSTRACT = {sperimentazioni finalizzate a combinare tecniche di "distant reading" e funzionalità classiche di Information Retrieval (IR) su dati testuali. Incrementare con sintesi grafiche e visuali l'offerta di strumenti di studio e di analisi dei dati testuali rappresenta una nuova frontiera del nostro ambito di ricerca consueto.}, KEYWORDS = {analisi testuale, distant reading, visual analytics}, PAGES = {83-86}, URL = {http://aiucd2017.aiucd.it/wp-content/uploads/2017/01/book-of-abstract-AIUCD-2017.pdf}, CONFERENCE_NAME = {AIUCD 2017 Conference}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {24-28/01/2017}, } @INPROCEEDINGS{SORIA_2017_INPROCEEDINGS_S_382071, AUTHOR = {Soria, C.}, TITLE = {The digital language vitality scale: a model for assessing digital vitality of languages}, YEAR = {2017}, ABSTRACT = {In this paper, we present the Digital Language Vitality Scale, a tool for measuring the degree of digital vitality of languages. Digital vitality can be defined as the extent to which a language is present, used and usable over the Internet through digital devices. 
The scale is inspired to ethnolinguistic vitality assessment (such as GIDS, Fishman 2001), updated by (Lewis and Simons 2010) as EGIDS, and the UNESCO "nine factors" (UNESCO 2003), and is based on previous work in this area such as (Kornai 2013) and (Gibson 2015). Seven levels of digital vitality are identified, from "pre-digital" to "digitally thriving", and a set of associated indicators. The indicators associated with the scale are proxies representing both digital representation (presence) of a language and digital use. They are clustered into three groups: a first group of indicators refers to digital usability of a language, for instance, the existence of Internet connection or the availability of standardised fonts for writing the language. A second group of indicators is related to the quality and amount of digital use of a language: if and how much a language is used for texting and emailing, on websites, blogs, if there are e-books, Wikipedias, if the language is used on social media. The last group of indicators correlates with the digital prestige of a language; they are a sign of a language that not only is used on digital media and devices, but it is so in a full-fledged way, enjoying the widest possible ranges of uses and applications (e.g. localised digital services, machine translation, edu-tainment products and services). 
The scale is currently being used in the context of the DLDP project (http://www.dldp.eu) as an assessing instrument for digital language planning, with particular reference to regional and minority languages.}, KEYWORDS = {digital vitality, language vitality, digital language diversity}, PAGES = {100-100}, URL = {https://icriml.indiana.edu/conference-program/Abstractbook.pdf}, CONFERENCE_NAME = {First International Conference on Revitalization of Indigenous and Minoritized Languages}, CONFERENCE_PLACE = {Barcelona/Vic}, CONFERENCE_DATE = {19-21/04/2017}, } @INPROCEEDINGS{SORIA_2017_INPROCEEDINGS_S_382081, AUTHOR = {Soria, C.}, TITLE = {Language policies and speakers' attitudes: evaluating the impact of official recognition on some of Italy's regional languages}, YEAR = {2017}, ABSTRACT = {The panel focuses on the minority (some of them highly endangered) languages of Italy, with a special attention to those which are not recognized (nor supported) by the Italian Government. Key points will be a. the official language policy of Italy, b. language discrimination, c. language ideology and d. the ambiguous role of academic institutions vis-à-vis languages and dialects, e. the effects (and results) of official support for recognized minority languages, as well as f. 
grassroots approaches to the standardization and development of unrecognized languages and new developments on the net.}, KEYWORDS = {minority languages, multilingualism, language policy}, PAGES = {42-42}, URL = {https://icriml.indiana.edu/conference-program/Abstractbook.pdf}, CONFERENCE_NAME = {First International Conference on Revitalization of Indigenous and Minoritized Languages}, CONFERENCE_DATE = {19-21/04/2017}, } @INPROCEEDINGS{SORIA_2017_INPROCEEDINGS_S_382083, AUTHOR = {Soria, C.}, TITLE = {Inquiring current digital use and usability of regional and minority languages: the DLDP survey}, YEAR = {2017}, ABSTRACT = {The Digital Language Diversity Project is a three-year project funded under the Erasmus+ programme that addresses the problem of low digital representation and use of EU regional and minority languages, a cause for their endangerment according to some scholars. One of the first actions of the project is to assess the current use and usability of four EU regional/minority languages, representing very different degrees of digital language representation and use: these languages are Sardinian, Karelian, Basque and Breton. From June to September 2016, the DLDP project has been spreading a survey, that was localized and translated into these languages. The survey is developed on the basis of previous work carried out in the area of ethnolinguistic vitality, such as the ELDIA Barometer, and other inquiries addressing specifically digital use of languages and availability and usability of digital resources and media. The DLDP survey consists of a general part collecting basic information on the informant (age, sex, proficiency level in the language, frequency of use, etc.). The second part is focused on gathering information about his/her personal digital use of the language and about any known digital resource and services that make use of the language. The survey is the first ever study of the digital needs of minority language speakers. 
It will give stakeholders and academia a detailed view into what actual language speakers are thinking about in terms of how they want to develop provision for their languages in the digital sphere. Therefore, we strongly encourage wide adoption and dissemination of the survey to regional and minority languages beyond the four investigated. The workshop intends to illustrate and discuss the model survey, to share it with researchers interested in adopting it for other languages, and to discuss collaboration paths.}, KEYWORDS = {digital language use, regional languages, minority languages, digital language vitality}, PAGES = {44-44}, URL = {https://icriml.indiana.edu/conference-program/Abstractbook.pdf}, CONFERENCE_NAME = {First International Conference on Revitalization of Indigenous and Minoritized Languages}, CONFERENCE_DATE = {19-21/04/2017}, } @INPROCEEDINGS{SORIA_2017_INPROCEEDINGS_S_382086, AUTHOR = {Soria, C.}, TITLE = {Alliances for digital linguistic diversity}, YEAR = {2017}, ABSTRACT = {Linguapax proposes a complementary pair of Roundtable discussions in the Conference's thematic line 1: The value of linguistic diversity, from an operational perspective. This "diptych" will present different types of alliances that create contexts for the preservation and continuation of linguistic diversity (RT 1), and will show how a plural perspective on linguistic diversity can emerge, drawing on Linguapax's experience as an international network (RT 2). In the first Roundtable discussion we will tackle the generation of different contexts of interaction (networks and alliances) to enhance the presence and vitality of linguistic diversity - those contexts emerging from linguistic, cultural and digital networks (although should be - in theory- mostly overlapping, an overview of their specific potentialities is useful and needed): 1. "Linguistic cooperation": International networks of projects/exchange of experiences in language revitalisation. 
Andoni Barreña (Garabide Elkartea, Basque Country); 2. Alliances for digital linguistic diversity. Claudia Soria. Consiglio Nazionale delle Ricerche. Pisa. 3. International cultural cooperation. Although "cultural cooperation" should be the common umbrella, it is usually driven by a specific logic in which linguistic diversity rarely comes consistently into play. On this occasion for dialogue, the Round Table will expose linguists/activists to a potentially useful vocabulary and world of networks and platforms. Jordi Pascual, expert on international cultural relations. This Roundtable aims to harness the relational potential of the Conference: On one hand, putting into dialogue two contributions already planning individual participation (1, 2), and on the other, inviting an external perspective to crossfertilise the debate and increase the scope for networking. Given the operational approach of this Roundtable, active interaction with the audience will be a key element for achieving the desired multiplier effect. Moderator: Alícia Fuentes-Calle. Linguapax (Barcelona). Departament de Lingüística. Universitat de Barcelona.}, KEYWORDS = {linguistic diversity, digital language diversity}, URL = {https://publications.cnr.it/doc/382086}, CONFERENCE_NAME = {ROUNDTABLE DISCUSSION-Linguapax-I. Generating contexts for linguistic diversity to thrive: networks of linguistic, cultural and digital cooperation. First International Conference on Revitalization of Indigenous and Minoritized Languages}, CONFERENCE_DATE = {19-21/04/2017}, } @INPROCEEDINGS{WEINGART_2017_INPROCEEDINGS_WG_377381, AUTHOR = {Weingart, A. and Giovannetti, E.}, TITLE = {From canabo to Cannabis sativa L.: Modelling Diachronic Termino-ontological Resources in the Context of DiTMAO}, YEAR = {2017}, ABSTRACT = {The paper aims at contributing to the understanding of the Medieval Brain from a knowledge engineering perspective. 
As the brain is conceived as locus of cognition and knowledge in medieval medicine, we want to focus on the representation of medieval medico-botanical knowledge by means of a three-level text-termino-ontological resource. The resource is based on lemon (a model for the representation of lexica as RDF) and a set of ontologies represented in OWL and consists of (i) a medieval termino-ontological resource, (ii) a modern termino-ontological resource and (iii) a documentation corpus. It is developed and implemented within the DFG-funded project "Dictionnaire de Termes Médico-botaniques de l'Ancien Occitan" (DiTMAO). In an introductory part we will briefly introduce the aims of the project and the particularities of the corpus. The corpus consists of medical monographs in Latin script but also of so-called synonym lists in Hebrew script. These lists can be described as ancient multilingual dictionaries, in our case of Old Occitan, (Judaeo-)Arabic, Hebrew, Latin or other Romance languages and sometimes Greek and Aramaic, and they are of particular importance because the equivalent terms in other ancient languages help to determine the meaning of otherwise opaque Old Occitan terms. After introducing the three components of the resource, given some examples from our corpus, we elaborate, in the main part of the paper, a solution to the problem of representing the relation between medieval medical terms or concepts and their ancient and modern correspondences. The problem concerns in particular the process of determining the terms' meaning through the documentation of each term in corpus-external dictionaries and editions, and how this process can be represented in a comprehensive and transparent way. We propose to relate the terms of the medieval termino-ontological resource at a lexical level using relations such as synonymy or sublemma. The connections between modern and medieval terms, like a translation into modern English, will be mediated by the documentation corpus. 
As for modern scientific terms, we opt for an ontological connection. These types of connections will be exemplified with (mainly) plant names from our corpus. For example, a medieval term has a referent in the medieval ontology, structuring the botanic world as conceived by a "medieval brain" by giving the medieval classifications e.g. the primary qualities. This ontological entity is related to a referent of a modern scientific name, given that the documentation of the medieval term provides such information. We will argue that a clear separation of datasets (medieval and modern) allows for the diachronic study of the evolution of terminology and, more importantly for this context, paves the way for the analysis of the changes in the cognitive representation of what those terms actually refer to.}, KEYWORDS = {lexica, multi-language lexica, termino-ontological resource, ancient occitan}, URL = {https://themedievalbrain.wordpress.com/}, CONFERENCE_NAME = {The Medieval Brain Conference}, CONFERENCE_PLACE = {University of York}, CONFERENCE_DATE = {09-11/03/2017}, } @TECHREPORT{CARLINO_2017_TECHREPORT_C_483691, AUTHOR = {Carlino, M.}, TITLE = {Rapporto annuale 2016 del CNR-ILC}, YEAR = {2017}, ABSTRACT = {Rapporto Annuale 2016 del Cnr-Istituto di Linguistica Computazionale "Antonio Zampolli" (CNR-ILC)}, KEYWORDS = {CNR-ILC, ILC, Annual Report, Rapporto Annuale, Istituto di Linguistica Computazionale, Zampolli, Activity report}, PAGES = {1-54}, URL = {https://publications.cnr.it/doc/483691}, } @TECHREPORT{CININI_2017_TECHREPORT_CCS_382931, AUTHOR = {Cinini, A. and Cucurullo, S. 
and Sassolini, E.}, TITLE = {Rapporto Tecnico: Standardizzazione del corpus testuale del PRIN Crusca}, YEAR = {2017}, ABSTRACT = {Attività previste nella convenzione operativa tra ILC-CNR e Accademia della Crusca che riguardano la progettazione e lo sviluppo di una piattaforma Web modulare per l'archiviazione, la gestione e l'interrogazione di corpora testuali in lingua italiana, con funzionalità derivate dal DBT (Data Base Testuale) nelle sue diverse implementazioni. Il lavoro preliminare svolto riguarda anche la normalizzazione dei testi e la conversione nello standard di rappresentazione XML TEI.}, KEYWORDS = {Codifica dei testi, Analisi testuale, formato XML TEI}, PAGES = {1-21}, URL = {https://publications.cnr.it/doc/382931}, } @TECHREPORT{RUSSO_2017_TECHREPORT_RS_382302, AUTHOR = {Russo, I. and Soria, C.}, TITLE = {Sardinian-a digital language?}, YEAR = {2017}, ABSTRACT = {In this report we present the results of the first survey about the actual needs of Sardinian speakers in terms of digital opportunities}, KEYWORDS = {digital use, digital language diversity, Sardinian}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Sardinian-Report.pdf}, } @TECHREPORT{SASSOLINI_2017_TECHREPORT_SC_383394, AUTHOR = {Sassolini, E. and Cinini, A.}, TITLE = {DIGESTO: NUOVE FUNZIONALITÀ E SITO WEB}, YEAR = {2017}, ABSTRACT = {Realizzazione di un nuovo sito web per la consultazione dei testi bilingui, con sviluppo di nuove funzionalità di ricerca, non più solo per parola ma anche per Titolo, Frammento o Paragrafo. Realizzazione di una versione PDF scaricabile di parti ragionate dell'intero corpus}, KEYWORDS = {testi paralleli, sito web, analisi testuale, visual analytics}, PAGES = {1-12}, URL = {https://publications.cnr.it/doc/383394}, } @MISC{ALBANESI_2017_MISC_ABBCDGCP_427294, AUTHOR = {Albanesi, D. and Bellandi, A. and Bulleri, F. and Carniani, E. and Dattilo, D. and Giovannetti, E. and Colombo, M. 
and Papini, M.}, TITLE = {Traduco}, YEAR = {2017}, ABSTRACT = {il Progetto Traduzione del Talmud Babilonese ha permesso di allestire una "officina digitale" specializzata nella traduzione di testi di particolare complessità interpretativa, quali, appunto, il Talmud Babilonese. Per la realizzazione della complessa opera di traduzione è stata costituita una équipe multidisciplinare che ha coinvolto traduttori, revisori di contenuto, redattori, curatori e grafici che, attraverso la piattaforma digitale Traduco hanno potuto lavorare congiuntamente sul testo da diversi luoghi del mondo. Traduco è uno strumento web collaborativo per la traduzione assistita di testi, per lo sviluppo del quale sono state condotte ricerche specifiche, sia nell'ambito dell'Ingegneria del Software che in quello della Linguistica Computazionale. Attraverso il lavoro di ricerca, è stato possibile mettere a punto un ambiente "intelligente" di supporto alla traduzione con caratteristiche innovative. Traduco ha consentito, come side effect positivo del processo vero e proprio di traduzione, di produrre innovazione tecnologica e scientifica.}, KEYWORDS = {traduzione collaborativa, traduzione assistita dal calcolatore, linguistica computazionale, traduco, talmud babilonese}, URL = {https://publications.cnr.it/doc/427294}, } @MISC{CARRARA_2017_MISC_CCCDFVT_429823, AUTHOR = {Carrara, F. and Cimino, A. and Cresci, S. and Dell'Orletta, F. and Falchi, F. and Vadicamo, L. and Tesconi, M.}, TITLE = {T4SA: Twitter for Sentiment Analysis}, YEAR = {2017}, ABSTRACT = {T4SA is intended for training and testing image sentiment analysis approaches. It contains little less than a million tweets, corresponding to about 1.5M images. We initially collected about 3.4M tweets corresponding to about 4M images. 
We classified the sentiment polarity of the texts (as described in Section 4) and we selected the tweets having the most confident textual sentiment predictions to build our Twitter for Sentiment Analysis (T4SA) dataset. The dataset is publicly available at: http://www.t4sa.it/}, KEYWORDS = {social media, sentiment analysis, image analysis, image sentiment analysis, deep learning, multimedia sentiment analysis, dataset, tweets}, URL = {http://www.t4sa.it/}, } @MISC{DELGROSSO_2017_MISC_D_390356, AUTHOR = {Del Grosso, A. M.}, TITLE = {Annotazioni collaborative di testi storici}, YEAR = {2017}, ABSTRACT = {Con l'avvento delle tecnologie e degli strumenti digitali, l'annotazione di testi (storici) è divenuta ancora più importante ed essenziale nel lavoro di strutturazione, di analisi e di comprensione dei fenomeni testuali. Inoltre, considerando gli aspetti più filologici, la realizzazione di una edizione digitale semanticamente connotata determina il bisogno di arricchire il testo con informazioni di varia granularità, di varia natura e di vario tipo. Questo intervento presenterà un approccio all'annotazione che tragga vantaggio dal Web semantico e dalle sue tecnologie. Le risorse testuali verranno caratterizzate da una forte interconnessione con risorse esterne favorendo di fatto la costruzione e l'interrogazione di un'unica banca dati globale, condivisa e formalmente descritta. In concreto, verrà presentato come scrivere annotazioni in RDF, utilizzando strumenti di annotazione disponibili sul web: a) il Text Encoder and Annotator (TEA); b) Annotarium. 
Il primo orientato alla trascrizione e all'annotazione delle risorse testuali, il secondo orientato alla gestione e alla interrogazione full-text e concettuale delle risorse annotate.}, KEYWORDS = {Digital Humanities, tools, software engineering, semantic web, digital edition}, URL = {https://digitaltools.labcd.unipi.it/past-editions/resources2017/#DelGrosso}, } @MISC{DELGROSSO_2017_MISC_D_390782, AUTHOR = {Del Grosso, A. M.}, TITLE = {Modelli Concettuali e Architetture orientate agli oggetti per la Progettazione e lo Sviluppo di una Digital Scholarly Platform}, YEAR = {2017}, ABSTRACT = {L'intervento affronta temi legati alla progettazione concettuale e alla definizione di architetture software object-oriented per lo sviluppo di una Digital Scholarly Platform}, KEYWORDS = {digital philology, computational philology, software engineering}, URL = {https://publications.cnr.it/doc/390782}, } @MISC{DELGROSSO_2017_MISC_D_390783, AUTHOR = {Del Grosso, A. M.}, TITLE = {Tools for Digital Textual Scholarship}, YEAR = {2017}, ABSTRACT = {la presentazione illustra le architetture software per la realizzazione di biblioteche digitali e archivi digitali, con particolare riguardo agli aspetti di studio filologico del testo}, KEYWORDS = {computational philology, digital philology, digital humanities}, URL = {https://publications.cnr.it/doc/390783}, } @MISC{DELGROSSO_2017_MISC_D_390784, AUTHOR = {Del Grosso, A. M.}, TITLE = {Strumenti software per lo studio e l'analisi di risorse testuali}, YEAR = {2017}, ABSTRACT = {Il contributo introduce i principali strumenti digitali per l'analisi dei testi letterari. Inoltre saranno illustrati gli strumenti sviluppati dal gruppo di Literary Computing dell'Istituto}, KEYWORDS = {digital humanities, digital philology, software, software engineering}, URL = {https://publications.cnr.it/doc/390784}, } @MISC{DELGROSSO_2017_MISC_D_484913, AUTHOR = {Del Grosso, A. 
M.},
  title = {Introduzione alle tecnologie digitali per la redazione e la pubblicazione di contenuti Web},
  year = {2017},
  abstract = {Introduzione alle tecnologie digitali per la redazione e la pubblicazione di contenuti Web},
  keywords = {tecnologie web, html, css},
  url = {https://publications.cnr.it/doc/484913},
}

@misc{DELGROSSO_2017_MISC_DM_390360,
  author = {Del Grosso, A. M. and Marchi, S.},
  title = {Clavius on The Web search framework},
  year = {2017},
  abstract = {Search engine and restful API developed within the Clavius On the Web project.},
  keywords = {digital humanities, computational philology, software engineering, search engine},
  url = {https://github.com/literarycomputinglab/ClaviusSearch},
}

@misc{GIOVANNETTI_2017_MISC_GD_390396,
  author = {Giovannetti, E. and Del Grosso, A. M.},
  title = {LicoLab@LabexObvil},
  year = {2017},
  abstract = {This talk illustrates some aspects of our research activities, specifically some outcomes carried out during designing and developing a digital environment for textual scholarship.},
  keywords = {digital humanities, computational philology, software engineering},
  url = {https://publications.cnr.it/doc/390396},
}

@misc{ZAMORANI_2017_MISC_Z_383456,
  author = {Zamorani, N.},
  title = {Featured Linguist: Nicoletta Calzolari},
  year = {2017},
  abstract = {The LINGUIST List Official LINGUIST List Blog: Featured Linguist: Nicoletta Calzolari Posted on April 7, 2017 by Clare Harshey We are proud to share with our readers the next featured linguist of our 2017 Fund Drive: Nicoletta Calzolari. We hope that you enjoy reading Dr. Calzolari's thoughts on her long and varied career as a computational linguist.},
  keywords = {Computational Linguistics, Nicoletta Calzolari},
  pages = {11},
  url = {https://blog.linguistlist.org/fund-drive/featured-linguist-nicoletta-calzolari/},
}

@article{BELLANDI_2016_ARTICLE_BABG_364945,
  author = {Bellandi, A. and Albanesi, D. and Benotto, G.
and Giovannetti, E.},
  title = {Il Sistema {Traduco} nel Progetto Traduzione del {Talmud Babilonese}},
  year = {2016},
  abstract = {Nell'ambito del Progetto Traduzione del Talmud Babilonese, l'Istituto di Linguistica Computazionale del CNR ha sviluppato Traduco, uno strumento web collaborativo con alcune caratteristiche che lo rendono particolarmente adatto alla traduzione di testi che pongono problemi interpretativi. Ad oggi, gli strumenti per la traduzione assistita (in inglese, Computer-Assisted Translation, o CAT) sono utilizzati tipicamente per la traduzione di manuali tecnici, testi legislativi o siti Web e hanno principalmente lo scopo di accelerare il processo di traduzione. Traduco riprende la maggior parte dei componenti standard di uno strumento di traduzione assistita tradizionale, ma li estende con caratteristiche specifiche necessarie per supportare l'interpretazione e la traduzione di testi complessi che pongono particolari problemi di comprensione. In questo articolo presenteremo un caso di studio specifico, relativo a un testo con queste caratteristiche: il Talmud Babilonese. Traduco include funzionalità per l'aggiunta di note, riferimenti bibliografici, annotazioni semantiche e creazione di glossari. Traduttori, revisori, redattori, supervisori e utenti finali che accedono al Sistema sono supportati nell'intero processo di traduzione, che va dall'interpretazione del testo originario alla fase editoriale per la stampa delle traduzioni, attraverso l'uso di tecnologie di traduzione assistita, l'annotazione semantica del testo, l'arricchimento delle traduzioni con informazioni esplicative, l'esportazione delle traduzioni in XML e in TEI e l'integrazione di tecniche per il trattamento automatico della lingua.
La progettazione e lo sviluppo di Traduco ha richiesto l'adozione di un approccio multidisciplinare che combina aspetti di ingegneria del software, linguistica computazionale, ingegneria della conoscenza ed editoria digitale.},
  keywords = {traduco, traduzione-assistita, talmud},
  pages = {109--126},
  url = {https://journals.openedition.org/ijcol/404},
  volume = {2},
  publisher = {aAccademia University Press, Torino (Italia)},
  issn = {2499-4553},
  journal = {Italian Journal of Computational Linguistics},
}

@article{BOSCHETTI_2016_ARTICLE_BB_382026,
  author = {Boschetti, F. and Buzzoni, M.},
  title = {Cronache-Edizioni digitali: Rappresentazione, Interoperabilità, Analisi del testo e Infrastrutture ({Venezia}, 7-9 settembre 2016)},
  year = {2016},
  abstract = {Cronaca del quinto convegno annuale dell'Associazione di Informatica Umanistica e Cultura Digitale (AIUCD), che si è svolto dal 7 al 9 settembre 2016 presso l'Aula Magna di Ca' Dolfin dell'Università Ca' Foscari di Venezia e ha avuto per tema: "Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture".},
  keywords = {convegno AIUCD},
  pages = {269--274},
  url = {http://bit.ly/2CW0BOV},
  volume = {13},
  publisher = {Carocci (Roma, Italia)},
  issn = {1825-5361},
  journal = {Ecdotica (Roma)},
}

@article{BRUNATO_2016_ARTICLE_BD_366755,
  author = {Brunato, D. and Dell'Orletta, F.},
  title = {{ISACCO}: a corpus for investigating spoken and written language development in {Italian} school-age children},
  year = {2016},
  abstract = {In this paper we present ISACCO (Italian School-Age Children COrpus), a corpus of oral and written retellings of Italian-speaking children attending primary school. All texts were digitalized and automatically enriched with multi-level linguistic annotation. Preliminary explorations of both the form and the content of children's productions were carried out based on a set of features automatically extracted by NLP tools.
Written retellings were manually annotated with a typology of errors belonging to three different linguistic levels. The resource, which has been made publicly available, is conceived to support research and computational modeling of "later language acquisition", with an emphasis on comparative assessment of the evolution of oral and written language competencies in early school grades.},
  keywords = {Child language acquisition, Oral and Written language, multi-level linguistic analysis},
  pages = {63--76},
  url = {http://www.italianlp.it/wp-content/uploads/2016/09/04_brunato_dell-orletta.pdf},
  volume = {2},
  publisher = {aAccademia University Press, Torino (Italia)},
  issn = {2499-4553},
  journal = {Italian Journal of Computational Linguistics},
}

@article{BRUNATO_2016_ARTICLE_BDMV_385220,
  author = {Brunato, D. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.},
  title = {Monitoraggio linguistico di Scritture Brevi: aspetti metodologici e primi risultati},
  year = {2016},
  abstract = {Se da un lato le tecnologie del linguaggio svolgono un ruolo ormai indiscusso per l'accesso al contenuto testuale, ciò non appare scontato quando si va a considerare il loro ruolo nella valutazione delle strutture linguistiche sottostanti al testo. Questo contributo si focalizza sulla definizione di una metodologia innovativa di monitoraggio linguistico della lingua italiana che a partire dall'output di strumenti di annotazione linguistica automatica permette di ricostruire un profilo linguistico di una collezione di testi rappresentativa di una specifica varietà d'uso della lingua.
Tale metodologia è stata applicata a un corpus di tweet allo scopo di far luce su interrogativi aperti quali la possibilità di rintracciare tendenze lessicali, morfo-sintattiche e sintattiche peculiari all'interno di questa tipologia testuale; di studiare come queste tendenze si rapportino ai tratti caratterizzanti della lingua scritta e parlata; di individuare possibili differenze nella forma linguistica in cui si twittano contenuti di natura diversa.},
  keywords = {Trattamento Automatico del Linguaggio, Monitoraggio Linguistico, Varietà d'Uso della Lingua, Lingua del Web},
  pages = {149--176},
  url = {https://publications.cnr.it/doc/385220},
  volume = {N. S. 5},
  publisher = {Università degli Studi di Napoli ``L'Orientale'' (Napoli, Italia)},
  issn = {1825-2796},
  journal = {Quaderni Aion},
}

@article{FRONTINI_2016_ARTICLE_FBRJJ_357604,
  author = {Frontini, F. and Brando, C. and Riguet, M. and Jacquot, C. and Jolivet, V.},
  title = {Annotation of Toponyms in {TEI} Digital Literary Editions and Linking to the Web of Data},
  year = {2016},
  abstract = {This paper aims to discuss the challenges and benefits of the annotation of place names in literary texts and literary criticism. We shall first highlight the problems of encoding spatial information in digital editions using the TEI format by means of two manual annotation experiments and the discussion of various cases. This will lead to the question of how to use existing semantic web resources to complement and enrich toponym mark-up, in particular to provide mentions with precise geo-referencing.
Finally the automatic annotation of a large corpus will show the potential of visualizing places from texts, by illustrating an analysis of the evolution of literary life from the spatial and geographical point of view.},
  keywords = {digital literary studies, toponyms, semantic web, geographic databases, maps and visualizations},
  pages = {49--75},
  url = {http://dx.doi.org/10.14195/2182-8830_4-2_3},
  volume = {4},
  doi = {10.14195/2182-8830_4-2_3},
  issn = {2182-8830},
  journal = {MATLIT: Materialidades da Literatura},
}

@article{FRONTINI_2016_ARTICLE_FCG_357602,
  author = {Frontini, F. and Brando, C. and Ganascia, J. G.},
  title = {{REDEN}: Named Entity Linking in Digital Literary Editions Using Linked Data Sets},
  year = {2016},
  abstract = {This paper proposes a graph-based Named Entity Linking (NEL) algorithm named REDEN for the disambiguation of authors' names in French literary criticism texts and scientific essays from the 19th and early 20th centuries. The algorithm is described and evaluated according to the two phases of NEL as reported in current state of the art, namely, candidate retrieval and candidate selection. REDEN leverages knowledge from different Linked Data sources in order to select candidates for each author mention, subsequently crawls data from other Linked Data sets using equivalence links (e.g., owl:sameAs), and, finally, fuses graphs of homologous individuals into a non-redundant graph well-suited for graph centrality calculation; the resulting graph is used for choosing the best referent. The REDEN algorithm is distributed in open-source and follows current standards in digital editions (TEI) and semantic Web (RDF). Its integration into an editorial workflow of digital editions in Digital humanities and cultural heritage projects is entirely plausible.
Experiments are conducted along with the corresponding error analysis in order to test our approach and to help us to study the weaknesses and strengths of our algorithm, thereby to further improvements of REDEN.},
  keywords = {Named Entity Linking, graph centrality, linked data, data fusion, digital humanities},
  pages = {60--80},
  url = {https://csimq-journals.rtu.lv/article/view/csimq.2016-7.04},
  volume = {7},
  doi = {10.7250/csimq.2016-7.04},
  issn = {2255-9922},
  journal = {Complex Systems Informatics and Modeling Quarterly},
}

@article{GOGGI_2016_ARTICLE_GPBFMMDB_359144,
  author = {Goggi, S. and Pardelli, G. and Bartolini, R. and Frontini, F. and Monachini, M. and Manzella, G. and De Mattei, M. and Bustaffa, F.},
  title = {A semantic engine for grey literature retrieval in the oceanography domain},
  year = {2016},
  abstract = {Here we present the final results of the MAPS (Marine Planning and Service Platform) project, an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. The system takes as input non-textual data (measurements) and text - both published papers and documentation - and it provides an advanced search facility thanks to the rich set of metadata and, above all, to the possibility of a refined and domain targeted key-word indexing of texts using Natural Language Processing (NLP) techniques. The paper describes the system in its details providing also evidence of evaluation.},
  keywords = {Information Extraction, Search Engine, Operative Oceanography},
  pages = {155--161},
  url = {http://www.greynet.org/thegreyjournal/currentissue.html},
  volume = {12},
  publisher = {TextRelease (Amsterdam, Paesi Bassi)},
  issn = {1574-1796},
  journal = {The Grey journal (Print)},
}

@article{LENCI_2016_ARTICLE_LLMM_367820,
  author = {Lenci, A. and Labanca, N. and Marazzini, C.
and Montemagni, S.},
  title = {{Voci della Grande Guerra}: An Annotated Corpus of {Italian} Texts on {World War I}},
  year = {2016},
  abstract = {Voci della Grande Guerra (Voices of the Great War) is a scientific and cultural initiative with the aim of preserving and promoting the memory of Italy in World War I through the creation of a corpus of digital texts selected by historians and linguists in order to be representative of the different ways to experience and describe the Italian war by its protagonists. With the help of advanced techniques of computational linguistics, semantic web and information visualization, the digitized historical materials will be explored with an online interface to enable easy but effective and innovative search modalities. The project will allow experts as well as non-experts to become acquainted with "linguistic polyphony" of Italy during World War I.},
  keywords = {Great War, World War, digital texts, corpus, Italian, Voci della Grande Guerra, Voices of the Great War},
  pages = {101--108},
  url = {http://www.ai-lc.it/IJCoL/v2n2/6-lenci_et_al.pdf},
  volume = {2},
  publisher = {aAccademia University Press, Torino (Italia)},
  issn = {2499-4553},
  journal = {Italian Journal of Computational Linguistics},
}

@article{MARZI_2016_ARTICLE_MFCP_360723,
  author = {Marzi, C. and Ferro, M. and Cardillo, F. A. and Pirrelli, V.},
  title = {Effects of frequency and regularity in an integrative model of word storage and processing},
  year = {2016},
  abstract = {Considerable evidence has accrued on the role of paradigms as both theoretical and cognitive structures regimenting the way words are processed and acquired.
The evidence supports a view of the lexicon as an emergent integrative system, where word forms are concurrently and competitively stored as repeatedly successful processing patterns, and on-line processing crucially depends on the internal organisation of stored patterns.},
  keywords = {Lexical access, word recall, serial processing, parallel activation, inflectional paradigms, mental lexicon},
  pages = {79--114},
  url = {http://www.scopus.com/record/display.url?eid=2-s2.0-84986550295\&origin=inward},
  volume = {28},
  publisher = {Pacini (Ospedaletto, Italia)},
  issn = {1120-2726},
  journal = {Rivista di Linguistica},
}

@article{MONACHINI_2016_ARTICLE_MF_373630,
  author = {Monachini, M. and Frontini, F.},
  title = {{CLARIN}, l'infrastruttura europea delle risorse linguistiche per le scienze umane e sociali e il suo network italiano {CLARIN-IT}},
  year = {2016},
  abstract = {Il 1° ottobre 2015 il MIUR firma l'adesione dell'Italia a CLARIN-ERIC, l'infrastruttura di ricerca che offre risorse e tecnologie linguistiche dedicate al settore delle scienze del linguaggio e delle scienze umane e sociali. Questo articolo intende fornire alla comunità italiana una ampia panoramica di CLARIN, la sua missione, i suoi pilastri, i servizi, la sua organizzazione tecnica ed amministrativa e la struttura di governance, sia a livello europeo che locale. Viene introdotto il network italiano, con il primo centro nazionale ILC4CLARIN, ospitato ed in via di sviluppo presso l'ILC-CNR, le funzionalità, le risorse ed i servizi offerti; viene presentato infine il primo nucleo del consorzio nazionale CLARIN-IT, illustrando i criteri di costituzione, le attività previste e le prospettive future.},
  keywords = {Infrastrutture di ricerca, Tecnologie linguistiche, Network italiano CLARIN-IT},
  pages = {1--30},
  url = {http://www.ai-lc.it/IJCoL/v2n2/1-monachini_and_frontini.pdf},
  volume = {
2},
  publisher = {aAccademia University Press, Torino (Italia)},
  issn = {2499-4553},
  journal = {Italian Journal of Computational Linguistics},
}

@article{MUGELLI_2016_ARTICLE_MBDDKT_364960,
  author = {Mugelli, G. and Boschetti, F. and Del Gratta, R. and Del Grosso, A. M. and Khan, F. and Taddei, A.},
  title = {A user-centred design to annotate ritual facts in ancient {Greek} tragedies},
  year = {2016},
  abstract = {Euporia is an annotation system developed with a user-centred approach for the study of ritual and religion in ancient Greek tragedy. Euporia adopts a domain specific language (DSL) and a lightweight web user interface in order to offer digital support to an anthropological study of ancient Greek tragedy that compares ritual as it is performed or described in Greek tragedy with ancient ritual as it can be reconstructed from literary, archaeological, and epigraphic sources. The case study discussed in this paper (Aesch. Ag 67-71) shows one of the main features of Euporia: the ability to annotate different readings and different interpretations of the text and their consequences in the reconstruction of ancient Greek ritual.},
  keywords = {Digital Philology, Digital Humanities, Digital Classicist, Computational philology, Computational Linguistics},
  pages = {103--120},
  url = {http://www.scopus.com/record/display.url?eid=2-s2.0-85007489227\&origin=inward},
  volume = {59},
  doi = {10.1111/j.2041-5370.2016.12041.x},
  publisher = {Institute of Classical Studies, University of London (London, Regno Unito)},
  issn = {0076-0730},
  journal = {Bulletin-University of London. Institute of Classical Studies},
}

@article{REHM_2016_ARTICLE_RUABBBBBCDGGGVHHJKKKLMMMMMMOOPPPRRPSDTTTVVVZ_355592,
  author = {Rehm, G. and Uszkoreit, H. and Ananiadou, S. and Bel, N. and Bieleviciene, A. and Borin, L. and Branco, A. and Budin, G. and Calzolari, N. and Daelemans, W. and Garabik, R. and Grobelnik, M. and Garcia Mateo, C. and Van Genabith, J. and Hajic, J. and Hernaez, I. and Judge, J.
and Koeva, S. and Krek, S. and Krstev, C. and Linden, K. and Magnini, B. and Mariani, J. and McNaught, J. and Melero, M. and Monachini, M. and Moreno, A. and Odijk, J. and Ogrodniczuk, M. and Pezik, P. and Piperidis, S. and Przepiorkowski, A. and Rognvaldsson, E. and Rosner, M. and Pedersen, B. S. and Skadina, I. and De Smedt, K. and Tadic, M. and Thompson, P. and Tufis, D. and Varadi, T. and Vasiljevs, A. and Vider, K. and Zabarskaite, J.},
  title = {The strategic impact of {META-NET} on the regional, national and international level},
  year = {2016},
  abstract = {This article provides an overview of the dissemination work carried out in META-NET from 2010 until 2015; we describe its impact on the regional, national and international level, mainly with regard to politics and the funding situation for LT topics. The article documents the initiative's work throughout Europe in order to boost progress and innovation in our field.},
  keywords = {Language technology, Multilingual technologies, Machine translation, Language resources, META-NET, META-SHARE},
  pages = {351--374},
  url = {http://link.springer.com/article/10.1007/s10579-015-9333-4},
  volume = {50},
  doi = {10.1007/s10579-015-9333-4},
  publisher = {Springer (Dordrecht, Paesi Bassi)},
  issn = {1574-020X},
  journal = {Language resources and evaluation (Print)},
}

@article{RHEM_2016_ARTICLE_RUCM_344298,
  author = {Rehm, G. and Uszkoreit, H. and Calzolari, N. and Monachini, M.},
  internal-note = {Apparent duplicate of REHM_2016_ARTICLE_RUABBBBBCDGGGVHHJKKKLMMMMMMOOPPPRRPSDTTTVVVZ_355592 (same title and DOI); the author list here is partial. Key kept for existing citations.},
  title = {The strategic impact of {META-NET} on the regional, national and international level},
  year = {2016},
  abstract = {This article provides an overview of the dissemination work carried out in META-NET from 2010 until 2015; we describe its impact on the regional, national and international level, mainly with regard to politics and the funding situation for LT topics.
The article documents the initiative's work throughout Europe in order to boost progress and innovation in our field.},
  keywords = {Language technology, Multilingual technologies, Machine translation, Language resources, META-NET, META-SHARE},
  pages = {26},
  url = {http://www.springer.com/home?SGWID=0-0-1003-0-0\&aqId=2981193\&download=1\&checkval=6c0c2a6da36ef097f2a5e48a49f794e4},
  doi = {10.1007/s10579-015-9333-4},
  publisher = {Springer (Dordrecht, Paesi Bassi)},
  issn = {1574-020X},
  journal = {Language resources and evaluation (Print)},
}

@incollection{BRUNATO_2016_INCOLLECTION_BV_366759,
  author = {Brunato, D. and Venturi, G.},
  title = {Le tecnologie linguistico-computazionali per la leggibilità della comunicazione istituzionale},
  year = {2016},
  abstract = {Il contributo illustra il ruolo delle tecnologie linguistico-computazionali per la valutazione automatica della leggibilità dei testi della comunicazione istituzionale e propone alcuni esempi di semplificazione semi-automatica di testi amministrativi e normativi.},
  keywords = {tecnologie linguistico-computazionali, valutazione automatica della leggibilità, comunicazione istituzionale},
  pages = {119--157},
  url = {https://publications.cnr.it/doc/366759},
  publisher = {Pisa University Press (Pisa, ITA)},
  isbn = {978-88-6741-627-1},
}

@incollection{DELGRATTA_2016_INCOLLECTION_DBDKM_353799,
  author = {Del Gratta, R. and Boschetti, F. and Del Grosso, A. M. and Khan, F. and Monachini, M.},
  title = {Cooperative philology on the way to web services: The case of the {CoPhiWordNet} platform},
  year = {2016},
  abstract = {In this paper we present ongoing research carried out at the Institute for Computational Linguistics "A. Zampolli" (ILC) in Pisa. The institute has been active since many years in the field of Digital Humanities providing resources, tools and solutions to address issues of the to digital humanists.
Starting from those previous initiatives, we show how to re-engineer them as Web Services in order to make connections between lexicons, semantic resources and a fine grained text management. Linked Open Data is chosen as the paradigm used to link the different resources as well as the modality of data presentation.},
  keywords = {Canonical text services, Cooperative philology, Linked open data, Web services},
  pages = {173--187},
  url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84961744725\&partnerID=q2rCbXpz},
  volume = {9442},
  doi = {10.1007/978-3-319-31468-6_13},
  publisher = {Springer International Publishing (Switzerland, CHE)},
  isbn = {978-3-319-31468-6},
  booktitle = {Worldwide Language Service Infrastructure: Second International Workshop, WLSI 2015, Kyoto, Japan, January 22-23, 2015. Revised Selected Papers},
  editor = {Murakami, Y. and Li, D.},
}

@incollection{FRONTINI_2016_INCOLLECTION_FDM_357638,
  author = {Frontini, F. and Del Gratta, R. and Monachini, M.},
  title = {{GeoDomainWordNet}: Linking the {Geonames} Ontology to {WordNet}},
  year = {2016},
  abstract = {This paper illustrates the transformation of GeoNames' ontology concepts, with their English labels and glosses, into a GeoDomain WordNet-like resource in English, its translation into Italian, and its linking to the existing generic WordNets of both languages. The paper describes the criteria used for the linking of domain synsets to each other and to the generic ones and presents the published resource in RDF according to the w3c and lemon schema.},
  keywords = {GeoNames, WordNet, Language resources, Lexicons, Linguistic linked data, lemon, RDF},
  pages = {229--242},
  url = {http://link.springer.com/chapter/10.1007/978-3-319-43808-5_18},
  volume = {9561},
  doi = {10.1007/978-3-319-43808-5_18},
  isbn = {978-3-319-43808-5},
  booktitle = {Human Language Technology. Challenges for Computer Science and Linguistics},
  editor = {Vetulani, Z. and Uszkoreit, H.
and Kubis, M.},
}

@incollection{MONTEMAGNI_2016_INCOLLECTION_MW_367809,
  author = {Montemagni, S. and Wieling, M.},
  title = {Tracking linguistic features underlying lexical variation patterns: A case study on {Tuscan} dialects},
  year = {2016},
  abstract = {In this paper, we illustrate the application of hierarchical spectral partitioning of bipartite graphs in the study of lexical variation in Tuscany based on the data from a regional linguistic atlas. This method makes it possible not only to identify existing patterns of lexical variation in Tuscany, but also to uncover the underlying lexical features in terms of the most characteristic concept-lexicalization pairs. The results are promising, demonstrating the potential of the method for tracking the linguistic features underlying identified patterns of lexical variation and change across generations.},
  keywords = {tuscan, dialects, lexical variation, linguistic atlas},
  pages = {117--135},
  url = {http://langsci-press.org/catalog/view/81/146/376-1},
  series = {Language Variation},
  volume = {1},
  doi = {10.17169/langsci.b81.146},
  publisher = {Language Science Press (Berlin, DEU)},
  isbn = {978-3-946234-18-0},
  booktitle = {The Future of Dialects},
  editor = {Côté, M. and Knooihuizen, R. and Nerbonne, J.},
}

@incollection{QUOCHI_2016_INCOLLECTION_Q_358123,
  author = {Quochi, V.},
  title = {Development and representation of {Italian} light-fare constructions},
  year = {2016},
  abstract = {The essay describes the study of the development and use of light fare 'do' constructions in Child-directed Speech and in Child Language with the twofold goal of showing that a Construction Grammar approach is viable, and of providing support to usage-based, functional predictions on language acquisition.
The analysis of naturalistic data derived from the CHILDES database led to two main findings: first, a representation of fare Light Verb Constructions as a family of constructions organized like a radial category is not only possible but more explicative, second, there exists a 'fare' pivot schema that children generalize at an early stage because it serves the purpose of naming new events, activities or situations.},
  keywords = {Corpus linguistics, Language Acquisition, Construction Grammar, phraseology},
  pages = {39--64},
  url = {https://benjamins.com/#catalog/books/cal.19.03quo/details},
  volume = {19},
  doi = {10.1075/cal.19.03quo},
  publisher = {John Benjamins Publishing Company (Amsterdam/Philadelphia, USA)},
  isbn = {9789027204417},
  booktitle = {Corpus-based Approaches to Construction Grammar},
  editor = {Yoon, J. and Gries, S. Th.},
}

@incollection{WEINGART_2016_INCOLLECTION_WG_364950,
  author = {Weingart, A. and Giovannetti, E.},
  title = {Extending the Lemon Model for a Dictionary of {Old Occitan} Medico-Botanical Terminology},
  year = {2016},
  abstract = {The article presents the adaptation of the lemon model (a model for lexica as RDF data) for a multilingual and multi-alphabetical lexicon of Old Occitan medico-botanical terminology. The lexicon is the core component of an ontology-based information system that will be constructed and implemented within the DFG-funded project "Dictionnaire des Termes Medico-botaniques de l'Ancien Occitan" (DiTMAO).
The difficulties for the lemmatization raised by the particularities of the corpus (terms in Latin, Hebrew and Arabic script and corresponding terms in other ancient languages, mostly Hebrew and Arabic) can be perfectly solved by extending the basic properties of lemon and introducing domain specific vocabulary.},
  keywords = {Lemon model, RDF, Multilingual, Multi-alphabetical, Historical lexicon, Medico-Botanical terminology, Old occitan, Hebrew, Arabic},
  pages = {408--421},
  url = {http://link.springer.com/chapter/10.1007/978-3-319-47602-5_53},
  volume = {9989},
  doi = {10.1007/978-3-319-47602-5_53},
  isbn = {978-3-319-47601-8},
  booktitle = {The Semantic Web},
  editor = {Sack, H. and Rizzo, G. and Steinmetz, N. and Mladenić, D. and Auer, S. and Lange, C.},
}

@incollection{WIELING_2016_INCOLLECTION_WM_367813,
  author = {Wieling, M. and Montemagni, S.},
  title = {Infrequent forms: Noise or not?},
  year = {2016},
  abstract = {In this study we ask the question whether simplifying the data in dialectometrical studies by removing infrequent forms is advantageous to uncovering the geographical structure in dialect data. By investigating lexical variation in a large corpus of Tuscan dialect data via hierarchical bipartite spectral graph partitioning, we are able to identify the main geographical areas together with their linguistic basis. In order to assess the influence of infrequent forms, we conduct two analyses: one which includes only lexical variants used by at least 0.5\% of the informants, and another which includes all lexical variants in the data.
Using this approach we show that using all data enables us to find a geographical characterization with a more adequate linguistic basis than by using the trimmed data.},
  keywords = {dialectometrical studies, dialectology, dialect data, lexical variation, Tuscan},
  pages = {215--224},
  url = {http://langsci-press.org/catalog/view/81/78/367-1},
  series = {Language Variation},
  volume = {1},
  doi = {10.17169/langsci.b81.78},
  publisher = {Language Science Press (Berlin, DEU)},
  isbn = {978-3-946234-18-0},
  booktitle = {The Future of Dialects},
  editor = {Côté, M. and Knooihuizen, R. and Nerbonne, J.},
}

@editorial{BRANCO_2016_EDITORIAL_BCC_367184,
  author = {Branco, A. and Calzolari, N. and Choukri, K.},
  title = {{4REAL} Workshop: Workshop on Research Results Reproducibility and Resources Citation in Science and Technology of Language},
  year = {2016},
  abstract = {This workshop seeks to foster the discussion and the advancement on a topic that has been so far given insufficient attention in the research area of language processing tools and resources (Branco, 2013, Fokkens et al., 2013) and that has been an important topic emerging in other scientific areas. That is the topic of the reproducibility of research results and the citation of resources, and its impact on research integrity.},
  keywords = {Research Results Reproducibility, Resources Citation},
  pages = {1--38},
  url = {http://www.lrec-conf.org/proceedings/lrec2016/index.html},
  publisher = {European Language Resources Association ELRA (Paris, FRA)},
  isbn = {978-2-9517408-9-1},
}

@editorial{BRUNATO_2016_EDITORIAL_BDVFB_367760,
  author = {Brunato, D. and Dell'Orletta, F. and Venturi, G. and François, T.
and Blache, P.},
  title = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity ({CL4LC} 2016)},
  year = {2016},
  abstract = {Introduzione agli atti della prima edizione del workshop "Computational Linguistics for Linguistic Complexity" che raccoglie lavori che studiano da prospettive diverse il tema della complessità linguistica allo scopo di promuovere una riflessione comune su approcci diversi all'indagine, al trattamento e alla valutazione di aspetti che rendono complessa la lingua.},
  keywords = {Linguistic Complexity, Computational Linguistics},
  pages = {1--245},
  url = {https://aclweb.org/anthology/W/W16/W16-41.pdf},
  isbn = {978-4-87974-709-9},
}

@editorial{CALZOLARI_2016_EDITORIAL_CCDGGMMMMOP_355640,
  author = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.},
  title = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
  year = {2016},
  abstract = {Curatela dei 744 articoli presentati alla conferenza LREC2016.},
  keywords = {Language Resources Evaluation},
  pages = {1--4693},
  url = {http://www.lrec-conf.org/proceedings/lrec2016/index.html},
  publisher = {European Language Resources Association ELRA (Paris, FRA)},
  isbn = {978-2-9517408-9-1},
}

@editorial{DISEGNI_2016_EDITORIAL_D_383159,
  author = {Di Segni, D. G.},
  title = {{Talmud Babilonese} - Trattato {Rosh haShanà}},
  year = {2016},
  abstract = {Traduzione in italiano del trattato Rosh haShanà del Talmud Babilonese},
  keywords = {Talmud, Traduco, Linguistica computazionale},
  pages = {365},
  url = {https://www.talmud.it/},
  volume = {5},
  publisher = {Giuntina (Firenze, ITA)},
  isbn = {978-88-8057-628-0},
}

@editorial{KHAN_2016_EDITORIAL_KVLFFPGU_355434,
  author = {Khan, F. and Vintar, Š. and León Araúz, P. and Faber, P. and Frontini, F. and Parvizi, A. and Grčić Simeunović, L.
and Unger, C.}, TITLE = {Language and Ontology (LangOnto2) \& Terminology and Knowledge Structures (TermiKS)}, YEAR = {2016}, ABSTRACT = {This joint workshop brings together two different but closely related strands of research. On the one hand it looks at the overlap between ontologies and computational linguistics and on the other it explores the relationship between knowledge modelling and terminologies. In particular the workshop aims to create a forum for discussion in which the different relationships and commonalities between these two areas can be explored in detail, as well as presenting cutting edge research in each of the two individual areas. A significant amount of human knowledge can be found in texts. It is not surprising that languages such as OWL, which allow us to formally represent this knowledge, have become more and more popular both in linguistics and in automated language processing. For instance ontologies are now of core interest to many NLP fields including Machine Translation, Question Answering, Text Summarization, Information Retrieval, and Word Sense Disambiguation. At a more abstract level, however, ontologies can also help us to model and reason about phenomena in natural language semantics. In addition, ontologies and taxonomies can also be used in the organisation and formalisation of linguistically relevant categories such as those used in tagsets for corpus annotation. Notably also, the fact that formal ontologies are being increasingly accessed by users with limited to no background in formal logic has led to a growing interest in developing accessible front ends that allow for easy querying and summarisation of ontologies. It has also led to work in developing natural language interfaces for authoring ontologies and evaluating their design. Additionally in recent years there has been a renewed interest in the linguistic aspects of accessing, extracting, representing, modelling and transferring knowledge. 
Numerous tools for the automatic extraction of terms, term variants, knowledge-rich contexts, definitions, semantic relations and taxonomies from specialized corpora have been developed for a number of languages, and new theoretical approaches have emerged as potential frameworks for the study of specialized communication. However, the building of adequate knowledge models for practitioners (e.g. experts, researchers, translators, teachers etc.), on the one hand, and NLP applications (including cross-language, cross-domain, cross-device, multi-modal, multi-platform applications), on the other hand, still remains a challenge. The papers included in the workshop range across a wide variety of different areas and reflect the strong inter-disciplinary approach, which characterises both areas of research. In addition we are very happy to include two invited talks in the program presented by authorities in their respective fields: Pamela Faber from the field of terminology, and John McCrae, an expert on linguistic linked data and the interface between NLP and ontologies.}, KEYWORDS = {lexicons, ontologies}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, } @EDITORIAL{MARZI_2016_EDITORIAL_MP_360724, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {Word knowledge and word usage: A Foreword}, YEAR = {2016}, ABSTRACT = {This special issue, together with its companion issue to appear in Lingue e Linguaggio, stems from the NetWordS Final Conference Word knowledge and word usage: representations and processes in the mental lexicon.* The conference, held on the 30th and 31st of March, and the 1st of April 2015 in Pisa, concluded the 4-year NetWordS project, the European Network of Word Structure funded by the European Science Foundation within the Research Networking Programme. 
In line with the highly multidisciplinary profile of NetWordS agenda, the conference offered a comprehensive and inclusive forum focussing on two main lines of lexical inquiry: (i) usage-based approaches to bootstrapping word form and structure (morpho-phonological and morpho-syntactic issues), including: acquisition of lexical categories, emergence of morphological structure, lexical memories, anticipatory prediction-based mechanisms of word recognition, word production, frequency-based models of lexical productivity, word encoding, models of lexical architecture, family-based effects in word processing, word reading and writing; (ii) usage-based approaches to word meanings (lexical semantics and pragmatics in morphologically simple and complex words), including: distributional semantics, compound interpretation, concept composition and coercion, conceptualization of perception and action, time and space in the lexicon, metonymy and metaphor, lexico-semantic relations, perceptual grounding and embodied cognition, context-based and encyclopedic knowledge, semantic association and categorization. The multidisciplinary focus on word knowledge and word usage promoted by the Conference led participants to openly discuss an impressive range of approaches and empirical data: priming and lexical decision in a number of contexts, distributional semantics and models of semantic composition, neural networks, machine learning and mathematical modelling of empirical evidence, as well as their neuro-biological and neuro-functional correlates.}, KEYWORDS = {word knowledge, word usage, mental lexicon, interdisciplinary approach, NetWordS}, PAGES = {3-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84986558643\&origin=inward}, VOLUME = {28. 1}, PUBLISHER = {Pacini (Pisa, ITA)}, } @EDITORIAL{MARZI_2016_EDITORIAL_MP_360725, AUTHOR = {Marzi, C. 
and Pirrelli, V.}, TITLE = {Word knowledge and word usage: A foreword}, YEAR = {2016}, ABSTRACT = {This special issue, together with its companion issue to appear in Italian Journal of Linguistics, stems from the NetWordS Final Conference "Word knowledge and word usage: representations and processes in the mental lexicon". The conference, held on the 30th and 31st of March, and the 1st of April 2015 in Pisa, concluded the 4-year NetWordS project, the European Network of Word Structure funded by the European Science Foundation within the Research Networking Programme. In line with the highly multidisciplinary profile of NetWordS agenda, the conference offered a comprehensive and inclusive forum focussing on two main lines of lexical inquiry: (i) usage-based approaches to bootstrapping word form and structure (morpho-phonological and morpho-syntactic issues), including: acquisition of lexical categories, emergence of morphological structure, lexical memories, anticipatory prediction-based mechanisms of word recognition, word production, frequency-based models of lexical productivity, word encoding, models of lexical architecture, family-based effects in word processing, word reading and writing; (ii) usage-based approaches to word meanings (lexical semantics and pragmatics in morphologically simple and complex words), including: distributional semantics, compound interpretation, concept composition and coercion, conceptualization of perception and action, time and space in the lexicon, metonymy and metaphor, lexico-semantic relations, perceptual grounding and embodied cognition, context-based and encyclopedic knowledge, semantic association and categorization. 
The multidisciplinary focus on word knowledge and word usage promoted by the Conference led participants to openly discuss an impressive range of approaches and empirical data: priming and lexical decision in a number of contexts, distributional semantics and models of semantic composition, neural networks, machine learning and mathematical modelling of empirical evidence, as well as their neuro-biological and neuro-functional correlates. It is widely acknowledged that looking at the same problem from different angles has an additive effect on the impact of current language research. Certainly more can be achieved, however, if, rather than simply adding more perspectives on the same subject, with individual research efforts staying within the boundaries of single knowledge domains, scholars manage to integrate them into a boundary-shifting methodological perspective. When psycholinguistic evidence from humans is successfully replicated algorithmically through a computational model implementing a few well-understood principles of time-series processing, we are in a position to empirically assess what input conditions favour memorisation and acquisition of symbolic strings by the model, and test these algorithmic predictions back on human subjects, thus going full circle. This may have a multiplicative effect on current research, providing not only mathematical modelling of present behavioural evidence, but amounting to fully explanatory mechanisms. Our current understanding of WHERE and WHEN some cognitive processes are implemented in the brain will be complemented by knowledge of WHAT information they rely on and HOW they integrate it. Other compelling examples of the full potential of cross-disciplinary integration can be found in the present volume and in the twin issue of Italian Journal of Linguistics. 
As a general point, we contend that only by putting single-domain acquisitions into the wider context of human communication, and developing an interdisciplinary framework whereby each specialist will take advantage of insights from other disciplines, we can make substantial progress in our understanding of the lexical roots of human verbal communication in real contexts. The edited selection of papers presented here provides a representative sample of the range of approaches debated at the NetWordS Pisa Conference, by way of illustration of how aspects of knowledge integration and methodological innovation can be put at the service of a better understanding of broad lexical issues.}, KEYWORDS = {word knowledge, word usage, interdisciplinary approach, mental lexicon, NetWordS}, PAGES = {3-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84978285090\&origin=inward}, VOLUME = {XV. 1}, DOI = {10.1418/83651}, PUBLISHER = {Il Mulino (Bologna, ITA)}, ISBN = {978-88-15-26226-4}, } @EDITORIAL{MONTEMAGNI_2016_EDITORIAL_M_372004, AUTHOR = {Montemagni, S.}, TITLE = {Proceedings CLiC-it 2016 and EVALITA 2016}, YEAR = {2016}, ABSTRACT = {Proceedings of Third Italian Conference on Computational Linguistics (CLiC-it 2016) \& Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop (EVALITA 2016)}, KEYWORDS = {Computational Linguistics, Natural Language Processing, Speech Tools for Italian, CLiC-it, EVALITA}, URL = {http://ceur-ws.org/Vol-1749/}, VOLUME = {1749}, PUBLISHER = {CEUR-WS.org (Aachen, DEU)}, } @EDITORIAL{SORIA_2016_EDITORIAL_SPDMSW_355531, AUTHOR = {Soria, C. and Pretorius, L. and Declerck, T. and Mariani, J. and Scannell, K. 
and Wandl Vogt, E.}, TITLE = {CCURL 2016 Collaboration and Computing for Under-Resourced Languages: Towards an Alliance for Digital Language Diversity}, YEAR = {2016}, ABSTRACT = {Atti del Workshop "CCURL 2016 Collaboration and Computing for Under-Resourced Languages: Towards an Alliance for Digital Language Diversity"}, KEYWORDS = {Less-resourced languages, Language Technology, digital language vitality, digital language diversity}, PAGES = {1-103}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/workshops/LREC2016Workshop-CCURL2016_Proceedings.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, } @EDITORIAL{BASILI_2016_EDITORIAL_BM_372022, AUTHOR = {Basili, R. and Montemagni, S.}, TITLE = {Nota Editoriale}, YEAR = {2016}, KEYWORDS = {Computational Linguistics, CLiC-it, natural language processing}, PAGES = {7-10}, URL = {http://www.ai-lc.it/IJCoL/v2n1/00_nota_editoriale.pdf}, VOLUME = {2}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, BOOKTITLE = {Italian Journal of Computational Linguistics}, } @EDITORIAL{IDE_2016_EDITORIAL_IC_367165, AUTHOR = {Ide, N. and Calzolari, N.}, TITLE = {Editors' introduction to the special issue: papers from LREC 2014}, YEAR = {2016}, KEYWORDS = {Language Resources}, PAGES = {163-164}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84975686215\&origin=inward}, VOLUME = {50}, DOI = {10.1007/s10579-016-9358-3}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, BOOKTITLE = {Language resources and evaluation (Print)}, } @INPROCEEDINGS{BARBAGLI_2016_INPROCEEDINGS_BLDMV_366749, AUTHOR = {Barbagli, A. and Lucisano, P. and Dell'Orletta, F. and Montemagni, S. 
and Venturi, G.}, TITLE = {CItA: an L1 Italian Learners Corpus to Study the Development of Writing Competence}, YEAR = {2016}, ABSTRACT = {In this paper, we present the CItA corpus (Corpus Italiano di Apprendenti L1), a collection of essays written by Italian L1 learners collected during the first and second year of lower secondary school. The corpus was built in the framework of an interdisciplinary study jointly carried out by computational linguistics and experimental pedagogists and aimed at tracking the development of written language competence over the years and students' background information.}, KEYWORDS = {Italian Learner Corpus, Diachronic Evolution of Written Language Competence, Error Annotation}, PAGES = {88-95}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/pdf/536_Paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz (Slovenia)}, CONFERENCE_DATE = {23-28 maggio 2016}, } @INPROCEEDINGS{BELLANDI_2016_INPROCEEDINGS_BBDG_364953, AUTHOR = {Bellandi, A. and Benotto, G. and Di Segni, G. and Giovannetti, E.}, TITLE = {Investigating the Application and Evaluation of Distributional Semantics in the Translation of Humanistic Texts: a Case Study}, YEAR = {2016}, ABSTRACT = {Digital Humanities are persisting ascending and the need for translating humanistic texts using Computer Assisted Translation (CAT) tools demands for a specific investigation both of the available technologies and of the evaluation techniques. Indeed, humanistic texts can present deep differences from texts that are usually translated with CAT tools, due to complex interpretative issues, the request of heavy rephrasing, and the addition of explicative parts in order to make the translation fully comprehensible to readers and, also, stylistically pleasant to read. 
In addition, these texts are often written in peculiar languages for which no linguistic analysis tool can be available. We faced this situation in the context of the project for the translation of the Babylonian Talmud from Ancient Hebrew and Aramaic into Italian. In this paper we describe a work in progress on the application of distributional semantics to the informing of the Translation Memory, and on the evaluation issues arising from its assessment.}, KEYWORDS = {computer-assisted translation, distributional semantics, talmud}, PAGES = {6-11}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/workshops/LREC2016Workshop-NLP4TM_Proceedings.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, CONFERENCE_NAME = {2nd Workshop on Natural Language Processing for Translation Memories (NLP4TM 2016)}, CONFERENCE_PLACE = {Portorož, Slovenia}, CONFERENCE_DATE = {28/05/2016}, BOOKTITLE = {Proceedings of the 2nd Workshop on Natural Language Processing for Translation Memories (NLP4TM 2016)}, } @INPROCEEDINGS{BENOTTO_2016_INPROCEEDINGS_BGM_364952, AUTHOR = {Benotto, G. and Giovannetti, E. and Marchi, S.}, TITLE = {Investigating the Application of Distributional Semantics to Stylometry}, YEAR = {2016}, ABSTRACT = {The inclusion of semantic features in the stylometric analysis of literary texts appears to be poorly investigated. In this work, we experiment with the application of Distributional Semantics to a corpus of Italian literature to test if words distribution can convey stylistic cues. To verify our hypothesis, we have set up an Authorship Attribution experiment. 
Indeed, the results we have obtained suggest that the style of an author can reveal itself through words distribution too.}, KEYWORDS = {NLP for Digital Humanities, Stylometry, Distributional Semantics}, PAGES = {61-65}, URL = {https://drive.google.com/open?id=0B0sEp2O7Oo7feVJLdHI3YXBxdTg}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {9788899982089}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-6/12/2016}, BOOKTITLE = {Proceedings of Third Italian Conference on Computational Linguistics (CLiC-it 2016)}, } @INPROCEEDINGS{BENOTTO_2016_INPROCEEDINGS_BGN_364353, AUTHOR = {Benotto, G. and Giovannetti, E. and Nahli, O.}, TITLE = {An application of distributional semantics for the analysis of the Holy Quran}, YEAR = {2016}, ABSTRACT = {In this contribution we illustrate the methodology and the results of an experiment we conducted by applying Distributional Semantics Models to the analysis of the Holy Quran. Our aim was to gather information on the potential differences in meanings that the same words might take on when used in Modern Standard Arabic w.r.t. their usage in the Quran. To do so we used the Penn Arabic Treebank as a contrastive corpus.}, KEYWORDS = {Distributional Semantics, the Holy Quran, Classical Arabic, Modern Standard Arabic, Contrastive Linguistics}, PAGES = {374-379}, URL = {http://ieeexplore.ieee.org/document/7805074/}, DOI = {10.1109/CIST.2016.7805074}, ISBN = {978-1-5090-0751-6}, CONFERENCE_NAME = {4th (IEEE) International Colloquium on Information Science and Technology, CiSt 2016}, CONFERENCE_PLACE = {Tangier, Morocco}, CONFERENCE_DATE = {24-26/10/2016}, BOOKTITLE = {2016 4th IEEE International Colloquium on Information Science and Technology (CiSt)}, EDITOR = {El Mohajir, M. and Chahhou, M. and Al Achhab, M. and El Mohajir, B. E.}, } @INPROCEEDINGS{BERTI_2016_INPROCEEDINGS_BCYBBD_363704, AUTHOR = {Berti, M. and Crane, G. and Yousef, T. 
and Bizzoni, Y. and Boschetti, F. and Del Gratta, R.}, TITLE = {Ancient Greek WordNet meets the Dynamic Lexicon: the example of the fragments of the Greek Historians}, YEAR = {2016}, ABSTRACT = {The Ancient Greek WordNet (AGWN) and the Dynamic Lexicon (DL) are multilingual resources to study the lexicon of Ancient Greek texts and their translations. Both AGWN and DL are works in progress that need accuracy improvement and manual validation. After a detailed description of the current state of each work, this paper illustrates a methodology to cross AGWN and DL data, in order to mutually score the items of each resource according to the evidence provided by the other resource. The training data is based on the corpus of the Digital Fragmenta Historicorum Graecorum (DFHG), which includes ancient Greek texts with Latin translations.}, KEYWORDS = {wordnet}, PAGES = {34-38}, URL = {http://gwc2016.racai.ro/procedings.pdf}, ISBN = {978-606-714-239-6}, CONFERENCE_NAME = {Global WordNet Conference}, CONFERENCE_PLACE = {Bucarest}, CONFERENCE_DATE = {27-30/01/2016}, } @INPROCEEDINGS{BOMPOLAS_2016_INPROCEEDINGS_BMFCP_362297, AUTHOR = {Bompolas, S. and Marzi, C. and Ferro, M. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {Reassessing inflectional regularity in Modern Greek conjugation}, YEAR = {2016}, ABSTRACT = {Paradigm-based approaches to word processing/learning assume that word forms are not acquired in isolation, but through associative relations linking members of the same word family (e.g. a paradigm, or a set of forms filling the same paradigm cell). Principles of correlative learning offer a set of dynamic equations that are key to modelling this complex dynamic at a considerable level of detail. We use these dynamic equations to simulate acquisition of Modern Greek conjugation, and we compare the results with evidence from German and Italian. 
Simulations show that different Greek verb classes are processed and acquired differentially, depending on their degrees of formal transparency and predictability. We relate these results to psycholinguistic evidence on Modern Greek word processing, and interpret our findings as supporting a view of the mental lexicon as an emergent integrative system.}, KEYWORDS = {word processing, paradigm-based learning, morphological processing, Greek stem allomorphy, Temporal Self-Organising Map}, PAGES = {72-77}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85009242702\&origin=inward}, VOLUME = {1749}, DOI = {10.4000/books.aaccademia.1721}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {978-88-99982-08-9}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics (CLiC-it 2016) \& Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop (EVALITA 2016)}, CONFERENCE_PLACE = {Napoli, Italy}, CONFERENCE_DATE = {05-07/12/2016}, BOOKTITLE = {CLiC-it \& EVALITA 2016-Proceedings of Third Italian Conference on Computational Linguistics (CLiC-it 2016) \& Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop (EVALITA 2016)}, EDITOR = {Basile, P. and Corazza, A. and Montemagni, S. and Nissim, M. and Patti, V. and Semeraro, G. and Sprugnoli, R.}, } @INPROCEEDINGS{BRANDO_2016_INPROCEEDINGS_BAF_348461, AUTHOR = {Brando, C. and Abadie, N. and Frontini, F.}, TITLE = {Linked Data Quality for Domain-Specific Named-Entity Linking}, YEAR = {2016}, ABSTRACT = {We present outgoing research whose goal is to assess quality of Linked Data for its usage in domain-specific Named-entity Linking (NEL). NEL is the task of assigning appropriate referents, typically an Uniform Resource Identifier (URI), to mentions of entities (e.g. persons or places) identified in textual documents. 
Nowadays, many of these approaches strongly rely on Linked Data as knowledge base. However, the scope of the chosen data sets can have an important influence on the performances of NEL as texts often concern specific domains of knowledge. In this paper, we describe LD quality aspects which should be considered for improving NEL in domain-specific contexts, then propose quality metrics and compute them for both French DBpedia and the French National Library (BnF) data sets thereby to discuss the opportunity of using these data sets for the linking of authors in old French Literary digital editions. Our ultimate goal is to improve a Natural Language Processing (NLP) pipeline for the automatic annotation of these texts.}, KEYWORDS = {Linked Data, Quality, Named Entity Linking}, PAGES = {13-24}, URL = {https://publications.cnr.it/doc/348461}, CONFERENCE_NAME = {Atelier-Qualité des Données du Web (QLOD'16) Joint à la 16ème édition de la conférence internationale francophone EGC 2016}, CONFERENCE_PLACE = {Reims}, CONFERENCE_DATE = {19/01/2016}, } @INPROCEEDINGS{BRUNATO_2016_INPROCEEDINGS_BCDV_366726, AUTHOR = {Brunato, D. and Cimino, A. and Dell'Orletta, F. and Venturi, G.}, TITLE = {PaCCSS-IT: A Parallel Corpus of Complex-Simple Sentences for Automatic Text Simplification}, YEAR = {2016}, ABSTRACT = {In this paper we present PaCCSS-IT, a Parallel Corpus of Complex-Simple Sentences for ITalian. To build the resource we develop a new method for automatically acquiring a corpus of complex-simple paired sentences able to intercept structural transformations and particularly suitable for text simplification. The method requires a wide amount of texts that can be easily extracted from the web making it suitable also for less-resourced languages. 
We test it on the Italian language making available the biggest Italian corpus for automatic text simplification.}, KEYWORDS = {Automatic Text Simplification, Sentence alignment, Italian corpus}, PAGES = {351-361}, URL = {https://www.aclweb.org/anthology/D/D16/D16-1034.pdf}, DOI = {10.18653/v1/d16-1034}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-945626-25-8}, CONFERENCE_NAME = {Conference on Empirical Methods in Natural Language Processing (EMNLP 2016)}, CONFERENCE_PLACE = {Austin, Texas}, CONFERENCE_DATE = {01-05/11/2016}, } @INPROCEEDINGS{CALZOLARI_2016_INPROCEEDINGS_C_367150, AUTHOR = {Calzolari, N.}, TITLE = {Preface: General Chair}, YEAR = {2016}, ABSTRACT = {COLING is organised under the auspices of the International Committee on Computational Linguistics (ICCL, http://nlp.shef.ac.uk/iccl/index.html). ICCL is a very special committee, with no fixed rules and no funding, whose only function is to make sure that a COLING appears every two years and that it is a good and friendly conference.}, KEYWORDS = {Computational Linguistics}, URL = {http://aclweb.org/anthology/C/C16/C16-1000.pdf}, ISBN = {978-4-87974-702-0}, CONFERENCE_NAME = {The 26th International Conference on Computational Linguistics, COLING 2016}, CONFERENCE_PLACE = {Osaka, Japan}, CONFERENCE_DATE = {December 11-16, 2016}, } @INPROCEEDINGS{CALZOLARI_2016_INPROCEEDINGS_C_367173, AUTHOR = {Calzolari, N.}, TITLE = {Introduction of the Conference Chair and ELRA President Nicoletta Calzolari}, YEAR = {2016}, ABSTRACT = {Chair of the 10th International Conference on Language Resources and Evaluation and ELRA President}, KEYWORDS = {Language Resources}, PAGES = {1-6}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {LREC 2016, Tenth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Portorose, 
Slovenia}, CONFERENCE_DATE = {May 23-28, 2016}, BOOKTITLE = {The LREC 2016 Proceedings}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{CONNOR_2016_INPROCEEDINGS_CC_364655, AUTHOR = {Connor, R. and Cardillo, F. A.}, TITLE = {Quantifying the specificity of near-duplicate image classification functions}, YEAR = {2016}, ABSTRACT = {There are many published methods for detecting similar and near-duplicate images. Here, we consider their use in the context of unsupervised near-duplicate detection, where the task is to find a (relatively small) near-duplicate intersection of two large candidate sets. Such scenarios are of particular importance in forensic near-duplicate detection. The essential properties of such a function are: performance, sensitivity, and specificity. We show that, as collection sizes increase, then specificity becomes the most important of these, as without very high specificity huge numbers of false positive matches will be identified. This makes even very fast, highly sensitive methods completely useless. Until now, to our knowledge, no attempt has been made to measure the specificity of near-duplicate finders, or even to compare them with each other. Recently, a benchmark set of near-duplicate images has been established which allows such assessment by giving a near-duplicate ground truth over a large general image collection. Using this we establish a methodology for calculating specificity. A number of the most likely candidate functions are compared with each other and accurate measurement of sensitivity vs. specificity are given. 
We believe these are the first such figures to be calculated for any such function.}, KEYWORDS = {near-duplicate image detection}, PAGES = {647-654}, URL = {https://publications.cnr.it/doc/364655}, CONFERENCE_NAME = {11th International Conference on Computer Vision Theory and Applications}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {27-29/02/2016}, } @INPROCEEDINGS{CONNOR_2016_INPROCEEDINGS_CVCR_363066, AUTHOR = {Connor, R. and Vadicamo, L. and Cardillo, F. A. and Rabitti, F.}, TITLE = {Supermetric search with the four-point property}, YEAR = {2016}, ABSTRACT = {Metric indexing research is concerned with the efficient evaluation of queries in metric spaces. In general, a large space of objects is arranged in such a way that, when a further object is presented as a query, those objects most similar to the query can be efficiently found. Most such mechanisms rely upon the triangle inequality property of the metric governing the space. The triangle inequality property is equivalent to a finite embedding property, which states that any three points of the space can be isometrically embedded in two-dimensional Euclidean space. In this paper, we examine a class of semimetric space which is finitely 4-embeddable in three-dimensional Euclidean space. In mathematics this property has been extensively studied and is generally known as the four-point property. All spaces with the four-point property are metric spaces, but they also have some stronger geometric guarantees. We coin the term supermetric space as, in terms of metric search, they are significantly more tractable. We show some stronger geometric guarantees deriving from the four-point property which can be used in indexing to great effect, and show results for two of the SISAP benchmark searches that are substantially better than any previously published.}, KEYWORDS = {Similarity search, Metric Space, Supermetric Space, Metric Indexing, Four-point property, Hilbert Embedding, H. 3. 
3 INFORMATION STORAGE AND RETRIEVAL. Information Search and Retrieval}, PAGES = {51-64}, URL = {https://link.springer.com/chapter/10.1007/978-3-319-46759-7_4}, VOLUME = {9939}, DOI = {10.1007/978-3-319-46759-7_4}, CONFERENCE_NAME = {Similarity Search and Applications. 9th International Conference}, CONFERENCE_PLACE = {Tokyo, Japan}, CONFERENCE_DATE = {24-26 October 2016}, BOOKTITLE = {Similarity Search and Applications. SISAP 2016}, EDITOR = {Amsaleg, L. and Houle, M. and Schubert, E.}, } @INPROCEEDINGS{DELGRATTA_2016_INPROCEEDINGS_DFMPRBKSC_355425, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Khan, F. and Soria, C. and Calzolari, N.}, TITLE = {LREC as a Graph: People and Resources in a Network}, YEAR = {2016}, ABSTRACT = {This proposal describes a new way to visualise resources in the LREMap, a community-built repository of language resource descriptions and uses. The LREMap is represented as a force-directed graph, where resources, papers and authors are nodes. The analysis of the visual representation of the underlying graph is used to study how the community gathers around LRs and how LRs are used in research.}, KEYWORDS = {Language Resources, Resources Documentation, Data Visualisation}, PAGES = {2529-2532}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 May 2016}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{DELLORLETTA_2016_INPROCEEDINGS_DMV_366757, AUTHOR = {Dell'Orletta, F. and Montemagni, S. 
and Venturi, G.}, TITLE = {Esplorazioni computazionali nello spazio dell'interlingua: verso una nuova metodologia di indagine}, YEAR = {2016}, ABSTRACT = {Il presente contributo intende proporre un innovativo approccio all'identificazione delle caratteristiche linguistiche che aiutano a definire l'interlingua. Tale approccio consiste nella ricostruzione del profilo linguistico di corpora di produzioni scritte da apprendenti una lingua seconda basato su strumenti di trattamento automatico del linguaggio.}, KEYWORDS = {interlingua, annotazione linguistica automatica, monitoraggio linguistico}, PAGES = {143-161}, URL = {https://www.bulzoni.it/it/catalogo/lingue-in-contatto-contact-linguistics.html}, PUBLISHER = {Bulzoni Editore (Roma, ITA)}, ISBN = {978-88-6897-029-1}, CONFERENCE_NAME = {XLVIII Congresso Internazionale di Studi della Società di Linguistica Italiana (SLI 2014)}, CONFERENCE_PLACE = {Udine}, CONFERENCE_DATE = {25-27 settembre 2014}, } @INPROCEEDINGS{DELLORLETTA_2016_INPROCEEDINGS_DV_366752, AUTHOR = {Dell'Orletta, F. and Venturi, G.}, TITLE = {ULISSE: una strategia di adattamento al dominio per l'annotazione sintattica automatica}, YEAR = {2016}, ABSTRACT = {This paper deals with Domain Adaptation for automatic syntactic annotation. Until the half of the 1980s, automatic linguistic annotation was based on algorithms built on groups of hand-written rules, defined a priori on the basis of the knowledge of the system to formalise. Subsequently, thanks to the progress of research in the field of Artificial Intelligence and to the development of linguistic resources, algorithms based on machine learning techniques began to be employed. The major difficulties of those algorithms were due to certain aspects of natural language such as ambiguities, diachronic evolutions, or language variations from the original domain of knowledge. 
More specifically, the issue of Domain Adaptation can be put in the following terms: "can an annotated corpus [which is representative of a specific linguistic variety] be used for the syntactic analysis of a second corpus [which is representative of a different linguistic variety]?". The authors answer by presenting an algorithm called ULISSE (Unsupervised LInguistically-driven Selection of dEpendency parses), which selects in an optimal way the most representative sentences of a new target domain and feeds them to the parser in addition to the original training set.}, KEYWORDS = {Domain Adaptation, annotazione sintattica automatica}, PAGES = {55-79}, URL = {http://www.italianlp.it/wp-content/uploads/2016/10/Compter_Parler_Soigner_ULISSE.pdf}, ISBN = {978-88-6952-038-9}, CONFERENCE_NAME = {Atti del convegno "Compter parler soigner: tra linguistica e intelligenza artificiale"}, CONFERENCE_PLACE = {Pavia}, CONFERENCE_DATE = {15-17 dicembre 2014}, } @INPROCEEDINGS{FERRO_2016_INPROCEEDINGS_FCPGS_362349, AUTHOR = {Ferro, M. and Cardillo, F. A. and Pirrelli, V. and Gagné, C. L. and Spalding, T. L.}, TITLE = {Written word production and lexical self-organisation: evidence from English (pseudo)compounds}, YEAR = {2016}, ABSTRACT = {Elevation in typing latency for the initial letter of the second constituent of an English compound, relative to the latency for the final letter of the first constituent of the same compound, provides evidence that implementation of a motor plan for written compound production involves smaller constituents, in both semantically transparent and semantically opaque compounds. We investigate here the implications of this evidence for algorithmic models of lexical organisation, to show that effects of differential perception of the internal structure of compounds and pseudo-compounds can also be simulated as peripheral stages of lexical access by a self-organising connectionist architecture, even in the absence of morphosemantic information. 
This complementary evidence supports a maximization-of-opportunity approach to lexical modelling, accounting for the integration of effects of pre-lexical and lexical access.}, KEYWORDS = {compound, pseudo-compound, written word production, lexical self-organisation, temporal self organising map}, PAGES = {146-151}, URL = {http://ceur-ws.org/Vol-1749/}, VOLUME = {1749}, DOI = {10.4000/books.aaccademia.1775}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {9788899982546}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics (CLiC-it 2016)}, CONFERENCE_PLACE = {Napoli (Italia)}, CONFERENCE_DATE = {5-6/12/2016}, BOOKTITLE = {Proceedings CLiC-it 2016}, EDITOR = {Basile, P. and Corazza, A. and Cutugno, F. and Montemagni, S. and Nissim, M. and Patti, V. and Semeraro, G. and Sprugnoli, R.}, } @INPROCEEDINGS{GIANNINI_2016_INPROCEEDINGS_GBGP_348024, AUTHOR = {Giannini, S. and Biagioni, S. and Goggi, S. and Pardelli, G.}, TITLE = {Grey Literature citations in the age of Digital Repositories and Open Access}, YEAR = {2016}, ABSTRACT = {The work measures grey citations in the years 2012, 2013 and 2014 and then describes the features of GL documents cited in different areas of knowledge: Computational Linguistics, Computer Science and Engineering. 
With the aim of surveying a wide and varied range of resources, we selected a sample data based on the bibliographical references of articles contained in four journals - all indexed by Scopus Citation Database and ISI Web of Science, with an Impact Factor (IF) over the last three years - and two proceedings of international conferences held in 2012 and 2014.}, KEYWORDS = {Grey Literature Citations}, PAGES = {137-145}, URL = {https://publications.cnr.it/doc/348024}, VOLUME = {17}, ISBN = {978-90-77484-27-2}, CONFERENCE_NAME = {Seventeenth International Conference on Grey Literature: A New Wave of Textual and Non-Textual Grey literature}, CONFERENCE_PLACE = {Amsterdam, NL}, CONFERENCE_DATE = {1-2 December 2015}, BOOKTITLE = {A New Wave of Textual and Non-Textual Grey literature}, } @INPROCEEDINGS{GOGGI_2016_INPROCEEDINGS_GPBFMMDB_350374, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Frontini, F. and Monachini, M. and Manzella, G. and De Mattei, M. and Bustaffa, F.}, TITLE = {A semantic engine for grey literature retrieval in the oceanography domain}, YEAR = {2016}, ABSTRACT = {Here we present the final results of the MAPS (Marine Planning and Service Platform) project, an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. The system takes as input non-textual data (measurements) and text - both published papers and documentation - and it provides an advanced search facility thanks to the rich set of metadata and, above all, to the possibility of a refined and domain targeted key-word indexing of texts using Natural Language Processing (NLP) techniques. 
The paper describes the system in its details providing also evidence of evaluation.}, KEYWORDS = {Information Extraction, Search Engine, Operative Oceanography}, PAGES = {104-111}, URL = {https://publications.cnr.it/doc/350374}, VOLUME = {17}, ISBN = {978-90-77484-27-2}, CONFERENCE_NAME = {Seventeenth International Conference on Grey Literature. A New Wave of Textual and Non-Textual Grey Literature}, CONFERENCE_PLACE = {Amsterdam}, CONFERENCE_DATE = {December 1st-2nd 2015}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{KHAN_2016_INPROCEEDINGS_KBM_366525, AUTHOR = {Khan, A. F. and Bellandi, A. and Monachini, M.}, TITLE = {Tools and Instruments for Building and Querying Diachronic Computational Lexica}, YEAR = {2016}, ABSTRACT = {This article describes work on enabling the addition of temporal information to senses of words in linguistic linked open data lexica based on the lemonDia model. Our contribution in this article is twofold. On the one hand, we demonstrate how lemonDia enables the querying of diachronic lexical datasets using OWL-oriented Semantic Web based technologies. On the other hand, we present a preliminary version of an interactive interface intended to help users in creating lexical datasets that model meaning change over time.}, KEYWORDS = {OWL-oriented Semantic Web based technologies}, PAGES = {164-171}, URL = {https://www.clarin-d.net/images/lt4dh/pdf/LT4DH22.pdf}, ISBN = {978-4-87974-708-2}, CONFERENCE_NAME = {Language Technology Resources and Tools for Digital Humanities (LT4DH 2016)}, CONFERENCE_PLACE = {Osaka, Japan}, CONFERENCE_DATE = {December 11-16, 2016}, } @INPROCEEDINGS{KHAN_2016_INPROCEEDINGS_KDM_355470, AUTHOR = {Khan, F. and Díaz Vera, J. E. 
and Monachini, M.}, TITLE = {Representing Polysemy and Diachronic Lexico-Semantic Data on the Semantic Web}, YEAR = {2016}, ABSTRACT = {In this article we will outline two different vocabularies, both extensions of the lemon model, for representing diachronic lexico-semantic data on the Semantic Web. This is especially useful for representing the evolution of scientific terminologies where many terms are polysemous and/or imported from other languages. The first vocabulary, polyLemon, allows for the representation of data about polysemy; the second, lemonDIA, the representation of meaning shift over time.}, KEYWORDS = {Language Resources, Resource Data Framework (RDF)}, PAGES = {37-45}, URL = {http://ceur-ws.org/Vol-1595/paper4.pdf}, VOLUME = {1595}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Second International Workshop on Semantic Web for Scientific Heritage co-located with 13th Extended Semantic Web Conference (ESWC 2016)}, CONFERENCE_PLACE = {Heraklion, Greece}, CONFERENCE_DATE = {May 30th, 2016}, BOOKTITLE = {SWASH 2016 Semantic Web for Scientific Heritage Proceedings of the Second International Workshop on Semantic Web for Scientific Heritage co-located with 13th Extended Semantic Web Conference (ESWC 2016)}, EDITOR = {Draelants, I. and Zucker, C. F. and Monnin, A. and Zucker, A.}, } @INPROCEEDINGS{KHAN_2016_INPROCEEDINGS_KDM_355442, AUTHOR = {Khan, F. and Díaz Vera, J. and Monachini, M.}, TITLE = {The Representation of an Old English Emotion Lexicon as Linked Open Data}, YEAR = {2016}, ABSTRACT = {We present the ongoing conversion of a lexicon of emotion terms in Old English (OE) into RDF using an extension of lemon called lemonDIA and which we briefly describe. 
We focus on the translation of the subset of the lexicon dealing with terms for shame and guilt and give a number of illustrative examples.}, KEYWORDS = {Linguistic Linked Open Data, Old English, Lexicon}, PAGES = {73-76}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 may}, BOOKTITLE = {LDL 2016 5th Workshop on Linked Data in Linguistics: Managing, Building and Using Linked Language Resources}, EDITOR = {McCrae, J. P. and Chiarcos, C. and Ponsoda, E. M. and Declerck, T. and Osenova, P. and Hellmann, S.}, } @INPROCEEDINGS{MONTEMAGNI_2016_INPROCEEDINGS_M_372010, AUTHOR = {Montemagni, S.}, TITLE = {Preface}, YEAR = {2016}, ABSTRACT = {Our very warm welcome to CLiC-it 2016 (http://clic-it2016.dieti.unina.it/), the 3rd edition of the Italian Conference on Computational Linguistics, held on December 5th and 6th, in Naples, Italy, co-located with Evalita 2016 (http://www.evalita.it/2016), hosted and locally organized by Università Federico II, one of the oldest public and laic universities in the world. The organization of the conference is the result of a fruitful conjoint effort of different research groups (Istituto di Linguistica Computazionale "Antonio Zampolli" del CNR, Università degli Studi di Bari Aldo Moro and Università degli Studi di Napoli Federico II) showing the nationwide spreading of Computational Linguistics in Italy. 
The CLiC-it conference series is organized by the Italian Association for Computational Linguistics (AILC) and has clearly established itself as the premier national forum for research and development in the fields of Computational Linguistics (CL) and Natural Language Processing (NLP), where leading researchers and practitioners from academia and industry meet to share their challenges, solutions, research results, and experiences. CLiC-it covers all aspects of computational linguistics and natural language (both written and spoken) processing, and targets state-of-art theoretical results, experimental methodologies, technologies, as well as application perspectives, which may contribute to advance the field.}, KEYWORDS = {Computational Linguistics Natural Language Processing Speech Tools for Italian CLiC-it EVALITA}, URL = {https://publications.cnr.it/doc/372010}, VOLUME = {1749}, PUBLISHER = {CEUR-WS.org (Aachen, DEU)}, CONFERENCE_NAME = {CLiC-it \& EVALITA 2016}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-7/12/2016}, BOOKTITLE = {Proceedings CLiC-it 2016 and EVALITA 2016}, EDITOR = {Montemagni, S.}, } @INPROCEEDINGS{NAHLI_2016_INPROCEEDINGS_NFMKZK_355436, AUTHOR = {Nahli, O. and Frontini, F. and Monachini, M. and Khan, F. and Zarghili, A. and Khalfi, M.}, TITLE = {Al Qamus al Muhit, a Medieval Arabic Lexicon in LMF}, YEAR = {2016}, ABSTRACT = {This paper describes the conversion into LMF, a standard lexicographic digital format of 'al-qāmūs al-muḥīṭ, a Medieval Arabic lexicon. The lexicon is first described, then all the steps required for the conversion are illustrated. The work will produce a useful lexicographic resource for Arabic NLP, but is also interesting per se, to study the implications of adapting the LMF model to the Arabic language. Some reflections are offered as to the status of roots with respect to previously suggested representations. 
In particular, roots, in our opinion, are not to be treated as lexical entries, but modeled as lexical metadata for classifying and identifying lexical entries. In this manner, each root connects all entries that are derived from it.}, KEYWORDS = {Arabic Lexicon, LMF, Al Qamus al Muhit}, PAGES = {943-950}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{ORLETTI_2016_INPROCEEDINGS_ODI_366723, AUTHOR = {Orletti, F. and Dell'Orletta, F. and Iovino, R.}, TITLE = {La leggibilità dei testi di ambito medico rivolti al paziente: Il caso dei bugiardini di farmaci senza obbligo di prescrizione medica}, YEAR = {2016}, ABSTRACT = {In this paper we present the first results of an exploratory analysis of simplification of the package leaflets of medicines, considered representative texts of doctor-patient communication. It will be shown how natural language processing tools can be used to reconstruct the linguistic profile of these texts and to guide their simplification.}, KEYWORDS = {leggibilità}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85009291162\&origin=inward}, VOLUME = {1749}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-6/12/2016}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{PARDELLI_2016_INPROCEEDINGS_PGGB_355458, AUTHOR = {Pardelli, G. and Goggi, S. 
and Giannini, S. and Biagioni, S.}, TITLE = {Two decades of terminology: European framework programmes titles}, YEAR = {2016}, ABSTRACT = {This work analyses a corpus made of the titles of research projects belonging to the last four European Commission Framework Programmes (FP4, FP5, FP6, FP7) during a time span of nearly two decades (1994-2012). The starting point is the idea of creating a corpus of titles which would constitute a terminological niche, a sort of "cluster map" offering an overall vision on the terms used and the links between them. Moreover, by performing a terminological comparison over a period of time it is possible to trace the presence of obsolete words in outdated research areas as well as of neologisms in the most recent fields. Within this scenario, the minimal purpose is to build a corpus of titles of European projects belonging to the several Framework Programmes in order to obtain a terminological mapping of relevant words in the various research areas: particularly significant would be those terms spread across different domains or those extremely tied to a specific domain. A term could actually be found in many fields and being able to acknowledge and retrieve this cross-presence means being able to linking those different domains by means of a process of terminological mapping.}, KEYWORDS = {Terminology Extraction, Natural Language Processing, Terminological Comparison}, PAGES = {373-378}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {LREC 2016-Tenth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 May}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. 
and Piperidis, S.}, } @INPROCEEDINGS{PESINI_2016_INPROCEEDINGS_PDB_288074, AUTHOR = {Pesini, L. and Del Grosso, A. M. and Bozzi, A.}, TITLE = {F. de Saussure e la linguistica romanza. Un'applicazione web per l'edizione elettronica dei manoscritti}, YEAR = {2016}, ABSTRACT = {Se il ruolo di F. de Saussure nella linguistica indoeuropea e nella linguistica generale è ben noto, il suo interesse nell'ambito della linguistica romanza è stato quasi del tutto trascurato dagli studiosi. Fino ad oggi è stato difficile valutare il contributo di Saussure alla romanistica, dato che i suoi lavori (riguardanti soprattutto i patois francesi e la toponomastica) restano in gran parte inediti. Soltanto nel quadro di una nuova edizione critica digitale di tutte le opere di Saussure sarà possibile apprezzare il suo profondo interesse per la filologia romanza e far luce sul ruolo che lo studio dell'evoluzione delle lingue neolatine ebbe nella riflessione generale di questo linguista, sia comparatista che romanista.}, KEYWORDS = {Computational and collaborative philology, digital humanities, software engineering}, PAGES = {239-254}, URL = {http://www.atilf.fr/cilpr2013/actes/section-16/CILPR-2013-16-Pesini-Bozzi-Del_Grosso.pdf}, PUBLISHER = {Société de linguistique romane/ÉLiPhi (Strasbourg, FRA)}, ISBN = {979-10-91460-32-3}, CONFERENCE_NAME = {XXVIIe Congrès international de linguistique et de philologie romanes}, CONFERENCE_PLACE = {Nancy}, CONFERENCE_DATE = {15-20 juillet 2013}, EDITOR = {Buchi, É. and Chauveau, J. and Pierrel, J. M.}, } @INPROCEEDINGS{PICCINI_2016_INPROCEEDINGS_PBBG_388601, AUTHOR = {Piccini, S. and Bellandi, A. and Benotto, G. 
and Giovannetti, E.}, TITLE = {La modellazione diacronica di risorse termino-ontologiche nell'ambito delle digital humanities: Esperimenti su clavius}, YEAR = {2016}, ABSTRACT = {In this work, we present an experiment in the modeling of a diachronic termino-ontological resource named CLAVIUS through both the N-ary relations model and the 4D-fluents approach. Some of the salient differences of these two models are discussed. The overall objective of this research is to illustrate the main advantages and disadvantages in the adoption of a given model to build diachronic resources.}, KEYWORDS = {Diachronic terminology, termino-ontological resources, n-ary models, perdurantist models}, PAGES = {205-245}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85009289234\&origin=inward}, VOLUME = {1749}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {9788899982089}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-6/12/2016}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{PICCINI_2016_INPROCEEDINGS_PGBR_282572, AUTHOR = {Piccini, S. and Giovannetti, E. and Bellandi, A. and Ruimy, N.}, TITLE = {Le lexique électronique de la terminologie de Ferdinand de Saussure: une première}, YEAR = {2016}, ABSTRACT = {Le Projet italien intitulé "Per un'edizione digitale dei manoscritti di Ferdinand de Saussure" a pour objectif la création d'un prototype d'édition numérique des textes du grand linguiste genevois, réalisé à partir d'une sélection de ses documents autographes. Dans ce projet, l'un des volets novateurs est la création du premier thésaurus-lexique électronique de la terminologie linguistique Saussurienne. 
Afin de réaliser cette base de connaissance, nous nous sommes inspirés du modèle lexical SIMPLE qui a permis le développement de vastes lexiques sémantiques informatisés, et l'avons adapté aux exigences de notre domaine de la connaissance. Le lexique réalisé a été conçu dans le but d'offrir un outil performant pour les études saussuriennes. Il fournit en effet une représentation structurée de la terminologie de Saussure, une définition du contenu sémantique de chacun des termes ainsi qu'un cadre explicite de la nature et de l'importance des liens qui les unissent. Ce faisceau d'informations devrait donc contribuer de manière significative à mieux maîtriser le vocabulaire saussurien et, partant, à éclairer certains aspects originaux de la pensée du père de la linguistique moderne.}, KEYWORDS = {Saussure, lessici computazionali, ontologie}, PAGES = {255-267}, URL = {http://www.atilf.fr/cilpr2013/actes/section-16/CILPR-2013-16-Piccini-Giovannetti-Bellandi-Ruimy.pdf}, ISBN = {979-10-91460-32-3}, CONFERENCE_NAME = {XXVII Congrès international de linguistique et de philologie romanes}, CONFERENCE_PLACE = {Nancy}, CONFERENCE_DATE = {15-20/07/2013}, BOOKTITLE = {Section 16: Projets en cours; ressources et outils nouveaux}, EDITOR = {Trotter, D. and Bozzi, A. and Fairon, C.}, } @INPROCEEDINGS{PIERI_2016_INPROCEEDINGS_PBD_366724, AUTHOR = {Pieri, G. and Brunato, D. and Dell'Orletta, F.}, TITLE = {Studio sull'ordinamento dei costituenti nel confronto tra generi e complessità}, YEAR = {2016}, ABSTRACT = {In questo articolo presentiamo uno studio sull'ordine dei costituenti in italiano basato su corpora annotati in maniera automatica fino all'analisi sintattica a dipendenze. 
L'indagine comparativa ha permesso di valutare l'influenza sia del genere testuale sia della complessità linguistica nella distribuzione dei fenomeni di marcatezza sintattica.}, KEYWORDS = {Complessità linguistica, Corpora annotati, Generi testuali}, PAGES = {5}, URL = {http://ceur-ws.org/Vol-1749/paper44.pdf}, VOLUME = {1749}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics (CLiC-it 2016)}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-6/12/2016}, EDITOR = {Basile, P. and Corazza, A. and Cutugno, F. and Montemagni, S. and Nissim, M. and Patti, V. and Semeraro, G. and Sprugnoli, R.}, } @INPROCEEDINGS{POPESCU_2016_INPROCEEDINGS_PLDCC_363731, AUTHOR = {Popescu, V. and Liu, L. and Del Gratta, R. and Choukri, K. and Calzolari, N.}, TITLE = {New Developments in the LRE Map}, YEAR = {2016}, ABSTRACT = {In this paper we describe the new developments brought to LRE Map, especially in terms of the user interface of the Web application, of the searching of the information therein, and of the data model updates. Thus, users now have several new search facilities, such as faceted search and fuzzy textual search, they can now register, log in and store search bookmarks for further perusal. Moreover, the data model now includes the notion of paper and author, which allows for linking the resources to the scientific works. Also, users can now visualise author-provided field values and normalised values. The normalisation has been manual and enables a better grouping of the entries. Last but not least, provisions have been made towards linked open data (LOD) aspects, by exposing an RDF access point allowing to query on the authors, papers and resources. 
Finally, a complete technological overhaul of the whole application has been undertaken, especially in terms of the Web infrastructure and of the text search backend.}, KEYWORDS = {Language resource, LRE Map, Information search and retrieval, Data modelling}, PAGES = {4526-4530}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/pdf/1256_Paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28/05/2016}, } @INPROCEEDINGS{RUSSO_2016_INPROCEEDINGS_RM_367412, AUTHOR = {Russo, I. and Monachini, M.}, TITLE = {Samskara minimal structural features for detecting subjectivity and polarity in Italian tweets}, YEAR = {2016}, ABSTRACT = {Sentiment analysis classification tasks strongly depend on the properties of the medium that is used to communicate opinionated content. There are some limitations in Twitter that force the user to exploit structural properties of this social network with features that have pragmatic and communicative functions. Samskara is a system that uses minimal structural features to classify Italian tweets as instantiations of a textual genre, obtaining good results for subjectivity classification, while polarity classification needs substantial improvements.}, KEYWORDS = {sentiment analysis, twitter}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85009270160\&origin=inward}, VOLUME = {1749}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop EVALITA 2016}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {7/12/2016}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{SORIA_2016_INPROCEEDINGS_SRQHGST_355526, AUTHOR = {Soria, C. 
and Russo, I. and Quochi, V. and Hicks, D. and Gurrutxaga, A. and Sarhimaa, A. and Tuomisto, M.}, TITLE = {Fostering digital representation of EU regional and minority languages: the Digital Language Diversity Project}, YEAR = {2016}, ABSTRACT = {Poor digital representation of minority languages further prevents their usability on digital media and devices. The Digital Language Diversity Project, a three-year project funded under the Erasmus+ programme, aims at addressing the problem of low digital representation of EU regional and minority languages by giving their speakers the intellectual and practical skills to create, share, and reuse online digital content. Availability of digital content and technical support to use it are essential prerequisites for the development of language-based digital applications, which in turn can boost digital usage of these languages. In this paper we introduce the project, its aims, objectives and current activities for sustaining digital usability of minority languages through adult education.}, KEYWORDS = {Less-resourced languages, Language Technology, digital language vitality, digital language diversity}, PAGES = {3256-3260}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 may}, } @INPROCEEDINGS{TUSA_2016_INPROCEEDINGS_TDMV_366754, AUTHOR = {Tusa, E. and Dell'Orletta, F. and Montemagni, S. 
and Venturi, G.}, TITLE = {Dieci sfumature di marcatezza sintattica: Verso una nozione computazionale di complessità}, YEAR = {2016}, ABSTRACT = {In this work, we will investigate whether and to what extent algorithms typically used to assess the reliability of the output of syntactic parsers can be used to study the correlation between processing complexity and the linguistic notion of markedness. Although still preliminary, achieved results show the key role of features such as dependency direction and length in defining the markedness degrees of a given syntactic construction.}, KEYWORDS = {marcatezza sintattica, complessità linguistica, annotazione linguistica automatica}, PAGES = {1-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85009279517\&origin=inward}, VOLUME = {1749}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-6 dicembre 2016}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{VALSECCHI_2016_INPROCEEDINGS_VABMP_356355, AUTHOR = {Valsecchi, F. and Abrate, M. and Bacciu, C. and Marchetti, A. and Piccini, S.}, TITLE = {Text Encoder and Annotator: an all-in-one editor for transcribing and annotating manuscripts with RDF}, YEAR = {2016}, ABSTRACT = {In the context of the digitization of manuscripts, transcription and annotation are often distinct, sequential steps. This could lead to difficulties in improving the transcribed text when annotations have already been defined. In order to avoid this, we devised an approach which merges the two steps into the same process. Text Encoder and Annotator (TEA) is a prototype application embracing this concept. TEA is based on a lightweight language syntax which annotates text using Semantic Web technologies. 
Our approach is currently being developed within the Clavius on the Web project, devoted to studying the manuscripts of Christophorus Clavius, an influential 16th century mathematician and astronomer.}, KEYWORDS = {RDF, Semantic Annotation, Semantic Web}, PAGES = {399-407}, URL = {https://publications.cnr.it/doc/356355}, VOLUME = {9989}, DOI = {10.1007/978-3-319-47602-5_52}, CONFERENCE_NAME = {European Semantic Web Conference 2016}, CONFERENCE_PLACE = {Heraklion, Greece}, CONFERENCE_DATE = {29/05-02/06-2016}, BOOKTITLE = {The Semantic Web}, } @INPROCEEDINGS{WEINGART_2016_INPROCEEDINGS_WG_364954, AUTHOR = {Weingart, A. and Giovannetti, E.}, TITLE = {A lexicon for Old Occitan medico-botanical terminology in lemon}, YEAR = {2016}, ABSTRACT = {The article presents the adaptation of the lemon model (a model for lexica as RDF data) for a multilingual and multi-alphabetical lexicon of Old Occitan medico-botanical terminology. The lexicon is the core component of an ontology-based information system that will be constructed and implemented within the DFG-funded project "Dictionnaire de Termes Médico-botaniques de l'Ancien Occitan" (DiTMAO). The difficulties for the lemmatization raised by the particularities of the corpus (terms in Latin, Hebrew and Arabic script and corresponding terms in other ancient languages, mostly Hebrew and Arabic) can be perfectly solved by extending the basic properties of lemon and introducing domain specific vocabulary.}, KEYWORDS = {lemon model, RDF, multilingual, multi-alphabetical, historical lexicon, medico-botanical terminology, Old Occitan, Hebrew, Arabic}, PAGES = {25-36}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84978818653\&origin=inward}, VOLUME = {1595}, PUBLISHER = {M. 
Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Second International Workshop on Semantic Web for Scientific Heritage (SW4SH 2016)}, CONFERENCE_PLACE = {Héraklion, Greece}, CONFERENCE_DATE = {30/05/2016}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{WIELING_2016_INPROCEEDINGS_WSCM_359168, AUTHOR = {Wieling, M. and Sassolini, E. and Cucurullo, S. and Montemagni, S.}, TITLE = {ALT Explored: Integrating an Online Dialectometric Tool and an Online Dialect Atlas}, YEAR = {2016}, ABSTRACT = {In this paper, we illustrate the integration of an online dialectometric tool, Gabmap, together with an online dialect atlas, the Atlante Lessicale Toscano (ALT-Web). By using a newly created url-based interface to Gabmap, ALT-Web is able to take advantage of the sophisticated dialect visualization and exploration options incorporated in Gabmap. For example, distribution maps showing the distribution in the Tuscan dialect area of a specific dialectal form (selected via the ALT-Web website) are easily obtainable. Furthermore, the complete ALT-Web dataset as well as subsets of the data (selected via the ALT-Web website) can be automatically uploaded and explored in Gabmap. By combining these two online applications, macro- and micro-analyses of dialectal data (respectively offered by Gabmap and ALT-Web) are effectively and dynamically combined.}, KEYWORDS = {Lexicon, Lexical Database, Tools, Systems, Applications}, PAGES = {3265-3272}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {LREC 2016}, CONFERENCE_PLACE = {Portorož, Slovenia}, CONFERENCE_DATE = {23-28/05/2016}, } @INPROCEEDINGS{ARRIGONI_2016_INPROCEEDINGS_AKMB_363708, AUTHOR = {Arrigoni, S. and Khan, F. and Monachini, M. 
and Boschetti, F.}, TITLE = {Misurare Memorata Poetis: prime statistiche}, YEAR = {2016}, KEYWORDS = {intertestualità, temi e motivi}, PAGES = {151-155}, URL = {http://www.himeros.eu/aiucd2016/c47.pdf}, CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD. Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture}, CONFERENCE_PLACE = {Ca' Dolfin, Venezia, Italia}, CONFERENCE_DATE = {7-9/9/2016}, BOOKTITLE = {AIUCD 2016-Book of Abstracts}, EDITOR = {Boschetti, F.}, } @INPROCEEDINGS{BARTOLINI_2016_INPROCEEDINGS_BPGGB_362848, AUTHOR = {Bartolini, R. and Pardelli, G. and Goggi, S. and Giannini, S. and Biagioni, S.}, TITLE = {A terminological "journey" in the Grey Literature domain}, YEAR = {2016}, ABSTRACT = {"When we read the articles or papers of a particular domain, we can recognize some lexical items in the texts as technical terms. In a domain where new knowledge is generated, new terms are constantly created to fulfil the needs of the domain, while others become obsolete. In addition, existing terms may undergo changes of meaning..." (Kageura K.,1998/1999). According to Kageura, our aim with this work is to make a "journey" in the Grey Literature (GL) domain in order to offer an overall vision on the terms used and the links between them. Moreover, by performing a terminological comparison over a given period of time it could be possible to trace the presence of obsolete words as well as of neologisms in the most recent research fields. Within this scenario, the work analyzes a corpus constituted of the entire amount of full research papers published in the GL conference series over a time span of more than one decade (2003-2014) with the aim of creating a terminological map of relevant words. "... corpora used to extract terminological units can be further investigated to find semantic and conceptual information on terms or to represent conceptual relationships between terms." (Bourigault D. et al., 2001). 
Another interesting inquiry is the terminology used in the GL conferences for describing the types of documents (Pejšová P. et al., 2012). The work is split up in four sections: creation of the corpus by acquiring the digital papers of GL conference proceedings (GL5 - GL16)1; data cleaning; data processing; terminological analysis and comparison. The corpus - made up of 231 research papers (for a total amount of 785.042 tokens) - was processed using a Natural Language Processing (NLP) tool for term extraction developed at the Institute of Computational Linguistics "Antonio Zampolli" of CNR (Goggi et al. 2015; 2016). This tool is what is called a "pipeline" (that is, a sequence of different tools) which extracts lexical knowledge from texts: in short, this is a rule system tool for knowledge extraction and document indexing that combines NLP technologies for term extraction and techniques to measure the associative strength of multi-words. This tool extracts a list of single (monograms) and multi-word terms (bigrams and trigrams) ordered by frequency with respect to the context. The pipeline - used as semantic engine within the MAPS project - has been customized for the extraction of terms from our corpus. This survey on the results of the information extraction process performed by the described NLP tool has been a sort of linguistic path in the past and present of terminology used in GL proceedings. 
By means of samplings, it has been possible to obtain the terminological flow in GL domain and to determine if and how the lexicon was evolving over these twelve years and investigate on its dynamic nature.}, KEYWORDS = {Grey Literature, Digital Repositories, Open Access}, PAGES = {79-84}, URL = {https://publications.cnr.it/doc/362848}, VOLUME = {18}, ISBN = {978-90-77484-29-6}, CONFERENCE_NAME = {GL18-Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, CONFERENCE_PLACE = {New York, US}, CONFERENCE_DATE = {28-29 November 2016}, BOOKTITLE = {Leveraging Diversity in Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{DELGROSSO_2016_INPROCEEDINGS_DBMG_360640, AUTHOR = {Del Grosso, A. M. and Boschetti, F. and Marchi, S. and Giovannetti, E.}, TITLE = {Vantaggi dell'Astrazione attraverso l'Approccio Orientato agli Oggetti per il Digital Scholarly Editing}, YEAR = {2016}, KEYWORDS = {Object Oriented Design, ADT, Digital Textual Scholarship}, URL = {http://www.himeros.eu/aiucd2016/c33.pdf}, DOI = {10.6092/unibo/amsacta/5559}, ISBN = {978-88-942535-0-4}, CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD. Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture}, CONFERENCE_PLACE = {Ca' Dolfin, Venezia, Italia}, CONFERENCE_DATE = {7-9/09/2016}, } @INPROCEEDINGS{DOLOWYRYBINSKA_2016_INPROCEEDINGS_DS_354799, AUTHOR = {Dolowy Rybinska, N. and Soria, C.}, TITLE = {Surveying the ethnolinguistic vitality of two regional collateral languages: the case of Kashubian and Piedmontese}, YEAR = {2016}, ABSTRACT = {The paper presents the results of a Polish-Italian research project concerning the vitality of two regional collateral languages: Kashubian in Poland and Piedmontese in Italy. 
Despite their different status (Kashubian is a language recognised under the Polish law while Piedmontese is not), they are both perceived as dialects of the State language by the inhabitants of Poland and Italy. The status and prestige of both languages in their respective countries are low; consciousness about the importance of their maintenance within the communities and outside them is weakening. As they belong to the same language family as the dominant language they were/are treated as dialects of the State languages not worthy of preservation. Current accounts of language vitality for Kashubian and Piedmontese are not entirely satisfactory in that they seem to overestimate the importance of the number of speakers over speakers' attitudes and stigma. In this paper, we will present the preliminary results of the survey, focussing on the interdependence between actual and perceived use of the two languages on the one side, and different ethnolinguistic vitality parameters, such as self-assessment of language proficiency, awareness of the language institutional status and policies, attitudes towards the language, and language ideology. This research is a pilot study that aims to raise the discussion on current assessment of ethnolinguistic vitality and to broaden it to other languages that are contested, unrecognized or treated as dialects of the State languages.}, KEYWORDS = {regional and minority languages ethnolinguistic vitality study}, URL = {https://publications.cnr.it/doc/354799}, CONFERENCE_NAME = {Contested Languages in the Old World #2}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {5-6/05/2016}, } @INPROCEEDINGS{FRONTINI_2016_INPROCEEDINGS_FCG_357603, AUTHOR = {Frontini, F. and Carmen, B. and Ganascia, J. 
G.}, TITLE = {REDEN ONLINE: Disambiguation, Linking and Visualisation of References in TEI Digital Editions}, YEAR = {2016}, KEYWORDS = {entity linking, visualization, literary criticism, TEI}, URL = {http://dh2016.adho.org/abstracts/362}, CONFERENCE_NAME = {Digital Humanities 2016}, CONFERENCE_PLACE = {Jagiellonian University \& Pedagogical University, Kraków}, CONFERENCE_DATE = {11-16/07/2016}, BOOKTITLE = {Digital Humanities 2016: Conference Abstracts}, } @INPROCEEDINGS{MANZELLA_2016_INPROCEEDINGS_MBBDDFMMMNS_355476, AUTHOR = {Manzella, G. M. R. and Bartolini, R. and Bustaffa, F. and D'Angelo, P. and De Mattei, M. and Frontini, F. and Maltese, M. and Medone, D. and Monachini, M. and Novellino, A. and Spada, A.}, TITLE = {Marine Planning and Service Platform: Specific Ontology Based semantic Search Engine Serving Data Management and Sustainable Development}, YEAR = {2016}, ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is aiming at building a computer platform supporting a Marine Information and Knowledge System. One of the main objective of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. In oceanography the cost related to data collection is very high and the new paradigm is based on the concept to collect once and re-use many times (for re-analysis, marine environment assessment, studies on trends, etc). This concept requires the access to quality controlled data and to information that is provided in reports (grey literature) and/or in relevant scientific literature. 
Hence, creation of new technology is needed by integrating several disciplines such as data management, information systems, knowledge management...}, KEYWORDS = {Marine Information, Knowledge System}, PAGES = {2}, URL = {http://meetingorganizer.copernicus.org/EGU2016/orals/20144}, VOLUME = {18}, PUBLISHER = {Copernicus GmbH (Katlenburg-Lindau, Germania)}, ISSN = {1607-7962}, CONFERENCE_NAME = {European Geosciences Union General Assembly (EGU 2016)}, CONFERENCE_PLACE = {Vienna, Austria}, CONFERENCE_DATE = {17-22 aprile 2016}, BOOKTITLE = {Geophysical research abstracts (Online)}, } @INPROCEEDINGS{MONACHINI_2016_INPROCEEDINGS_M_368274, AUTHOR = {Monachini, M.}, TITLE = {CLARIN-IT The Italian Common Language Resources and Technology Infrastructure CLARIN-IT: l'infrastruttura di ricerca per le scienze umane e sociali}, YEAR = {2016}, ABSTRACT = {The CLARIN-IT National Coordinator presented a keynote CLARIN-IT, l'Infrastruttura di Ricerca per le Scienze Umane e Sociali, in the 5th Annual Conference of the Associazione per l'Informatica Umanistica e la Cultura Digitale (AIUCD) held in Venezia from 7th to 9th September 2016. It is time for research infrastructures to be able to guarantee interoperability and integration between the instruments for philological studies and the instruments for the analysis of large textual corpora, breaking down the rigid barriers between digital and computational philology, on the one hand, and corpus linguistics on the other hand. 
Programma: https://docs.google.com/viewer?a=v\&pid=sites\&srcid=dW5pdmUuaXR8YWl1Y2QyMDE2fGd4OjIyMDhhMzk2ODk0MjUyNDQ}, KEYWORDS = {CLARIN-IT, scienze umane e sociali}, URL = {http://www.clarin-it.it/en/content/clarin-it-aiucd-2016}, CONFERENCE_NAME = {5th Annual Conference of the Associazione per l'Informatica Umanistica e la Cultura Digitale (AIUCD)}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {7th to 9th September 2016}, } @INPROCEEDINGS{MONACHINI_2016_INPROCEEDINGS_M_382195, AUTHOR = {Monachini, M.}, TITLE = {Infrastrutture e ricerca nel settore umanistico}, YEAR = {2016}, ABSTRACT = {L'informatica applicata allo studio del testo ha una lunga storia che parte dagli anni '50 dello scorso secolo. Nel corso del tempo allo sviluppo di risorse e strumenti prevalentemente pensati per l'analisi linguistica, come ad esempio la lemmatizzazione, si sono affiancati metodi, risorse e strumenti più squisitamente filologici, come la codifica delle varianti, i repertori digitali di molteplici edizioni del medesimo testo e gli strumenti per l'allineamento automatico delle stesse. Tuttavia la conoscenza di queste tecniche ed il loro utilizzo è ancora piuttosto limitato nel panorama degli studi italiani di filologia classica, nonostante alcune acquisizioni concettuali e tecniche di prima grandezza nel settore siano dovute a studiosi italiani. Il workshop si propone di discutere temi e problematiche attinenti la filologia digitale sulla scorta di una rassegna del settore iniziata all'università di Parma con la collaborazione del CNR-ILC di Pisa: o Motivazioni per l'adozione di tecniche di filologia digitale nel campo della ricerca e dell'insegnamento. Barriere al loro utilizzo. 
o I risultati di una rassegna sull'impiego di tecniche di filologia digitale da parte di studiosi italiani sulla base di un questionario o Diffusione della conoscenza del settore in Italia o Correnti e future tematiche di ricerca}, KEYWORDS = {Digital Humanities, Computational Philology}, URL = {https://publications.cnr.it/doc/382195}, CONFERENCE_NAME = {Utilizzo e diffusione di metodi, strumenti e tecnologie digitali per gli studi filologici: l'applicazione della filologia digitale al greco antico}, CONFERENCE_PLACE = {Parma}, CONFERENCE_DATE = {10. 10. 2016}, } @INPROCEEDINGS{MONACHINI_2016_INPROCEEDINGS_MEF_368272, AUTHOR = {Monachini, M. and Enea, A. and Frontini, F.}, TITLE = {CLARIN-IT: servizi per la comunità italiana delle scienze umane e sociali}, YEAR = {2016}, ABSTRACT = {CLARIN-IT -The Italian Common Language Resources and Technology Infrastructure: Monica Monachini - CLARIN Italian National Coordinator Alessandro Enea - Responsible of ILCforCLARIN \& contact person for IDEM Francesca Frontini - Standing Committee for CLARIN Technical Centres (SCCTC) ILC-CNR National Representative}, KEYWORDS = {CLARIN-IT, The Italian Common Language Resources and Technology Infrastructure}, URL = {http://www.clarin-it.it/en/content/clarin-it-idem-day-2016}, CONFERENCE_NAME = {CLARIN-IT @ IDEM Day 2016}, CONFERENCE_PLACE = {Roma [Università degli Studi di Roma Tre]}, CONFERENCE_DATE = {6-8 giugno 2016}, } @INPROCEEDINGS{NAHLI_2016_INPROCEEDINGS_NBAT_363709, AUTHOR = {Nahli, O. and Boschetti, F. and Arrigoni, S. and Tessarolo, L.}, TITLE = {Il corpus di testi arabi in Memorata Poetis}, YEAR = {2016}, KEYWORDS = {letteratura araba, temi e motivi}, PAGES = {157-162}, URL = {http://www.himeros.eu/aiucd2016/c03.pdf}, CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD. 
Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture}, CONFERENCE_PLACE = {Ca' Dolfin, Venezia, Italia}, CONFERENCE_DATE = {7-9/9/2016}, BOOKTITLE = {AIUCD 2016-Book of Abstracts}, EDITOR = {Boschetti, F.}, } @INPROCEEDINGS{PARDELLI_2016_INPROCEEDINGS_PGMBR_362073, AUTHOR = {Pardelli, G. and Goggi, S. and Monachini, M. and Bartolini, R. and Russo, I.}, TITLE = {A Geographical Visualization of GL Community: a Snapshot}, YEAR = {2016}, ABSTRACT = {"Today, in the spirit of science, grey literature communities are called to demonstrate their know-how and merit to wider audiences" [Farace Dominic J., 2011]. This quotation stresses the important role of the several international organizations in producing and disseminating knowledge in the field of Grey Literature (GL): the paper aims to provide a first snapshot of the geographical distribution of GL organizations and their participation to the annual International Conference on Grey Literature over the time (in the period from 2003 to 2015). Nowadays a visual representation of data is often associated with the traditional statistical graphs, in particular for representing complex phenomena by means of maps and diagrams, which allow a deeper and more focused analysis of the data. 
In our case the geographical representation of stakeholders in government, academics, business and industry aims at visualizing the GL community across the globe: it concerns 675 organizations which over the years have contributed to the development of a common vision on the most pressing issues of the field by using new paradigms such as Open Access and the social networks.}, KEYWORDS = {Geographical Visualization, Grey Literature}, PAGES = {67-67}, URL = {https://publications.cnr.it/doc/362073}, VOLUME = {18}, ISBN = {978-90-77484-29-6}, CONFERENCE_NAME = {Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, CONFERENCE_PLACE = {New York}, CONFERENCE_DATE = {November 28-29, 2016}, BOOKTITLE = {GL18 Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{RECCHIA_2016_INPROCEEDINGS_RFMD_362391, AUTHOR = {Recchia, V. and Ferro, M. and Maglie, R. and Dodaro, A.}, TITLE = {Readability of current patient information leaflets for informed consent in UK radiotherapy centers}, YEAR = {2016}, ABSTRACT = {Background: Guidelines on informed consent recommend the use of plain language and readability standards to enhance patient's comprehension, engagement and shared decision making. Aim: To assess the readability of current patient information leaflets (PILs) used for informed consent in radiotherapy. Methods: We evaluated PILs (n=38) from three radiation therapy centers in UK. They regard the most common radiation therapy techniques for different kinds of cancer and body districts, such as bladder, bowel, colo-rectum, brain, breast-chest, female pelvis, prostate, lung, lymphomas, stomach. We analyzed each text with Flesch-Kincaid (F-K) grade level, with higher numbers indicating harder-to-read text (from 0 = easy, to 25 = difficult). 
Then, we compared the related grade levels to the health literacy recommended standard of US grade level 5, indicating that patient education texts might be understood by a typical student in the US primary school. Results: Readability is suboptimal for the analyzed PILs (red, green and blue points in the figure) and should be improved with respect to the international standard score (red dotted line in the figure). The results show a mean grade level equal to 8.1 (std = 0.8), thus suggesting the need of a 3-points decrease on average. Conclusion: Current PILs for informed consent in the three analyzed radiotherapy centers are hardly readable for the average patient. Although the readability scores achieved in the three centers are not very low, substantially higher readability scores should be achieved with novel PILs which explicitly discuss risks/benefits and other elements relevant for informed consent, and should be prepared by following standard recommendations of plain language.}, KEYWORDS = {Ethics and communication, Communicating Risk and Uncertainty, Health Literacy}, PAGES = {1}, URL = {http://www.communication.aau.dk/research/dihm/events/comet2016/}, CONFERENCE_NAME = {COMMUNICATION, MEDICINE AND ETHICS CONFERENCE 2016}, CONFERENCE_PLACE = {Aalborg, Denmark}, CONFERENCE_DATE = {4-6/6/2016}, } @INPROCEEDINGS{RIZZETTO_2016_INPROCEEDINGS_RTFPTBS_363705, AUTHOR = {Rizzetto, M. and Trevisiol, A. and Falcone, D. and Pilon, N. and Tomè, P. and Boschetti, F. and Springmann, U.}, TITLE = {Nuove frontiere delle Digital Humanities in classe: esperienze dal campo}, YEAR = {2016}, KEYWORDS = {didattica, ocr, latino umanistico}, PAGES = {119-122}, URL = {http://www.himeros.eu/aiucd2016/c32.pdf}, CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD. 
Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture}, CONFERENCE_PLACE = {Ca' Dolfin, Venezia, Italia}, CONFERENCE_DATE = {7-9/9/2016}, BOOKTITLE = {AIUCD 2016-Book of Abstracts}, EDITOR = {Boschetti, F.}, } @INPROCEEDINGS{SASSOLINI_2016_INPROCEEDINGS_SCC_382394, AUTHOR = {Sassolini, E. and Cucurullo, S. and Cinini, A.}, TITLE = {I corpora digitali: dall'obsolescenza tecnologica, alla salvaguardia e alla condivisione}, YEAR = {2016}, ABSTRACT = {Il progetto di recupero, nato pochi anni fa come iniziativa fortemente voluta da ILC, prosegue oggi con la collaborazione di molte istituzioni pubbliche e private, impegnate sullo stesso fronte. Approccio al recupero a tappe: inizialmente lavorando su testi che erano stati prodotti per essere indicizzati con le prime procedure di analisi testuale presenti all'ILC sin dalla fine degli anni '70 del secolo scorso. Definizione di criteri da adottare per la scelta dei testi, basati sullo studio di casi significativi e sull'importanza dei materiali, spesso legati alla realizzazione di autorevoli progetti nazionali e internazionali.}, KEYWORDS = {recupero testi, conversione in formato XML, valorizzazione dei risultati}, PAGES = {1-3}, URL = {https://www.eventi.garr.it/it/conf16/home/materiali-conferenza-2016/paper}, CONFERENCE_NAME = {Conferenza GARR 2016-The CreActive Network}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {30/11/2016-02/12/2016}, } @INPROCEEDINGS{STANZIONE_2016_INPROCEEDINGS_SRMTBD_363706, AUTHOR = {Stanzione, A. and Re, G. and Mugelli, G. and Taddei, A. and Boschetti, F. and Del Gratta, R.}, TITLE = {Homeric Greek WordNet: costruire una risorsa lessico-semantica fra ricerca e didattica}, YEAR = {2016}, KEYWORDS = {wordnet, semantica, greco antico}, PAGES = {129-132}, URL = {http://www.himeros.eu/aiucd2016/c40.pdf}, CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD. 
Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture}, CONFERENCE_PLACE = {Ca' Dolfin, Venezia, Italia}, CONFERENCE_DATE = {7-9/9/2016}, BOOKTITLE = {AIUCD 2016-Book of Abstracts}, EDITOR = {Boschetti, F.}, } @TECHREPORT{CARLINO_2016_TECHREPORT_C_483689, AUTHOR = {Carlino, M.}, TITLE = {Rapporto annuale 2015 del CNR-ILC}, YEAR = {2016}, ABSTRACT = {Rapporto Annuale 2015 del Cnr-Istituto di Linguistica Computazionale "Antonio Zampolli" (CNR-ILC)}, KEYWORDS = {CNR-ILC, Annual Report, Rapporto Annuale, ILC, Istituto di Linguistica Computazionale, Zampolli, Activity report}, PAGES = {1-50}, URL = {https://publications.cnr.it/doc/483689}, INSTITUTION = {CNR-Istituto di Linguistica Computazionale "Antonio Zampolli" (CNR-ILC)}, } @MISC{BARONI_2016_MISC_BA_483771, AUTHOR = {Baroni, P. and Affè, F.}, TITLE = {ILC4CLARIN Web Site}, YEAR = {2016}, ABSTRACT = {Sito Web dello ILC4CLARIN Centre at the Institute for Computational Linguistics, realizzato con WordPress, sviluppato in italiano e inglese}, KEYWORDS = {CLARIN, Metadata Providing Centre, Service Providing Centre}, URL = {https://ilc4clarin.ilc.cnr.it}, } @MISC{DELGROSSO_2016_MISC_D_390558, AUTHOR = {Del Grosso, A. M.}, TITLE = {il progetto "Cultura Digitale": Promozione alla cittadinanza Digitale}, YEAR = {2016}, ABSTRACT = {Il contributo presenta il lavoro di allineamento e traduzione svolto con strumenti digitali dagli studenti del Liceo Classico Medi-Livatino di San Marco dei Cavoti (BN) su una selezione di passi tratti dall'opera di Ippocrate "Sulle Arie, Sulle Acque, e Sui Luoghi".}, KEYWORDS = {digital humanities, cultura digitale, tools, scuola digitale}, URL = {https://publications.cnr.it/doc/390558}, } @MISC{DELGROSSO_2016_MISC_D_355220, AUTHOR = {Del Grosso, A. M.}, TITLE = {Modelli concettuali e architetture Object-Oriented per la progettazione e lo sviluppo di una Digital Scholarly Platform}, YEAR = {2016}, ABSTRACT = {"OOP e DH": incontro possibile? 
E' possibile modellare il dominio relativo allo studio scientifico del testo attraverso l'approccio Object-Oriented? Cosa implica, dal punto di vista metodologico e tecnologico, progettare e sviluppare strumenti modulari e riusabili per l'analisi scientifica di risorse testuali? L'intervento esaminerà questi e altri temi di natura ingegneristica nell'ambito delle Digital Humanities. Il seminario, quindi, ripercorrerà alcuni risultati ottenuti all'interno della linea di ricerca presente presso l'ILC-CNR orientata allo sviluppo di componenti software per sistemi Web di linguistica e filologia computazionale volti al trattamento di testi di tradizione medievale, a stampa e di autori moderni e contemporanei. La progettazione di strumenti computazionali nel dominio delle Digital Humanities deve necessariamente rispondere alle esigenze di diverse tipologie di utenti: 1) l'utente generico, inteso come persona curiosa oppure pubblico poco esperto che ha comunque competenze per arricchire il prodotto digitale; 2) l'utente accademico, inteso come lo studente, il docente, o il ricercatore, in grado di aumentare la significatività, l'espressività e la rilevanza della risorsa digitale, 3) il programmatore, inteso come sviluppatore di applicazioni nel campo dell'Informatica Umanistica. Durante il seminario si introdurranno le basi della modellazione Object-Oriented a partire da concreti casi di studio ed esperienze maturate in progetti di ricerca nazionali ed internazionali. In particolare, si illustreranno gli esiti di alcune iniziative: o il progetto "Clavius On the Web", finanziato dal Registro.it, che vede coinvolti due istituti del CNR di Pisa (IIT-CNR e ILC-CNR) e l'Archivio Storico della Pontificia Università Gregoriana (APUG); o il progetto "Greek into Arabic", finanziato dall'European Research Council e diretto dalla Prof.ssa Cristina D'Ancona dell'Università di Pisa; o il progetto PRIN "Per un'edizione dei manoscritti di F. De Saussure", diretto dal Prof. 
Daniele Gambarara dell'Università della Calabria. o il progetto "Talmud" finanziato dal MIUR e partecipato dal CNR, dall'Unione delle Comunità Ebraiche e dal Collegio Rabbinico Italiano.}, KEYWORDS = {Object-Oriented, UML, Filologia Computazionale, Literary Computing, Digital Humanities}, URL = {http://www.labcd.unipi.it/seminari/angelo-mario-del-grosso-modelli-concettuali-e-architetture-object-oriented-per-la-progettazione-e-lo-sviluppo-di-una-digital-scholarly-platform/}, } @MISC{NAHLI_2016_MISC_N_390724, AUTHOR = {Nahli, O.}, TITLE = {Corpus dei testi arabi in "Memorata Poetis"}, YEAR = {2016}, ABSTRACT = {Corpus epigrafico arabo, per motivi didattici e scientifici, i testi sono stati vocalizzati e tradotti in italiano.}, KEYWORDS = {Poesia, Epigrafi, Memorata Poetis, lingua araba}, URL = {http://www.memoratapoetis.it/public/}, } @ARTICLE{ATTARDI_2015_ARTICLE_ABBCDMPSS_366713, AUTHOR = {Attardi, G. and Basile, V. and Bosco, C. and Caselli, T. and Dell'Orletta, F. and Montemagni, S. and Patti, V. and Simi, M. and Sprugnoli, R.}, TITLE = {State of the Art Language Technologies for Italian: The EVALITA 2014 Perspective}, YEAR = {2015}, ABSTRACT = {Shared task evaluation campaigns represent a well established form of competitive evaluation, an important opportunity to propose and tackle new challenges for a specific research area and a way to foster the development of benchmarks, tools and resources. The advantages of this approach are evident in any experimental field, including the area of Natural Language Processing. An outlook on state-of-the-art language technologies for Italian can be obtained by reflecting on the results of the recently held workshop "Evaluation of NLP and Speech Tools for Italian", EVALITA 2014. 
The motivations underlying individual shared tasks, the level of knowledge and development achieved within each of them, the impact on applications, society and economy at large as well as directions for future research will be discussed from this perspective.}, KEYWORDS = {Evaluation Campaign, Natural Language Processing, Dependency Parsing, Sentiment Analysis, Temporal Processing}, PAGES = {43-61}, URL = {https://publications.cnr.it/doc/366713}, VOLUME = {9}, DOI = {10.3233/IA-150076}, PUBLISHER = {Associazione Italiana per l'Intelligenza Artificiale (Bari, Italia)}, ISSN = {1724-8035}, JOURNAL = {Intelligenza Artificiale}, } @ARTICLE{BARBAGLI_2015_ARTICLE_BLDMV_357152, AUTHOR = {Barbagli, A. and Lucisano, P. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Il ruolo delle tecnologie del linguaggio nel monitoraggio dell'evoluzione delle abilità di scrittura: primi risultati}, YEAR = {2015}, ABSTRACT = {L'ultimo decennio ha visto l'affermarsi a livello internazionale dell'uso di tecnologie del linguaggio per lo studio dei processi di apprendimento. Questo contributo riporta i primi e promettenti risultati di uno studio interdisciplinare che si è avvalso di metodi e tecniche di analisi propri della linguistica computazionale, della linguistica e della pedagogia sperimentale. 
Lo studio, finalizzato al monitoraggio dell'evoluzione del processo di apprendimento della lingua italiana, è stato condotto a partire dalle produzione scritte di studenti della scuola secondaria di primo grado con strumenti di annotazione linguistica automatica e di estrazione di conoscenza e ha portato all'identificazione di un insieme di tratti qualificanti il processo di apprendimento linguistico.}, KEYWORDS = {evoluzione delle competenze linguistiche, Didattica Sperimentale, Estrazione di conoscenza, Annotazione linguistica automatica}, PAGES = {99-117}, URL = {https://journals.openedition.org/ijcol/326}, DOI = {10.4000/ijcol.326}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{DELGRATTA_2015_ARTICLE_DFKM_287051, AUTHOR = {Del Gratta, R. and Frontini, F. and Khan, F. and Monachini, M.}, TITLE = {Converting the PAROLE SIMPLE CLIPS Lexicon into RDF with lemon}, YEAR = {2015}, ABSTRACT = {This paper describes the publication and linking of (parts of) PAROLE SIMPLE CLIPS (PSC), a large scale Italian lexicon, to the Semantic Web and the Linked Data cloud using the lemon model. The main challenge of the conversion is discussed, namely the reconciliation between the PSC semantic structure which contains richly encoded semantic information, following the qualia structure of the Generative Lexicon theory and the lemon view of lexical sense as a reified pairing of a lexical item and a concept in an ontology. The result is two datasets: one consists of a list of lemon lexical entries with their lexical properties, relations and senses; the other consists of a list of OWL individuals representing the referents for the lexical senses. 
These OWL individuals are linked to each other by a set of semantic relations and mapped onto the SIMPLE OWL ontology of higher level semantic types.}, KEYWORDS = {lemon, linked data, generative lexicon, RDF, OWL, lexical resource}, PAGES = {387-392}, URL = {http://www.semantic-web-journal.net/content/converting-parole-simple-clips-lexicon-rdf-lemon-0}, VOLUME = {6}, DOI = {10.3233/SW-140168}, PUBLISHER = {IOS Press (Amsterdam, Paesi Bassi)}, ISSN = {1570-0844}, JOURNAL = {Semantic web (Print)}, } @ARTICLE{GIANNINI_2015_ARTICLE_GBGP_329507, AUTHOR = {Giannini, S. and Biagioni, S. and Goggi, S. and Pardelli, G.}, TITLE = {Mapping Italian grey communities: what is there beyond the Academy?}, YEAR = {2015}, ABSTRACT = {This research aims at verifying whether - and eventually how much - the grey literature available on the web is actually structured, accessible or even managed by systems dealing with its organization and aiming at its retrieval and storing. The utmost goal is to build up a map of non-academic communities and their mechanisms for managing, presenting and disseminating this type of material. It is a sort of journey among the streams of the Web, which channel meeting minutes, manifests, fliers, pictures, newspapers articles, journalistic services and audio/video material on various topics. These "grey" products - by conveying basic information about social and popular culture - store, represent and spread knowledge.}, KEYWORDS = {Italian Grey Literature A. 1 INTRODUCTORY AND SURVEY}, PAGES = {17-28}, URL = {http://www.greynet.org/thegreyjournal.html}, VOLUME = {11}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{GOGGI_2015_ARTICLE_GMFBPDBM_334894, AUTHOR = {Goggi, S. and Monachini, M. and Frontini, F. and Bartolini, R. and Pardelli, G. and De Mattei, M. and Bustaffa, F. 
and Manzella, G.}, TITLE = {Marine Planning and Service Platform (MAPS) An Advanced Research Engine for Grey Literature in Marine Science}, YEAR = {2015}, ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is a development of the Marine project (Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013) aiming at building a computer platform for supporting a Marine Information and Knowledge System, as part of the data management activities. One of the main objective of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. We will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data. The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs/problems and their answers.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {171-178}, URL = {https://publications.cnr.it/doc/334894}, VOLUME = {11}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{GOGGI_2015_ARTICLE_GPGBB_329873, AUTHOR = {Goggi, S. and Pardelli, G. and Giannini, S. and Biagioni, S. and Battisti, M.}, TITLE = {La littérature grise des projets de recherche européens}, YEAR = {2015}, ABSTRACT = {Les projets scientifiques financés par la Commission européenne produisent de la littérature grise. 
Une étude menée en 2013 sur 226 projets CNR du 7e programme-cadre (2007-2013) a analysé la typologie, le format et la disponibilité des documents signalés sur le serveur Cordis (rapports de recherche et articles scientifiques) et les sites projets (contenant listes de partenaires, brochures, communiqués,...}, KEYWORDS = {Grey Literature. European Commission Projects}, PAGES = {34-34}, URL = {http://www.cairn.info/revue-i2d-information-donnees-et-documents-2015-1-p-34.htm}, VOLUME = {52}, DOI = {10.3917/i2d.151.0034}, PUBLISHER = {A. D. B. S (Paris, Francia)}, ISSN = {0012-4508}, JOURNAL = {I2D-Information, données \& documents. Pratiques \& recherches}, } @ARTICLE{MARZI_2015_ARTICLE_MP_346413, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {A Neuro-Computational Approach to Understanding the Mental Lexicon}, YEAR = {2015}, ABSTRACT = {Human lexical knowledge does not appear to be organised to minimise storage, but rather to maximise processing efficiency. The way lexical information is stored reflects the way it is dynamically processed, accessed and retrieved. A detailed analysis of the way words are memorised, of the dynamic interaction between lexical representations and distribution and degrees of regularity in input data, can shed some light on the emergence of structures and relations within fully-stored words. We believe that a bottom-up investigation of low-level memory and processing functions can help understand the cognitive mechanisms that govern word processing in the mental lexicon. Neuro-computational models can play an important role in this inquiry, as they help understand the dynamic nature of lexical representations by establishing an explanatory connection between lexical structures and processing models dictated by the micro-functions of human brain. 
Starting from some linguistic, psycholinguistic and neuro-physiological evidence supporting a dynamic view of the mental lexicon as an integrative system, we illustrate Temporal Self Organising-Maps (TSOMs), artificial neural networks that can model such a view by memorising time series of symbolic units (words) as routinized patterns of short-term node activation. On the basis of a simple pool of principles of adaptive Hebbian synchronisation, TSOMs can perceive possible surface relations between word forms and store them by partially overlapping activation patterns, reflecting gradient levels of lexical specificity, from holistic to decompositional lexical representations. We believe that TSOMs offer an algorithmic model of the emergence of high-level, global and language-specific morphological structure through the working of low-level, language-aspecific processing functions, thus promising to bridge the persisting gap between high-level principles of grammar architecture (lexicon vs. rules), computational correlates (storage vs. processing) and low-level principles and localisations of brain functions. Extensions of the current TSOM architecture are envisaged and their theoretical implications are discussed.}, KEYWORDS = {Mental lexicon dynamic storage parallel distributed processing hebbian learning temporal self-organising maps}, PAGES = {493-535}, URL = {http://jcs.snu.ac.kr/jcs/issue/vol16/no4/05+Marzi+and+Pirrelli.pdf}, VOLUME = {16}, PUBLISHER = {Institute for cognitive science, Seoul national university (Seoul, Corea del Sud)}, ISSN = {1976-6939}, JOURNAL = {Journal of cognitive science (Seoul. 
Online)}, } @ARTICLE{PICCINI_2015_ARTICLE_P_452104, AUTHOR = {Piccini, S.}, TITLE = {Transimpersonal constructions in Lithuanian: towards the emergence of Split Intransitivity}, YEAR = {2015}, ABSTRACT = {Transimpersonalines konstrukcijos pastaruoju metu yra patraukusios daugelio tyreju, ypac funkcines tipologijos specialistu, demesi, kadangi jos atlieka svarbu vaidmeni formuojantis dalinio intranzityvumo (ang. split intransitivity) modeliams ivairiose kalbose. Straipsnyje naujausiu pasiekimu sviesoje ivertinamos kai kurios lietuviu kalbos konstrukcijos, gramatikose laikomos beasmenemis (,,impersonalinemis"), meginant interpretuoti jas kaip transimpersonalines. Tyrimas atliktas is sinchronines perspektyvos, didziausia demesi skiriant dabartinei bendrinei kalbai, taciau tam tikrais atvejais lyginama ir su senosios lietuviu kalbos ir ypac tarmiu duomenimis. Analize remiasi fizine bukle nusakanciais veiksmazodziais, kurie priklausomai nuo reiksmes gali buti vartojami ivairiose sintaksinese konstrukcijose. Sinchroniname lygmenyje matomas skirtingas siu eksperienciniu veiksmazodziu elgesys gali buti projektuojamas diachronineje perspektyvoje. Taip galima geriau isryskinti ivairius reanalizes proceso, rodancio laipsniska raida dalinio intranzityvumo atsiradimo kryptimi, etapus. Kai kurie veiksmazodziai, atrodo, reanalizes kelyje yra pazenge gana toli, taciau tikrieji dalinio intranzityvumo modeliai dar nesusiformave. 
Analizuojamuju veiksmazodziu eksperienciniu argumentu subjekto statusas sintakses poziuriu tebelieka problemiskas.}, KEYWORDS = {verbi impersonali, codifica non canonica del soggetto, lituano, intransitività scissa}, PAGES = {19-55}, URL = {http://www.baltistica.lt/index.php/baltistica/article/view/2239/2214}, VOLUME = {50}, DOI = {10.15388/baltistica.50.1.2239}, PUBLISHER = {Mintis; [poi] Vilniaus universiteto leidykla (Vilnius, Lituania)}, ISSN = {0132-6503}, JOURNAL = {Baltistica (Print)}, } @INCOLLECTION{BRANDO_2015_INCOLLECTION_BFG_334082, AUTHOR = {Brando, C. and Frontini, F. and Ganascia, J.}, TITLE = {Disambiguation of Named Entities in Cultural Heritage Texts Using Linked Data Sets}, YEAR = {2015}, ABSTRACT = {This paper proposes a graph-based algorithm baptized REDEN for the disambiguation of authors' names in French literary criticism texts and scientific essays from the 19th century. It leverages knowledge from different Linked Data sources in order to select candidates for each author mention, then performs fusion of DBpedia and BnF individuals into a single graph, and finally decides the best referent using the notion of graph centrality. Some experiments are conducted in order to identify the best size of disambiguation context and to assess the influence on centrality of specific relations represented as edges. This work will help scholars to trace the impact of authors' ideas across different works and time periods.}, KEYWORDS = {Named-entity disambiguation Centrality Linked data Data fusion Digital humanities}, PAGES = {505-514}, URL = {http://link.springer.com/chapter/10.1007%2F978-3-319-23201-0_51}, VOLUME = {539}, DOI = {10.1007/978-3-319-23201-0_51}, ISBN = {978-3-319-23200-3}, BOOKTITLE = {New Trends in Databases and Information Systems}, EDITOR = {Morzy, T. and Valduriez, P. 
and Bellatreche, L.}, } @INCOLLECTION{MARCHI_2015_INCOLLECTION_M_344710, AUTHOR = {Marchi, S.}, TITLE = {GREEK INTO ARABIC, A RESEARCH INFRASTRUCTURE BASED ON COMPUTATIONAL MODULES TO ANNOTATE AND QUERY HISTORICAL AND PHILOSOPHICAL DIGITAL TEXTS Part ii. System components and features}, YEAR = {2015}, ABSTRACT = {Computer technology nowadays allows users to build simple and effective tools designed to meet the needs of researchers and institutions in various fields of research. Since its creation, the World Wide Web prompted the existence of an environment that breaks down the boundaries of time (i.e. synchronous activity) and space (i.e. location of activities), a prerequisite for the design of tools enabling the collaboration among users. Over the past years text processing systems have become part and parcel of the daily language of scholars working in the field of Humanities, despite some objections raised against this type of technology because of their apparent lack of simplicity of usage, appropriateness, and flexibility. Usage requires special attention with respect to the interface between the information system and the user, while appropriateness and flexibility have not been sufficiently taken into account, not to mention that these two desiderata almost seem to be in contrast to each other. Therefore, it is not easy to plan and implement a text processing system which is suitable for specific types of research and at the same time as flexible as to operate in various fields of research.}, KEYWORDS = {textual scholarship, Collaborative Application, web application}, PAGES = {43-56}, URL = {http://www.olschki.it/libro/9788822263933}, VOLUME = {60}, PUBLISHER = {Leo S. Olschki (Firenze, ITA)}, ISBN = {9788822263933}, BOOKTITLE = {Digital texts, translations, lexicons in a multi-modular web application: methods and samples}, EDITOR = {Bozzi, A.}, } @INCOLLECTION{MORGAVI_2015_INCOLLECTION_MNMCFCM_333210, AUTHOR = {Morgavi, G. and Nerino, R. and Marconi, L. 
and Cutugno, P. and Ferraris, C. and Cinini, A. and Morando, M.}, TITLE = {An Integrated Approach to the Well-Being of the Elderly People at Home}, YEAR = {2015}, ABSTRACT = {The paper presents the outline and the preliminary developments of NINFA (iNtelligent Integrated Network For Aged people), a project for the well-being of the elderly people at home. This architecture is based on a service platform suited for elder people called the Virtual Village Network, whose user interface allows to deliver different services at home, namely: user supervision, communication and interaction among users for social inclusion, exergame delivering, monitoring of the wellness status.}, KEYWORDS = {ICT platform, Wellness network services, 3D movement analysis, Linguistic and cognitive analysis, exergames, "at-home" technologies}, PAGES = {265-274}, URL = {https://publications.cnr.it/doc/333210}, VOLUME = {XIV}, DOI = {10.1007/978-3-319-18374-9_25}, ISBN = {978-3-319-18373-2}, BOOKTITLE = {Springer-Ambient Assisted Living-Italian Forum 2014}, EDITOR = {Andò, P. B. and Siciliano, P. P. and Marletta, P. V. and Monteriù, P. A.}, } @INCOLLECTION{PIRRELLI_2015_INCOLLECTION_PFM_330234, AUTHOR = {Pirrelli, V. and Ferro, M. and Marzi, C.}, TITLE = {Computational complexity of abstractive morphology}, YEAR = {2015}, ABSTRACT = {Abstractive and constructive approaches to word structure make radically different assumptions concerning nature and role of the building blocks that make up a speaker's morphological competence. In this contribution, we show that the two views are also computationally different. In particular, we contend that a number of problems arising in connection with a subsymbolic implementation of the constructive view (as epitomised by classical multi-layered perceptrons) are tackled effectively, or disappear altogether, in a neurally-inspired implementation of associative networks, resting on key-notions such as self-organization and emergence. 
A particular variant of Kohonen's Self-Organizing Map is introduced as a model to explore and assess the implications of an abstractive approach in terms of its computational complexity. Details of the model (Temporal Self-Organizing Map, TSOM) and experimental data are shown to illustrate the interplay between processing and storage in language acquisition.}, KEYWORDS = {Word processing, computational complexity, mental lexicon, dynamic memories, self-organisation, word structure, morphology}, PAGES = {141-166}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84938781714\&origin=inward}, DOI = {10.1093/acprof:oso/9780198723769.003.0008}, PUBLISHER = {Oxford University Press (Oxford, GBR)}, ISBN = {978-0-19-872376-9}, BOOKTITLE = {Understanding and Measuring Morphological Complexity}, EDITOR = {Baerman, M. and Brown, D. and Corbett, G. G.}, } @INCOLLECTION{SIMI_2015_INCOLLECTION_SMB_330110, AUTHOR = {Simi, M. and Montemagni, S. and Bosco, C.}, TITLE = {Harmonizing and merging Italian treebanks: Towards a merged Italian dependency treebank and beyond}, YEAR = {2015}, ABSTRACT = {In this paper we address the challenge of combining existing CoNLL-compliant dependency-annotated corpora with the final aim of constructing a bigger treebank for the Italian language. To this end, we defined a methodology for mapping different annotation schemes, based on: (i) The analysis of similarities and differences of considered source and target dependency annotation schemes; (ii) The analysis of the performance of state of the art dependency parsers trained on the source and target treebanks; (iii) The mapping of the source annotation scheme(s) onto a set of target (possibly underspecified) data categories. This methodology was applied in two different case studies. The first one was aimed at constructing a "Merged Italian Dependency Treebank" (MIDT) starting from existing Italian dependency treebanks, namely TUT and ISST-TANL. 
The second case study, still ongoing, consists in the conversion of the MIDT resource into the Stanford Dependencies de facto standard with the final aim of developing an "Italian Stanford Dependency Treebank" (ISDT).}, KEYWORDS = {Harmonization and merging of resources, Italian, Dependency Treebank}, PAGES = {3-23}, URL = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84927143016\&partnerID=q2rCbXpz}, VOLUME = {589}, DOI = {10.1007/978-3-319-14206-7_1}, PUBLISHER = {Springer International Publishing (CH-6330 Cham (ZG), CHE)}, ISBN = {978-3-319-14205-0}, BOOKTITLE = {Harmonization and Development of Resources and Tools for Italian Natural Language Processing within the PARLI Project}, EDITOR = {Basili, R. and Bosco, C. and Delmonte, R. and Moschitti, A. and Simi, M.}, } @INCOLLECTION{SORIA_2015_INCOLLECTION_S_333636, AUTHOR = {Soria, C.}, TITLE = {Assessing the effect of official recognition on the vitality of minority and regional languages: a case study from Italy}, YEAR = {2015}, ABSTRACT = {In 1999, a rather controversial Italian law granted official recognition to twelve endangered regional and minority languages but denied it to others that were nevertheless also classed as endangered by UNESCO and the Ethnologue. This turn of events has produced a perfect scenario to assess the impact of language policies on protected languages and, at the same time, the effects of lack of official protection and recognition for languages that are denied such institutional support. This chapter presents the results of a survey carried out among speakers of these endangered languages. It assesses their vitality in terms of speaker numbers, domains of use, intergenerational transmission and speaker attitudes, arguing that a correlation can be established, on the one hand, between positive speaker attitudes and favourable language policies and, on the other, between lack of policy support and negative language attitudes. 
The chapter further argues language policy can actually alter linguistic behaviour.}, KEYWORDS = {language policy, endangered languages, regional languages}, PAGES = {123-137}, URL = {https://publications.cnr.it/doc/333636}, PUBLISHER = {Cambridge university press (Cambridge, GBR)}, ISBN = {978-1-107-09922-7}, BOOKTITLE = {Policy and Planning for Endangered Languages}, EDITOR = {Jones, M. C.}, } @EDITORIAL{PIRRELLI_2015_EDITORIAL_PMF_329357, AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M.}, TITLE = {Proceedings of the NetWordS Final Conference on Word Knowledge and Word Usage: Representations and Processes in the Mental Lexicon}, YEAR = {2015}, ABSTRACT = {The international conference "Word Knowledge and Word Usage: Representations and processes in the mental lexicon" is the final outcome of 4 years of intense multi-disciplinary research networking and cooperation funded by the European Science Foundation within the framework of the NetWordS programme (May 2011 - April 2015). NetWordS' mission was to bring together experts of various research fields (from brain sciences and computing to cognition and linguistics) and of different theoretical inclinations, to advance the current awareness of theoretical, typological, psycholinguistic, computational and neurophysiological evidence on the structure and processing of words, with a view to developing novel research paradigms and bringing up a new generation of language scholars. The conference was intended to provide a first forum for assessing current progress of crossdisciplinary research on language architecture and usage, and discussing prospects of future synergy. People are known to memorise, parse and access words in a context-sensitive and opportunistic way, by caching their most habitual and productive processing patterns into routinized behavioural schemes. 
Speakers not only take advantage of token-based information such as frequency of individual, holistically stored words, but they are also able to organise stored words through paradigmatic structures (or word families) whose overall size and frequency is an important determinant of ease of lexical access and interpretation. Accordingly, lexical organisation is not necessarily functional to descriptive economy and minimisation of storage, but to more performance-oriented factors such as efficiency of memorisation, access and recall. Usage-based approaches to word processing lend support to this view, to promote explanatory frameworks that aim to investigate the stable correlation patterns linking distributional entrenchment of lexical units with productivity, internal structure and ease of interpretation. Ultimately, this is intended to establish a deep interconnection between performance-oriented, low-level lexical functions such as memorisation, rehearsal, access and recall, and their neuroanatomical correlates.}, KEYWORDS = {mental lexicon, linguistics, brain sciences, psycholinguistics, computing, cognition}, PAGES = {1-189}, URL = {http://ceur-ws.org/Vol-1347/}, VOLUME = {1347}, PUBLISHER = {CEUR-WS.org (Aachen, DEU)}, } @INPROCEEDINGS{ALBANESI_2015_INPROCEEDINGS_ABBDG_332922, AUTHOR = {Albanesi, D. and Bellandi, A. and Benotto, G. and Di Segni, G. and Giovannetti, E.}, TITLE = {When Translation Requires Interpretation: Collaborative Computer-Assisted Translation of Ancient Texts}, YEAR = {2015}, ABSTRACT = {This paper introduces the main features of Traduco, a Web-based, collaborative Computer-Assisted Translation (CAT) tool developed to support the translation of ancient texts. 
In addition to the standard components offered by traditional CAT tools, Traduco includes a number of features designed to ease the translation of ancient texts, such as the Babylonian Talmud, posing specific structural, stylistic, linguistic and hermeneutical challenges.}, KEYWORDS = {Computer-Assisted Translation, Babylonian Talmud}, PAGES = {84-88}, URL = {https://publications.cnr.it/doc/332922}, ISBN = {978-1-941643-63-1}, CONFERENCE_NAME = {9th SIGHUM Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH 2015)}, CONFERENCE_PLACE = {Beijing}, CONFERENCE_DATE = {July 30, 2015}, BOOKTITLE = {Proceedings of the 9th SIGHUM Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities}, } @INPROCEEDINGS{BARBAGLI_2015_INPROCEEDINGS_BLDMV_357146, AUTHOR = {Barbagli, A. and Lucisano, P. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {CItA: un Corpus di Produzioni Scritte di Apprendenti l'Italiano L1 Annotato con Errori}, YEAR = {2015}, ABSTRACT = {In questo articolo presentiamo CItA il primo corpus di produzioni scritte di apprendenti l'italiano L1 del primo e del secondo anno della scuola secondaria di primo grado annotato con errori grammaticali, ortografici e lessicali. Le specificità del corpus e la sua natura diacronica lo rendono particolarmente utile sia per applicazioni linguistico-computazionali sia per studi socio-pedagogici.}, KEYWORDS = {Apprendimento della lingua madre, evoluzione delle competenze linguistiche}, PAGES = {31-35}, URL = {http://www.italianlp.it/wp-content/uploads/2016/03/CItA_errori.pdf}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {978-88-99200-62-6}, CONFERENCE_NAME = {2nd Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {3-4 dicembre 2015}, } @INPROCEEDINGS{BELLANDI_2015_INPROCEEDINGS_BBG_282565, AUTHOR = {Bellandi, A. and Bellusci, A. 
and Giovannetti, E.}, TITLE = {Computer Assisted Translation of Ancient Texts: the Babylonian Talmud Case Study}, YEAR = {2015}, ABSTRACT = {In this paper we introduce some of the features of the Computer Assisted Translation web application developed to support the translation of the Babylonian Talmud (BT) in Italian. The BT is a late antique Jewish anthological corpus, which, as other ancient texts, presents a number of hurdles related to its intrinsic linguistic and philological nature. In this work, we illustrate the solutions we adopted in the system, with particular emphasis on the Translation Memory and the translation suggestion component.}, KEYWORDS = {computer-assisted translation, Babylonian Talmud, Translation Memory}, PAGES = {287-302}, URL = {https://www.degruyter.com/view/book/9781501501289/10.1515/9781501501289.287.xml}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {9781501501289}, CONFERENCE_NAME = {NLPCS 2014: 11th International Workshop on Natural Language Processing and Cognitive Science}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {27-29 ottobre 2014}, BOOKTITLE = {Natural Language Processing and Cognitive Science, Proceedings 2014}, EDITOR = {Sharp, B. and Delmonte, R.}, } @INPROCEEDINGS{BRANDO_2015_INPROCEEDINGS_BFG_344351, AUTHOR = {Brando, C. and Frontini, F. and Ganascia, J.}, TITLE = {Linked data for toponym linking in French literary texts}, YEAR = {2015}, ABSTRACT = {The present article discusses first experiments in toponym linking of Modern French digital editions aiming to provide an external referent to Linked Data sources. We have so far focused on testing two knowledge bases - French DBpedia and Geonames - for recall. 
Results highlight quality issues in these data sets for usage in NLP-tasks in domain-specific heritage texts.}, KEYWORDS = {Named-Entity Linking Linked Data Digital Humanities}, URL = {https://publications.cnr.it/doc/344351}, DOI = {10.1145/2837689.2837699}, PUBLISHER = {Association for Computing Machinery (New York, N. Y, Stati Uniti d'America)}, ISSN = {1933-7825}, ISBN = {978-1-4503-3937-7}, CONFERENCE_NAME = {GIR'15 9th Workshop on Geographic Information Retrieval}, CONFERENCE_PLACE = {Paris}, CONFERENCE_DATE = {26-27th November, 2015}, BOOKTITLE = {GIR '15 Proceedings of the 9th Workshop on Geographic Information Retrieval}, EDITOR = {Purves, R. S. and Jones, C. B.}, } @INPROCEEDINGS{BRUNATO_2015_INPROCEEDINGS_BD_359256, AUTHOR = {Brunato, D. and Dell'Orletta, F.}, TITLE = {ISACCO: a corpus for investigating spoken and written language development in Italian school-age children}, YEAR = {2015}, ABSTRACT = {We present ISACCO (Italian school-age children corpus), a new corpus of oral and written retellings of Italian speaking children attending the primary school. All texts were digitalized and automatically enriched with linguistic information allowing preliminary explorations based on NLP features. Written retellings were also manually annotated with a typology of linguistic errors. 
The resource is conceived to support research and computational modeling of "later language acquisition", with an emphasis for comparative assessment of oral and written language skills across early school grades.}, KEYWORDS = {Child language acquisition, Oral and written language, multi-level linguistic analysis}, PAGES = {62-66}, URL = {http://www.italianlp.it/wp-content/uploads/2016/03/IsaccoCorpus.pdf}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {978-88-99200-62-6}, CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics (CLiC-it 2015)}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {03/12/2015-04/12/2015}, BOOKTITLE = {Proceedings of the Second Italian Conference on Computational Linguistics (CLiC-it 2015)}, EDITOR = {Bosco, C. and Tonelli, S. and Zanzotto, F. M.}, } @INPROCEEDINGS{BRUNATO_2015_INPROCEEDINGS_BDVM_332693, AUTHOR = {Brunato, D. and Dell'Orletta, F. and Venturi, G. and Montemagni, S.}, TITLE = {Design and Annotation of the First Italian Corpus for Text Simplification}, YEAR = {2015}, ABSTRACT = {In this paper, we present design and construction of the first Italian corpus for automatic and semi-automatic text simplification. In line with current approaches, we propose a new annotation scheme specifically conceived to identify the typology of changes an original sentence undergoes when it is manually simplified. Such a scheme has been applied to two aligned Italian corpora, containing original texts with corresponding simplified versions, selected as representative of two different manual simplification strategies and addressing different target reader populations. Each corpus was annotated with the operations foreseen in the annotation scheme, covering different levels of linguistic description. 
Annotation results were analysed with the final aim of capturing peculiarities and differences of the different simplification strategies pursued in the two corpora.}, KEYWORDS = {Annotation Scheme, Automatic Text Simplification}, PAGES = {31-34}, URL = {https://aclweb.org/anthology/W/W15/W15-1604.pdf}, ISBN = {978-1-941643-47-1}, CONFERENCE_NAME = {Proceedings of LAW IX-The 9th Linguistic Annotation Workshop}, CONFERENCE_PLACE = {Denver, Colorado}, CONFERENCE_DATE = {5 giugno 2015}, } @INPROCEEDINGS{CHIARELLA_2015_INPROCEEDINGS_CBBCRZMC_336688, AUTHOR = {Chiarella, D. and Bibuli, M. and Bruzzone, G. and Caccia, M. and Ranieri, A. and Zereik, E. and Marconi, L. and Cutugno, P.}, TITLE = {Gesture-based Language for Diver-Robot Underwater Interaction}, YEAR = {2015}, ABSTRACT = {Underwater environment is characterized by harsh conditions and is difficult to monitor. The CADDY project deals with the development of a companion robot devoted to support and to monitor human operations and activities during the dive. In this scenario the communication and correct reception of messages between the diver and the robot are essential for success of the dive goals. However, the underwater environment poses a set of technical constraints hardly limiting the communication possibilities. For such reasons the solution proposed is to develop a communication language based on the consolidated and standardized diver gestures, commonly employed during professional and recreational dives, thus leading to the definition of a CADDY language, called CADDIAN, and a communication protocol. 
This article focuses on the creation of the language providing alphabet, syntax and semantics: future work will explain the part of recognition of gestures that is still in progress.}, KEYWORDS = {gesture language, human robot interaction, mobile robots, underwater environment, marine systems}, PAGES = {9}, URL = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=7271710\&filter=AND%28p_Publication_Number:7227859%29}, DOI = {10.1109/OCEANS-Genova.2015.7271710}, CONFERENCE_NAME = {OCEANS 2015 MTS/IEEE-Genova}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {18-21/05/2015}, } @INPROCEEDINGS{CHIARELLA_2015_INPROCEEDINGS_CCML_383465, AUTHOR = {Chiarella, D. and Cutugno, P. and Marconi, L. and Lucentini, R.}, TITLE = {Domain-specific languages: a gesture-based approach for Human Robot Interaction in underwater environments}, YEAR = {2015}, ABSTRACT = {This paper introduces a gesture-based language for Human Robot Interaction (HRI) specifically aimed to divers. Divers generally operate in environments with harsh conditions and, at the same time, difficult to monitor; in this scenario, any sudden event can create an emergency situation that may compromise the immersion or even turns into worse consequences involving the safety of divers themselves. To cope with such situations, standard procedures suggest to dive in pairs and to follow well-defined rules to avoid the risk of accidents. However, these procedures may not be sufficient to avoid dangerous events such as failure in the breathing apparatus, burst eardrum, decompression sickness and nitrogen narcosis. FP7 CADDY project was developed to overcome these problems, with the idea to transfer robotics technology in diving: the main aim is improving the level of safety during diving. CADDY project focuses, in fact, on the development of a companion robot designed to support human operations and activities during the dive, as well as to monitor the status of the diver and in such a way to prevent harmful events. 
Various problems have to be confronted to provide the diver a reliable and useful supporting robotic vehicle: one of them is the development of a communication and interaction methodology that allows the diver and the robot to cooperate actively for the fulfilment of tasks required when diving. Communication and correct reception of messages between the diver and underwater robot are essential for the success of the objectives of immersion. However, the underwater environment poses a number of difficult technical constraints limiting the possibilities of communication (electro-magnetic waves strong attenuation and signal scattering and dispersion). The most reliable solution for underwater communication is acoustic technology, with two main drawbacks: high prices of devices and very low data rates. To solve these issues, the solution proposed is the development of a communication language (called CADDIAN) based, partly, on the consolidated and standardized diver gestures that are commonly employed during professional and recreational dives.}, KEYWORDS = {domain-specific languages, human robot interaction, gesture-based language, underwater communication}, PAGES = {12}, URL = {https://publications.cnr.it/doc/383465}, ISBN = {9789597152347}, CONFERENCE_NAME = {IX Conferencia Científica Internacional Lingüística}, CONFERENCE_PLACE = {La Habana, Cuba}, CONFERENCE_DATE = {25-27/11/2015}, } @INPROCEEDINGS{CIGNONI_2015_INPROCEEDINGS_CFF_329387, AUTHOR = {Cignoni, L. and Fornaciari, G. and Fornaciari, A.}, TITLE = {Many hands make light work: collaborative CLIL activities for University courses in Medieval funerary archaeology}, YEAR = {2015}, ABSTRACT = {This paper describes the activities performed by the students of the course of funerary archaeology held at the Division of Palaeopathology of Pisa University in collaboration with the Institute for Computational Linguistics (ILC) of the National Research Council (CNR) in Pisa in the period April-June 2014. 
The lessons, which used a Content and Language Integrated Learning (CLIL) approach, were aimed at studying the funerary beliefs and burial practices in Italy and England in the Middle Ages. The 2014 course followed on from the courses of the year 2012 (focused on the more general issue of taphonomy; primary and secondary burials; single, double, or multiple burials), and 2013 (which examined the world of the ancient Romans and their burial customs of cremation and inhumation). The lessons were conducted by using extracts from self-contained specialized texts that were simple to read and that offered the basic concepts of medieval funerary archaeology. The students were supported by a reference text for funerary archaeology, which established the correct nomenclature to use when describing bodies, grave goods and tombs. Powerpoint slide presentations helped students break up the monotony of the text work and made the material more interesting and engaging. The slides were used to illustrate different types of burials in filled or empty spaces; the position of burials in both rural and urban environments; the disposition of the limbs in the burial; the rise of the Monasteries in the early Middle Ages and of the religious Orders of the Dominicans and Franciscans in the late Middle Ages. Each student was responsible for researching and reporting on a particular topic, and was supported by the use of information and communication techniques. Particular attention was devoted to the Books of Hours, important illuminated medieval manuscripts (containing psalms, short prayers and biblical quotations) that marked the different parts of the day and that were specifically composed for wealthy people. 
Classroom activities ranged from the simpler multi-matching and gap-filling exercises to the more complex tasks of providing definitions for given words, creating mind-maps, enriching a bilingual English-Italian glossary and providing contextualized examples for an English grammar book. Educational videos from the BBC or other channels and pertaining to the topics treated during the lessons were projected each time and were followed by direct questioning and more general conversation, to help students gain proficiency in oral communication. In the last three years, the Italian students from Pisa University have been working in collaboration with those of Ohio University on an excavation project carried out at the Field School in Medieval Archaeology and Bioarchaeology at Badia Pozzeveri (Lucca, Italy), to which the prestigious International journal SCIENCE dedicated a special issue and cover in December 2013. Finally, multidisciplinary elements were also included in the courses, by exploiting the information extracted from videos related to disciplines other than funerary archaeology, for example a BBC Channel 4 video describing the British meals of the day, the origins of which date back to medieval times.}, KEYWORDS = {CLIL, collaborative learning, medieval funerary archaeology, computer technology, archaeological field work}, PAGES = {2271-2279}, URL = {https://publications.cnr.it/doc/329387}, ISBN = {978-84-606-5763-7}, CONFERENCE_NAME = {9th International Technology, Education and Development Conference}, CONFERENCE_PLACE = {Madrid}, CONFERENCE_DATE = {2-4 marzo 2015}, } @INPROCEEDINGS{CRESCI_2015_INPROCEEDINGS_CCDT_337237, AUTHOR = {Cresci, S. and Cimino, A. and Dell'Orletta, F. 
and Tesconi, M.}, TITLE = {Crisis Mapping during Natural Disasters via Text Analysis of Social Media Messages}, YEAR = {2015}, ABSTRACT = {Recent disasters demonstrated the central role of social media during emergencies thus motivating the exploitation of such data for crisis mapping. We propose a crisis mapping system that addresses limitations of current state-of-the-art approaches by analyzing the textual content of disaster reports from a twofold perspective. A damage detection component employs a SVM classifier to detect mentions of damage among emergency reports. A novel geoparsing technique is proposed and used to perform message geolocation. We report on a case study to show how the information extracted through damage detection and message geolocation can be combined to produce accurate crisis maps. Our crisis maps clearly detect both highly and lightly damaged areas, thus opening up the possibility to prioritize rescue efforts where they are most needed.}, KEYWORDS = {crisis informatics, Emergency Management, geoparsing, social media mining, Twitter}, PAGES = {1-8}, URL = {https://publications.cnr.it/doc/337237}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, CONFERENCE_NAME = {Web Information Systems Engineering-WISE 2015}, CONFERENCE_PLACE = {Miami, USA}, CONFERENCE_DATE = {02/11/2015}, BOOKTITLE = {Lecture notes in computer science}, } @INPROCEEDINGS{CRESCI_2015_INPROCEEDINGS_CTCD_336952, AUTHOR = {Cresci, S. and Tesconi, M. and Cimino, A. and Dell'Orletta, F.}, TITLE = {A Linguistically-driven Approach to Cross-Event Damage Assessment of Natural Disasters from Social Media Messages}, YEAR = {2015}, ABSTRACT = {This work focuses on the analysis of Italian social media messages for disaster management and aims at the detection of messages carrying critical information for the damage assessment task. 
A main novelty of this study consists in the focus on out-domain and cross-event damage detection, and on the investigation of the most relevant tweet-derived features for these tasks. We devised different experiments by resorting to a wide set of linguistic features qualifying the lexical and grammatical structure of a text as well as ad-hoc features specifically implemented for this task. We investigated the most effective features that allow to achieve the best results. A further result of this study is the construction of the first manually annotated Italian corpus of social media messages for damage assessment.}, KEYWORDS = {crisis informatics, Damage assessment, Emergency Management, feature selection, social media mining, Social Sensing}, PAGES = {6}, URL = {https://publications.cnr.it/doc/336952}, CONFERENCE_NAME = {Proceedings of the 24th international conference companion on World Wide Web. ACM, 2015}, CONFERENCE_PLACE = {Florence, Italy}, CONFERENCE_DATE = {18/05/2015}, } @INPROCEEDINGS{CUTUGNO_2015_INPROCEEDINGS_CLMC_304735, AUTHOR = {Cutugno, P. and Lucentini, R. and Marconi, L. and Chiarella, D.}, TITLE = {Relaciones sin violencia: lenguaje, estereotipos y sexismo benévolo}, YEAR = {2015}, PAGES = {200-204}, URL = {https://publications.cnr.it/doc/304735}, PUBLISHER = {Centro de Lingüística Aplicada, Ministero de Ciencia, Tecnología y Medio Ambiente (Santiago de Cuba, CUB)}, ISBN = {9789597174295}, CONFERENCE_NAME = {XIV Simposio Internacional de Comunicación Social: retos y perspectivas}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {19-23 gennaio 2015}, BOOKTITLE = {Comunicación Social: retos y perspectivas Vol. I°}, EDITOR = {Ruiz Miyares, L. and Muñoz Alvarado, A. and Alvarez Silva, M. R. and Pérez Joa, Y. and Jackson Rodríguez, D.}, } @INPROCEEDINGS{DELGRATTA_2015_INPROCEEDINGS_DFMPRBGKQSC_342213, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Goggi, S. and Khan, F. 
and Quochi, V. and Soria, C. and Calzolari, N.}, TITLE = {Visualising Italian Language Resources: a Snapshot}, YEAR = {2015}, ABSTRACT = {This paper aims to provide a first snapshot of Italian Language Resources (LRs) and their uses by the community, as documented by the papers presented at two different conferences, LREC2014 and CLiC-it 2014. The data of the former were drawn from the LOD version of the LRE Map, while those of the latter come from manually analyzing the proceedings. The results are presented in the form of visual graphs and confirm the initial hypothesis that Italian LRs require concrete actions to enhance their visibility.}, KEYWORDS = {Italian Language Resources}, PAGES = {100-104}, URL = {https://books.openedition.org/aaccademia/1277?lang=it}, ISBN = {978-88-99200-62-6}, CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics CLiC-it 2015}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {3-4 December 2015}, BOOKTITLE = {Proceedings of the Second Italian Conference on Computational Linguistics CLiC-it 2015}, EDITOR = {Bosco, C. and Tonelli, S. and Zanzotto, F. M.}, } @INPROCEEDINGS{FERRARI_2015_INPROCEEDINGS_FSGD_346045, AUTHOR = {Ferrari, A. and Spagnolo, G. O. and Gnesi, S. and Dell'Orletta, F.}, TITLE = {CMT and FDE: tools to bridge the gap between natural language documents and feature diagrams}, YEAR = {2015}, ABSTRACT = {A business subject who wishes to enter an established technological market is required to accurately analyse the features of the products of the different competitors. Such features are normally accessible through natural language (NL) brochures, or NL Web pages, which describe the products to potential customers. Building a feature model that hierarchically summarises the different features available in competing products can bring relevant benefits in market analysis. A company can easily visualise existing features, and reason about aspects that are not covered by the available solutions. 
However, designing a feature model starting from publicly available documents of existing products is a time consuming and error-prone task. In this paper, we present two tools, namely Commonality Mining Tool (CMT) and Feature Diagram Editor (FDE), which can jointly support the feature model definition process. CMT allows mining common and variant features from NL descriptions of existing products, by leveraging a natural language processing (NLP) approach based on contrastive analysis, which allows identifying domain-relevant terms from NL documents. FDE takes the commonalities and variabilities extracted by CMT, and renders them in a visual form. Moreover, FDE allows the graphical design and refinement of the final feature model, by means of an intuitive GUI}, KEYWORDS = {Software Product Lines, Variability Mining, Tools}, PAGES = {402-410}, URL = {http://dl.acm.org/citation.cfm?doid=2791060.2791117}, DOI = {10.1145/2791060.2791117}, ISBN = {978-1-4503-3613-0}, CONFERENCE_NAME = {19th International Conference on Software Product Line}, CONFERENCE_PLACE = {Nashville, TN, USA}, CONFERENCE_DATE = {20-24/07/2015}, } @INPROCEEDINGS{FERRO_2015_INPROCEEDINGS_FMP_331183, AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {Lexical parsability and morphological structure}, YEAR = {2015}, ABSTRACT = {A classical tenet in the psycholinguistic literature on the mental lexicon is that a parsed affix presents high activation levels (and thus contributes to activation spreading to other words with the same affix), and that such levels are tightly correlated with the affix productivity. In a number of influential papers, it has been suggested that parsability criteria interact with frequency to define morphological productivity in the lexicon. For example, the frequency of a derivative (e.g. government) relative to its base (govern) is shown to be a good predictor for parsability/productivity. 
The higher the frequency ratio, the more likely the morphological structure to be perceived, and the associated affix to be used productively. The present contribution intends to offer a computational explanatory basis for this correlational evidence, and assess its applicability to the acquisition of complex inflectional paradigms. In those languages, like Italian and German, whose inflection is stem-based rather than word-based, there is often no single paradigmatic form which can act as a base by being properly contained in all other inflected variants. Yet, it seems intuitive to suggest that verbs that are inflected for one paradigm cell only (e.g. neighbouring), are learned earlier and more easily but exhibit lower levels of perceived inflectional structure than verbs with richer paradigms. This appears to be in good accord with experimental evidence of time latencies in lexical decision, which are shown to correlate negatively with token frequency, paradigm size and paradigm entropy. Our simulations, based on Temporal Self-Organizing Maps (TSOMs) allow us to establish an interesting connection between inflectional parsability, frequency-based paradigm structure, and acquisitional constraints on the interaction between the human processor and working memory. Self-organising topological models of the mental lexicon can mimic the spatial and temporal organization of memory structures supporting the processing of symbolic sequences, and can provide an interesting framework for testing integrative accounts of lexical processing/acquisition as the complex result of general-purpose operations on word stimuli (e.g. working memory, long-term storage, sensory-motor mapping, rehearsal, unit integration, unit analysis, executive control, time-series processing), in line with recent acquisitions on the neuro-functional architecture of the perisylvian language network in the left hemisphere of human brain. 
Simulations of the incremental acquisition of "mini-paradigms" (small islands of morphological contrast encompassing up to three different forms for the same verb) support the hypothesis that perception of structure (parsability) and morphological productivity strongly correlate in the inflectional lexica of German and Italian. In particular, by monitoring longitudinal progress in storage and generalisation of differently distributed inflectional paradigms in the two languages, we show that: i) high-frequency forms are stored and accessed significantly earlier than low-frequency forms; ii) deeply entrenched but paradigmatically isolated forms tend to block usage of other forms in the same paradigm; iii) low-frequency evenly distributed (highly entropic) intra-paradigmatic forms are acquired later but are easily extended. Our investigation credits the proposed computational framework with psycholinguistic plausibility, and grounds parsability-based models of morphological productivity on a specific, explicit proposal of lexical architecture. This provides an explanatory basis for both psycholinguistic and linguistic accounts of morphological structure, and offers an intermediate framework for scientific inquiry bridging the gap between linguistic units and functional units in neurosciences. Finally, it makes the interesting suggestion that principles of morpheme-based organisation of the mental lexicon are compatible with a learning strategy requiring memorisation of full forms.}, KEYWORDS = {morphological structure, word processing, token/type frequency}, PAGES = {22-37}, URL = {http://mmm.lis.upatras.gr/index.php/mmm/issue/view/293/showToc}, PUBLISHER = {Università degli Studi di Bologna (Bologna, Italia)}, ISSN = {1826-7491}, CONFERENCE_NAME = {Morphology and Semantics-Ninth Mediterranean Morphology Meeting}, CONFERENCE_PLACE = {Dubrovnik (Croatia)}, CONFERENCE_DATE = {15-18/09/2013}, BOOKTITLE = {Morphology and Semantics}, EDITOR = {Audring, J. and Koutsoukos, N.
and Masini, F. and Raffaelli, I.}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FBG_307909, AUTHOR = {Frontini, F. and Boukhaled, M. A. and Ganascia, J.}, TITLE = {Linguistic Pattern Extraction and Analysis for Classic French Plays}, YEAR = {2015}, ABSTRACT = {Great authors of fiction and theatre have the capacity of creating memorable characters that take life and become almost as real as living persons to the readers/audience. The study of characterization, namely of how this is achieved, is a well-researched topic in corpus stylistics: for instance (Mahlberg, 2012) attempts to identify typical lexical patterns for memorable Dickens' characters by extracting those lexical bundles that stand out (namely are overrepresented) in comparison to a general corpus. In other works, authorship attribution methods are applied to the different characters of a play to identify whether the author has been able to provide each of them with a "distinct" voice. For instance (Vogel \& Lynch, 2008) compare individual Shakespeare characters against the whole play or even against all plays of the same author. The purpose of this paper is to propose a methodology for the study characterization of several characters in French plays of the classical period. The tools developed are meant to support textual analysis by: 1) Verifying the degree of characterization of each character with respect to others. 2) Automatically inducing a list of linguistic features that are significant, representative for that character. Preliminary investigations have been conducted on plays by Moliere, cross-comparing four protagonists from four different plays. 
The proposed methodology relies on sequential data mining for the extraction of linguistic patterns and on correspondence analysis for comparison of patterns frequencies in each character and for the visual representation of such differences.}, KEYWORDS = {computational stylometry, theatre, sequential pattern mining}, PAGES = {3}, URL = {http://lipn.univ-paris13.fr/~charnois/conscilaGenres/resumes/frontini.pdf}, CONFERENCE_NAME = {Journée ConSciLa (Confrontations en Sciences du Langage) Grammaire des genres et des styles: quelles approches privilégier ?}, CONFERENCE_PLACE = {Paris}, CONFERENCE_DATE = {16/01/2015}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FBG_330648, AUTHOR = {Frontini, F. and Brando, C. and Ganascia, J.}, TITLE = {Semantic Web based Named Entity Linking for Digital Humanities and Heritage Texts}, YEAR = {2015}, ABSTRACT = {This paper proposes a graph based methodology for automatically disambiguating authors' mentions in a corpus of French literary criticism. Candidate referents are identified and evaluated using a graph based named entity linking algorithm, which exploits a knowledge-base built out of two different resources (DBpedia and the BnF linked data). The algorithm expands previous ones applied for word sense disambiguation and entity linking, with good results. Its novelty resides in the fact that it successfully combines a generic knowledge base such as DBpedia with a domain specific one, thus enabling the efficient annotation of minor authors. This will help specialists to follow mentions of the same author in different works of literary criticism, and thus to investigate their literary appreciation over time.}, KEYWORDS = {named-entity linking, linked data, digital humanities}, PAGES = {77-88}, URL = {http://ceur-ws.org/Vol-1364/paper9.pdf}, VOLUME = {Vol-1364}, PUBLISHER = {M.
Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {SW4SH 2015 Semantic Web for Scientific Heritage 2015}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {June, 1st 2015}, BOOKTITLE = {SW4SH 2015 Semantic Web for Scientific Heritage 2015}, EDITOR = {Zucker, A. and Draelants, I. and Zucker, C. F. and Monnin, A.}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FBG_331797, AUTHOR = {Frontini, F. and Brando, C. and Ganascia, J.}, TITLE = {Domain-adapted named-entity linker using Linked Data}, YEAR = {2015}, ABSTRACT = {We present REDEN, a tool for graph-based Named Entity Linking that allows for the disambiguation of entities using domain-specific Linked Data sources and different configurations (e.g. context size). It takes TEI-annotated texts as input and outputs them enriched with external references (URIs). The possibility of customizing indexes built from various knowledge sources by defining temporal and spatial extents makes REDEN particularly suited to handle domain-specific corpora such as enriched digital editions in the Digital Humanities.}, KEYWORDS = {named-entity disambiguation, evaluation, linked data, digital humanities}, PAGES = {10}, URL = {http://ceur-ws.org/Vol-1386/named_entity.pdf}, VOLUME = {Vol-1386}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Workshop on NLP Applications: Completing the Puzzle co-located with the 20th International Conference on Applications of Natural Language to Information Systems (NLDB 2015)}, CONFERENCE_PLACE = {Passau, Germany}, CONFERENCE_DATE = {June 17-19, 2015}, BOOKTITLE = {Proceedings of the Workshop on NLP Applications: Completing the Puzzle}, EDITOR = {Izquierdo, R.}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FQM_304304, AUTHOR = {Frontini, F. and Quochi, V. 
and Monachini, M.}, TITLE = {Generative Lexicon and polysemy: inducing logical alternations}, YEAR = {2015}, ABSTRACT = {The current paper brings together the results of a series of experiments for inducing regular sense alternations, or regular/ logical polysemy, from a computational lexicon based on the Generative Lexicon theory. The results are discussed in light of the potential benefits and uses of the amended algorithm.}, KEYWORDS = {Polysemy, Generative Lexicon, Logical Alternations}, PAGES = {7}, URL = {https://publications.cnr.it/doc/304304}, PUBLISHER = {MAPLEX2015 Multiple Approaches to Lexicon Conference (Yamagata, JPN)}, CONFERENCE_NAME = {MAPLEX2015 Multiple Approaches to Lexicon Conference}, CONFERENCE_PLACE = {Yamagata, Japan}, CONFERENCE_DATE = {February 9-10, 2015}, EDITOR = {Hsieh, S. and Kanzaki, K.}, } @INPROCEEDINGS{GIANNINI_2015_INPROCEEDINGS_GBGP_329374, AUTHOR = {Giannini, S. and Biagioni, S. and Goggi, S. and Pardelli, G.}, TITLE = {Mapping Italian grey communities: what is there beyond the Academy?}, YEAR = {2015}, ABSTRACT = {This research aims at verifying whether - and eventually how much - the grey literature available on the web is actually structured, accessible or even managed by systems dealing with its organization and aiming at its retrieval and storing. The utmost goal is to build up a map of non-academic communities and their mechanisms for managing, presenting and disseminating this type of material. It is a sort of journey among the streams of the Web, which channel meeting minutes, manifests, fliers, pictures, newspapers articles, journalistic services and audio/video material on various topics. These "grey" products - by conveying basic information about social and popular culture - store, represent and spread knowledge.}, KEYWORDS = {Italian Grey Literature A. 
1 INTRODUCTORY AND SURVEY}, PAGES = {17-29}, URL = {http://www.textrelease.com/publications/proceedings.html}, VOLUME = {16}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-23-4}, CONFERENCE_NAME = {GL16-Sixteenth International Conference on Grey Literature Grey Literature Lobby: Engines and Requesters for Change}, CONFERENCE_PLACE = {Washington DC, USA (Library of Congress)}, CONFERENCE_DATE = {8-9 December 2014}, BOOKTITLE = {Grey Literature Lobby: Engines and Requesters for Change}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{GOGGI_2015_INPROCEEDINGS_GMFBPDBM_329370, AUTHOR = {Goggi, S. and Monachini, M. and Frontini, F. and Bartolini, R. and Pardelli, G. and De Mattei, M. and Bustaffa, F. and Manzella, G.}, TITLE = {Marine Planning and Service Platform (MAPS): An Advanced Research Engine for Grey Literature in Marine Science}, YEAR = {2015}, ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is a development of the Marine project (Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013) aiming at building a computer platform for supporting a Marine Information and Knowledge System, as part of the data management activities. One of the main objectives of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. We will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data.
The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs/problems and their answers.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {108-114}, URL = {http://www.textrelease.com/gl16program.html}, VOLUME = {16}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-23-4}, CONFERENCE_NAME = {Sixteenth International Conference on Grey Literature Grey Literature Lobby: Engines and Requesters for Change}, CONFERENCE_PLACE = {Library of Congress Washington D. C., USA}, CONFERENCE_DATE = {December 8-9 2014}, BOOKTITLE = {Grey Literature Lobby: Engines and Requesters for Change}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{GOGGI_2015_INPROCEEDINGS_GPSGB_318501, AUTHOR = {Goggi, S. and Pardelli, G. and Sassi, M. and Giannini, S. and Biagioni, S.}, TITLE = {A terminological survey on the titles of the Seventh Framework Programme (FP7)}, YEAR = {2015}, ABSTRACT = {This paper focuses on the automatic extraction of domain-specific knowledge from the European Commission projects of the 7th Framework Programme, hereinafter referred as FP7. The study is divided in three parts: the first part introduces the work starting from the building up of a corpus containing the titles of European Projects of the whole FP7 in order to obtain a relevant terminological sample for the different domains; the second describes software and methods while the third part focuses on the evaluation of results. Finally, we conclude by suggesting possible directions for further development of a comparison between terminological extraction from FP7 and FP5/FP6.}, KEYWORDS = {7th Framework Programme (FP7), Natural Language Processing, Terminology, Knowledge extraction, Grey Literature, I. 2. 7 Natural Language Processing. 
Text analysis, I. 2. 1 Applications and Expert Systems. Natural language interfaces}, PAGES = {223-227}, URL = {https://publications.cnr.it/doc/318501}, ISBN = {978-959-7174-28-8}, CONFERENCE_NAME = {Fourteenth International Symposium on Comunicación Social: retos y perspectivas}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {19-23 de enero 2015}, EDITOR = {Ruiz Miyares, L. and Álvarez Silva, M. R. and Muñoz Alvarado, A.}, } @INPROCEEDINGS{KHAN_2015_INPROCEEDINGS_KF_329646, AUTHOR = {Khan, F. and Frontini, F.}, TITLE = {Using Ontologies to Model Polysemy in Lexical Resources}, YEAR = {2015}, ABSTRACT = {In this article we look at how the use of ontologies can assist in analysing polysemy in natural languages. We develop a model, the Lexical-Sense-Ontology model (LSO), to represent the interaction between a lexicon and ontology, based on lemon. We use the LSO model to show how default rules can be used to represent semi-productivity in polysemy as well as discussing the kinds of ontological information that are useful for studying polysemy.}, KEYWORDS = {Polysemy, Ontology, Default Logic}, URL = {http://www.aclweb.org/anthology/W/W15/W15-0404.pdf}, CONFERENCE_NAME = {Workshop on Language and Ontologies}, CONFERENCE_PLACE = {London}, CONFERENCE_DATE = {14/04/2015}, BOOKTITLE = {Proceedings of the Workshop on Language and Ontologies}, } @INPROCEEDINGS{MARCONI_2015_INPROCEEDINGS_MCLCMM_304763, AUTHOR = {Marconi, L. and Cutugno, P. and Lucentini, R. and Chiarella, D. and Morgavi, G. 
and Morando, M.}, TITLE = {La tecnología como sostén de la organización de datos lingüísticos concernientes a las plantas medicinales}, YEAR = {2015}, PAGES = {605-609}, URL = {https://publications.cnr.it/doc/304763}, PUBLISHER = {Centro de Lingüística Aplicada, Ministero de Ciencia, Tecnología y Medio Ambiente (Santiago de Cuba, CUB)}, ISBN = {9789597174301}, CONFERENCE_NAME = {XIV Simposio Internacional de Comunicación Social: retos y perspectivas}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {19-23 gennaio 2015}, BOOKTITLE = {Comunicación Social: retos y perspectivas Vol. II°}, EDITOR = {Ruiz Miyares, L. and Muñoz Alvarado, A. and Alvarez Silva, M. R. and Pérez Joa, Y. and Jackson Rodríguez, D.}, } @INPROCEEDINGS{MARZI_2015_INPROCEEDINGS_MFP_329352, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Lexical emergentism and the "frequency-by-regularity" interaction}, YEAR = {2015}, ABSTRACT = {In spite of considerable converging evidence of the role of inflectional paradigms in word acquisition and processing, little efforts have been put so far into providing detailed, algorithmic models of the interaction between lexical token frequency, paradigm frequency, paradigm regularity. We propose a neurocomputational account of this interaction, and discuss some theoretical implications of preliminary experimental results.}, KEYWORDS = {morphological structure, frequency distribution, temporal self-organising maps}, PAGES = {37-41}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84927156830\&origin=inward}, VOLUME = {1347}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {NetWordS Final Conference on Word Knowledge and Word Usage: Representations and Processes in the Mental Lexicon}, CONFERENCE_PLACE = {Pisa (Italy)}, CONFERENCE_DATE = {30-31/03 01/04 2015}, BOOKTITLE = {Word Knowledge and Word Usage 2015}, EDITOR = {Pirrelli, V. and Marzi, C.
and Ferro, M.}, } @INPROCEEDINGS{NAHLI_2015_INPROCEEDINGS_NM_342436, AUTHOR = {Nahli, O. and Marchi, S.}, TITLE = {Improved Written Arabic Word Parsing through Orthographic, Syntactic and Semantic constraints}, YEAR = {2015}, ABSTRACT = {The script-based and morphological characteristics of the Arabic language increase considerably the number of alternative analyses output by any morphological parser that does not use orthographic, syntactic and semantic constraints. In order to reduce time-wasting and error-prone proliferation of multiple outputs to be filtered in a post-processing phase, we have tried to optimize word processing by providing the morphological parser with multiple levels of information. We have operated at three such levels: orthography, morpho-syntax and semantics.}, KEYWORDS = {Arabic Language, Arabic NLP, Orthography, Morpho-syntax, Semantics}, PAGES = {210-214}, URL = {http://www.aaccademia.it/elenco-libri?aaref=CLIC_2015}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {9788899200626}, CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics CLiC-it 2015}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {3-4 Dicembre 2015}, } @INPROCEEDINGS{PIRRELLI_2015_INPROCEEDINGS_PNBDM_333414, AUTHOR = {Pirrelli, V. and Nahli, O. and Boschetti, F. and Del Gratta, R. and Marzi, C.}, TITLE = {Computational Linguistics and Language Physiology: Insights from Arabic NLP and Cooperative Editing}, YEAR = {2015}, ABSTRACT = {Computer processing of written Arabic raises a number of challenges to traditional parsing architectures on many levels of linguistic analysis. In this contribution, we review some of these core issues and the demands they make, to suggest different strategies to successfully tackle them. In the end, we assess these issues in connection with the behaviour of neuro-biologically inspired lexical architectures known as Temporal Self-Organising Maps. 
We show that, far from being language-specific problems, issues in Arabic processing can shed light on some fundamental characteristics of the human language processor, such as structure-based lexical recoding, concurrent, competitive activation of output candidates and dynamic selection of optimal solutions.}, KEYWORDS = {Non-concatenative morphology, Optical Character Recognition, WordNet, Temporal Self-organising Maps, Mental Lexicon, Language neuro-physiology}, PAGES = {1-8}, URL = {http://dl.acm.org/citation.cfm?id=2802612}, DOI = {10.1145/2802612.2802637}, ISBN = {978-1-4503-3295-8}, CONFERENCE_NAME = {Third AIUCD Annual Conference-Humanities and Their Methods in the Digital Ecosystem}, CONFERENCE_PLACE = {Bologna (IT)}, CONFERENCE_DATE = {18-19/09/2014}, BOOKTITLE = {Third AIUCD Annual Conference-Humanities and Their Methods in the Digital Ecosystem}, EDITOR = {Tomasi, F. and Del Turco, R. R. and Tammaro, A. M.}, } @INPROCEEDINGS{RICHTER_2015_INPROCEEDINGS_RCDV_357144, AUTHOR = {Richter, S. and Cimino, A. and Dell'Orletta, F. and Venturi, G.}, TITLE = {Tracking the Evolution of Written Language Competence: an NLP-based Approach}, YEAR = {2015}, ABSTRACT = {In this paper, we present an NLP-based innovative approach for tracking the evolution of written language competence relying on different sets of linguistic features that predict text quality. 
This approach was tested on a corpus of essays written by Italian L1 learners of the first and second year of the lower secondary school.}, KEYWORDS = {Evolution of Written Language Competence, multi-level linguistic analysis}, PAGES = {236-240}, URL = {http://www.italianlp.it/wp-content/uploads/2016/03/tracking-language-competence.pdf}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {978-88-99200-62-6}, CONFERENCE_NAME = {2nd Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {3-4 dicembre 2015}, } @INPROCEEDINGS{RUSSO_2015_INPROCEEDINGS_RCM_332590, AUTHOR = {Russo, I. and Caselli, T. and Monachini, M.}, TITLE = {Extracting and Visualising Biographical Events from Wikipedia}, YEAR = {2015}, ABSTRACT = {This work presents a proposal for the development of a natural language processing module for event and temporal analysis of biographies as available in Wikipedia. At the current level of development, we restricted the extraction to temporally anchored events as they represent salient information which can be further used to extract additional events and facilitate their chronological ordering and the representation of a person's timeline. Visualising data about basic facts concerning groups of people helps with historical reasoning and enables comparisons among them.}, KEYWORDS = {mining biographies for structured information, visualising biographical data, temporal information}, PAGES = {111-115}, URL = {http://ceur-ws.org/Vol-1399/paper17.pdf}, VOLUME = {Vol-1399}, CONFERENCE_NAME = {BD2015 Biographical Data in a Digital World 2015}, CONFERENCE_PLACE = {Amsterdam}, CONFERENCE_DATE = {April 9, 2015}, BOOKTITLE = {BD2015 Biographical Data in a Digital World 2015}, EDITOR = {Braake, S. T. and Fokkens, A. and Sluijter, R. and Declerck, T. and Wandl Vogt, E.}, } @INPROCEEDINGS{RUSSO_2015_INPROCEEDINGS_RCS_331215, AUTHOR = {Russo, I. and Caselli, T.
and Strapparava, C.}, TITLE = {SemEval-2015 Task 9: CLIPEval Implicit Polarity of Events}, YEAR = {2015}, ABSTRACT = {Sentiment analysis tends to focus on the polarity of words, combining their values to detect which portion of a text is opinionated. CLIPEval wants to promote a more holistic approach, looking at psychological researches that frame the connotations of words as the emotional values activated by them. The implicit polarity of events is just one aspect of connotative meaning and we address it with a task that is based on a dataset of sentences annotated as instantiations of pleasant and unpleasant events previously collected in psychological research as the ones on which human judgments converge.}, KEYWORDS = {sentiment analysis}, PAGES = {443--450}, URL = {http://alt.qcri.org/semeval2015/cdrom/pdf/SemEval077.pdf}, ISBN = {978-1-941643-40-2}, CONFERENCE_NAME = {Proceedings of SemEval-2015}, CONFERENCE_PLACE = {Denver, Colorado, USA}, CONFERENCE_DATE = {giugno 4-5, 2015}, } @INPROCEEDINGS{SORIA_2015_INPROCEEDINGS_S_332517, AUTHOR = {Soria, C.}, TITLE = {Towards a notion of "Digital Language Diversity"}, YEAR = {2015}, ABSTRACT = {This paper introduces the concept of digital language diversity and advocates for its increase in order to foster the digital vitality of languages, and secure their overall vitality.}, KEYWORDS = {digital language diversity, NLP, less-resourced languages, regional languages, minority languages, digital rights}, PAGES = {111--125}, URL = {https://publications.cnr.it/doc/332517}, CONFERENCE_NAME = {3rd International Conference on Linguistic and Cultural Diversity in Cyberspace}, CONFERENCE_PLACE = {Yakutsk, Russian Federation}, CONFERENCE_DATE = {30/06/2014-03/07/2014}, BOOKTITLE = {Linguistic and Cultural Diversity in Cyberspace-Proceedings of the 3rd International Conference}, EDITOR = {Kuzmin, E. and Parshakova, A. and Ignatova, D.}, } @INPROCEEDINGS{VENTURI_2015_INPROCEEDINGS_VBDM_340387, AUTHOR = {Venturi, G. 
and Bellandi, T. and Dell'Orletta, F. and Montemagni, S.}, TITLE = {NLP-Based Readability Assessment of Health-Related Texts: a Case Study on Italian Informed Consent Forms}, YEAR = {2015}, ABSTRACT = {The paper illustrates the results of a case study aimed at investigating and enhancing the accessibility of Italian health-related documents by relying on advanced NLP techniques, with particular attention to informed consent forms. Results achieved show that the features automatically extracted from the linguistically annotated text and ranging across different levels of linguistic description have a high discriminative power in order to guarantee a reliable readability assessment.}, KEYWORDS = {Readability assessment, health-related information}, PAGES = {131--141}, URL = {http://www.aclweb.org/anthology/W15-2618}, ISBN = {978-1-941643-32-7}, CONFERENCE_NAME = {Sixth International Workshop on Health Text Mining and Information Analysis (Louhi)}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {17 settembre 2015}, } @INPROCEEDINGS{ALBANESI_2015_INPROCEEDINGS_ABBG_340309, AUTHOR = {Albanesi, A. and Bellandi, A. and Benotto, G. and Giovannetti, E.}, TITLE = {Translation, Annotation and Knowledge Modelling of the Babylonian Talmud: the Traduco System}, YEAR = {2015}, ABSTRACT = {In this work, we are going to present the Traduco System, a collaborative web-based application for the translation of the Babylonian Talmud (BT) into Italian. The System has been designed around a Computer-Assisted Translation (CAT) component, constituting its core. However, Traduco is not limited to assist the translation process and to provide printing functionalities. In fact, it allows linguistic and semantic annotations and advanced searches, paving the way to the construction of a talmudic knowledge base. In order to achieve these results, the Traduco development process abided by a model that took into account aspects of Natural Language Processing and Knowledge Engineering. 
The component based architectural structure was implemented using the object oriented Java 2 Enterprise Edition framework.}, KEYWORDS = {Computer-Assisted Translation, Interpretation, Semantic Annotation, Babylonian Talmud}, URL = {https://dh-abstracts.library.virginia.edu/works/2399}, CONFERENCE_NAME = {Digital Humanities 2015}, CONFERENCE_PLACE = {Sydney}, CONFERENCE_DATE = {29/06-03/07/2015}, } @INPROCEEDINGS{BOSCHETTI_2015_INPROCEEDINGS_BDDMDN_295474, AUTHOR = {Boschetti, F. and Del Gratta, R. and Del Grosso, A. and Monachini, M. and Diakoff, H. and Nahli, O.}, TITLE = {Collaborative Philology on the way to Web Services: the case of CoPhiWordnet}, YEAR = {2015}, ABSTRACT = {Starting from previous initiatives of the CoPhiLab, we show how they can be reinterpreted as Web Services, especially when they become part of a wider scenario: Web Services are used to make connections between lexicons, semantic resources and a fine grained text management. Linked Open Data is chosen to be the paradigm used to link the different resources, but also as the modality of data presentation.}, KEYWORDS = {Collaborative Philology, Web Services, Linked Open Data, Text Services, Text Interpretation}, URL = {http://langrid.org/wlsi2015/program.html}, CONFERENCE_NAME = {The Second International Workshop on Worldwide Language Service Infrastructure, WLSI 2015}, CONFERENCE_PLACE = {Kyoto}, CONFERENCE_DATE = {22-23rd January 2015}, } @INPROCEEDINGS{DEFELICE_2015_INPROCEEDINGS_D_300634, AUTHOR = {De Felice, I.}, TITLE = {GraDes: a corpus of grasp descriptions}, YEAR = {2015}, URL = {https://publications.cnr.it/doc/300634}, CONFERENCE_NAME = {AISV 2015}, CONFERENCE_PLACE = {Bologna}, CONFERENCE_DATE = {28-30/01/2014}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_F_315607, AUTHOR = {Frontini, F.}, TITLE = {What makes them different: the extraction of distinctive linguistic patterns for the protagonists of Molière's plays}, YEAR = {2015}, ABSTRACT = {Quantitative approaches to the study of 
style in literature are far from a modern novelty. They have however recently gained more and more popularity, not only among computer scientists and corpus linguistics, but also among some influential literary critics. The present panorama of quantitative techniques is very rich, but often confusing, with a plethora of denominations and methodologies often difficult to reconcile; computer scientists classify their work as stylometry or computational stylistics, while linguists may use the label corpus stylistics, and finally critics like Franco Moretti will talk about macro-analysis and distant reading. This talk will try first to identify the differences between these trends, distinguishing between corpus based and corpus driven approaches on the methodological side (Quiniou et al 2012), and (following Ramsey 2011) between experimental and hermeneutical approaches. Finally we will present ongoing work conducted at Labex OBVIL on syntactic pattern extraction from theatrical characters. The proposed approach, using correspondence analysis to extract distinctive traits for each character, is imagined rather as an hermeneutical tool, in the sense that it does not seek to demonstrate that two different characters have been endowed with significantly different stylistic traits by the playwright, but it does enable the visualisation of their relative distances and the extraction of those elements that make them distinct.}, URL = {https://publications.cnr.it/doc/315607}, CONFERENCE_NAME = {Cycle des séminaires ILES LIMSI}, CONFERENCE_PLACE = {Paris}, CONFERENCE_DATE = {03/02/2015}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_F_329647, AUTHOR = {Frontini, F.}, TITLE = {Analyse et extraction des motifs syntaxiques dans la prose de Robert Challe et de ses apocryphes}, YEAR = {2015}, ABSTRACT = {Cette contribution presente une extraction et une analyse des motifs syntaxiques dans la prose de Robert Challe et de ses apocryphes. 
En particulier nous analysons les différence dans la syntaxe des contes originaux des Illustres Françaises et celle des contes apocryphes.}, KEYWORDS = {Robert Challe, authorship attribution, stilistica computazionale}, URL = {http://obvil.paris-sorbonne.fr/sites/default/files/projets/analyse_motifs_syntaxiques_if_et_apocryphes.pdf}, CONFERENCE_NAME = {Robert Challe: approches numériques des questions d'auctorialité}, CONFERENCE_PLACE = {Paris}, CONFERENCE_DATE = {28/03/2015}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_F_332668, AUTHOR = {Frontini, F.}, TITLE = {Mining for characterising patterns in literature using correspondence analysis: an experiment on French novels}, YEAR = {2015}, ABSTRACT = {The talk presents and describes a bottom up methodology for the detection of stylistic traits in the syntax of literary texts. The extraction of syntactic patterns is performed blindly by a sequential pattern mining algorithm, while the identification of significant and interesting features is performed later by using correspondence analysis and filtering for the most contributive patterns.}, KEYWORDS = {computational stylistics, French}, URL = {https://publications.cnr.it/doc/332668}, CONFERENCE_NAME = {Göttingen Dialog in Digital Humanities}, CONFERENCE_PLACE = {Göttingen}, CONFERENCE_DATE = {14/07/2015}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_F_336421, AUTHOR = {Frontini, F.}, TITLE = {Trattamento automatico del linguaggio per le Digital Humanities. Riconoscimento e disambiguazione di menzioni di autori in testi di critica letteraria}, YEAR = {2015}, ABSTRACT = {L'intervento scaturisce da una collaborazione tra ILC-CNR e il Labex OBVIL di Parigi. Lo scopo del progetto è quello di adattare ed estendere algoritmi di riconoscimento, classificazione e disambiguazione di entità nominate (in particolare menzioni di autori) nel "Corpus Critique", un insieme di testi di critica letteraria francese che il Labex OBVIL sta pubblicando in edizione digitale (formato TEI). 
Tali algoritmi si basano su approcci TAL supervisionati e non supervisionati e sfruttano massicciamente le basi di conoscenza, sia generiche (DBpedia) che di dominio, disponibili online sotto forma di linked data; lo scopo di tali lavori è di produrre risorse testuali annotate per facilitare la ricerca nell'ambito della storia della critica letteraria e della storia delle idee in generale. Durante il seminario verranno introdotti i formati e le risorse utilizzate, i criteri e le problematiche di annotazione emersi, e gli algoritmi riconoscimento e disambiguazione di entità nominate sviluppati. Più in generale si cercherà di mostrare con alcuni casi di utilizzo quali siano i vantaggi di arricchire risorse testuali con questo livello di annotazione, nel più ampio contesto delle convergenze tra digital humanities e trattamento automatico del linguaggio. Link http://obvil.paris-sorbonne.fr/ https://github.com/cvbrandoe/REDEN/blob/master/README.md}, KEYWORDS = {Named-entity disambiguation Centrality Linked data Data fusion Digital humanities}, URL = {https://publications.cnr.it/doc/336421}, CONFERENCE_NAME = {Seminario di Cultura Digitale}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {04/11/2015}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FB_342185, AUTHOR = {Frontini, F. and Bénard, E.}, TITLE = {The Syntax of Stage. Studying Linguistic Patterns in Molière}, YEAR = {2015}, ABSTRACT = {Theatrical dialogue is a very peculiar type of communication, namely a written text that aims to mimic orality. Great playwrights use dialogue to create iconic human types, that actors then bring to life. Characterisation, comical effects and other plot devices are often achieved through the use of specific linguistic patterns. For this reason theatrical dialogue is an interesting test bed for computer-aided literary analysis and stylometric tools. 
In this talk we shall analyse the application of advanced pattern extraction techniques to the study of Molière's dialogue and characters, where by "pattern" we mean sequences of lexical elements and parts of speech. In particular we shall see how different types of extractions may provide experts with different views on the texts and target different aspects of stylistic choice.}, KEYWORDS = {Computational stylistics, syntactic patterns, Molière}, URL = {http://www.uni-goettingen.de/de/525494.html}, CONFERENCE_NAME = {Göttinger philologisches Forum}, CONFERENCE_PLACE = {Göttingen, Germany}, CONFERENCE_DATE = {03/12/2015}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FBG_332819, AUTHOR = {Frontini, F. and Boukhaled, M. A. and Ganascia, J. G.}, TITLE = {Moliere's Raisonneurs: a quantitative study of distinctive linguistic patterns}, YEAR = {2015}, KEYWORDS = {Computational Stylistics, Correspondence analysis, Corpus linguistics, Molière}, PAGES = {114--117}, URL = {http://ucrel.lancs.ac.uk/cl2015/doc/CL2015-AbstractBook.pdf}, CONFERENCE_NAME = {Corpus Linguistics 2015}, CONFERENCE_PLACE = {Lancaster}, CONFERENCE_DATE = {21-24/07/2015}, BOOKTITLE = {Corpus Linguistics 2015-Abstract Book}, EDITOR = {Formato, F. and Hardie, A.}, } @INPROCEEDINGS{GIANNINI_2015_INPROCEEDINGS_GBGP_342303, AUTHOR = {Giannini, S. and Biagioni, S. and Goggi, S. and Pardelli, G.}, TITLE = {Grey Literature citations in the age of Digital Repositories and Open Access}, YEAR = {2015}, ABSTRACT = {The work measures grey citations in the years 2012, 2013 and 2014 and then describes the features of GL documents cited in different areas of knowledge: Computational Linguistics, Computer Science and Engineering. 
With the aim to survey a wide and varied range of resources, we selected a sample data based on the bibliographic references of articles contained in 4 journals - all indexed by the ISI Web of Science and with an Impact Factor over the last three years - and two proceedings of international conferences held in 2012 and 2014.}, KEYWORDS = {Grey Literature, Digital Repositories, Open Access}, PAGES = {109--110}, URL = {http://greyguide.isti.cnr.it/attachments/category/27/GL17_Program_Book.pdf}, VOLUME = {17}, ISBN = {978-90-77484-26-5}, CONFERENCE_NAME = {Seventeenth International Conference on Grey Literature. A New Wave of Textual and Non-Textual Grey Literature}, CONFERENCE_PLACE = {Amsterdam, NL}, CONFERENCE_DATE = {December 1-2}, BOOKTITLE = {GL17 Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{GOGGI_2015_INPROCEEDINGS_GPBFMMDB_342221, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Frontini, F. and Monachini, M. and Manzella, G. and De Mattei, M. and Bustaffa, F.}, TITLE = {A semantic engine for grey literature retrieval in the oceanography domain}, YEAR = {2015}, ABSTRACT = {Here we present the final results of MAPS (Marine Planning and Service Platform), an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. In previous publications the general architecture of the system as well as the set of metadata (Common Data Index) used to describe the documents were presented [3]; it was shown how individual oceanographic data-sets could be indexed within the MAPS library by types of measure, measurement tools, geographic areas, and also linked to specific textual documentation. 
Documentation is described using the current international standards: Title, Authors, Publisher, Language, Date of publication, Body/Institution, Abstract, etc.; serial publications are described in terms of ISSN, while books are assigned ISBN; content of various types on electronic networks is described by means of doi and url. Each description is linked to the document. Thanks to this, the MAPS library already enables researchers to go from structured oceanographic data to documents describing it. But this was not enough: documents may contain important information that has not been encoded in the metadata. Thus an advanced Search Engine was put in place that uses semantic-conceptual technologies in order to extract key concepts from unstructured text such as technical documents (reports and grey literature) and scientific papers and to make them indexable and searchable by the end user in the same way as the structured data (such as oceanographic observations and metadata) is. More specifically once a document is uploaded in the MAPS library, key domain concepts in documents are extracted via a natural language processing pipeline and used as additional information for its indexing. The key term identification algorithm is based on marine concepts that were pre-defined in a domain ontology, but crucially it also allows for the discovery of new related concepts. So for instance starting from the domain term salinity, related terms such as sea salinity and average sea salinity will also be identified as key terms and used for indexing and searching documents. A hybrid search system is then put in place, where users can search the library by metadata or by free text queries. In the latter case, the NLP pipeline performs an analysis of the text of the query, and when key concepts are matched, the relevant documents are presented. The results may be later refined by using other structured information (e.g. date of publication, area, ...). 
Currently a running system has been put in place, with data from satellites, buoys and sea stations; such data is documented and searchable by its relevant metadata and documentation. Results of quantitative evaluation in terms of information retrieval measures will be presented in the poster; more specifically, given an evaluation set defined by domain experts and composed of pre-defined queries together with documents that answer such queries, it will be shown how the system is highly accurate in retrieving the correct documents from the library. Though this work focuses on oceanography, its results may be easily extended to other domains; more generally, the possibility of enhancing the visibility and accessibility of grey literature via its connection to the data it describes and to an advanced full text indexing are of great relevance for the topic of this conference.}, KEYWORDS = {Information Extraction, Search Engine, Oceanography}, PAGES = {76--77}, URL = {https://publications.cnr.it/doc/342221}, VOLUME = {17}, ISBN = {978-90-77484-26-5}, CONFERENCE_NAME = {Seventeenth International Conference on Grey Literature. A New Wave of Textual and Non-Textual Grey Literature}, CONFERENCE_PLACE = {Amsterdam}, CONFERENCE_DATE = {December 1-2}, BOOKTITLE = {GL17 Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{MONTEMAGNI_2015_INPROCEEDINGS_MWN_367807, AUTHOR = {Montemagni, S. and Wieling, M. and Nerbonne, J.}, TITLE = {The contribution of dialectometry to the study of the dialects of Italy. A case study on Tuscan}, YEAR = {2015}, ABSTRACT = {We will illustrate the extent to which the recent advances of dialectometry can help to gain insight into the nature of linguistic variation - both synchronically and diachronically - in the study of the dialects of Tuscany, which have a special status in the complex puzzle of Italian dialects. 
This will be done by discussing the results achieved in a case study carried out over the last five years based on the corpus of dialectal data of the Atlante Lessicale Toscano ('Lexical Atlas of Tuscany', henceforth ALT, Giacomelli et al., 2000), a regional linguistic atlas focusing on dialectal variation throughout Tuscany, a region where both Tuscan and non-Tuscan dialects are spoken.}, KEYWORDS = {dialectometry, lexical atlas, italian dialects, Tuscany, Atlante Lessicale Toscano}, URL = {http://media.leidenuniv.nl/legacy/montemagni-wieling-nerbonne.pdf}, CONFERENCE_NAME = {Italian Dialect Meeting 2015 \& CIDSM X}, CONFERENCE_PLACE = {Leiden University-Centre for Linguistics}, CONFERENCE_DATE = {23 June 2015}, } @INPROCEEDINGS{SORIA_2015_INPROCEEDINGS_S_332521, AUTHOR = {Soria, C.}, TITLE = {Towards an Alliance for Digital Language Diversity: Vision, Goals, and Challenges}, YEAR = {2015}, ABSTRACT = {In order to foster the world's digital language diversity, and to ensure equal digital opportunities for languages, we encourage the creation of an Alliance for Digital Language Diversity, i.e. a network of different stakeholders involved in the creation and deployment of data. The Alliance needs - and presupposed - educational activities aimed at building the necessary digital skills and creating the psychological self-confidence necessary for speakers to produce data using their mother tongue.}, KEYWORDS = {digital language diversity, less-resourced languages, minority languages, digital rights, data production}, URL = {https://publications.cnr.it/doc/332521}, CONFERENCE_NAME = {Ugra Global Expert Meeting on Multilingualism in Cyberspace}, CONFERENCE_PLACE = {Khanty-Mansiysk, Russia}, CONFERENCE_DATE = {4-9/07/2015}, } @INPROCEEDINGS{VENTURI_2015_INPROCEEDINGS_VRMSTFB_340388, AUTHOR = {Venturi, G. and Rinnone, S. and Montemagni, S. and Sassi, M. and Terranova, G. and Flore, E. 
and Bellandi, T.}, TITLE = {Language technologies for automatic readability assessment of health-related Information: a preliminary investigation into the informed consent forms used in a regional health service}, YEAR = {2015}, ABSTRACT = {Rationale: Within an information society, where everyone should be able to access all available information, improving access to written language is becoming more and more a central issue. This is the case for health-related information which should be accessible to all members of the society, including people who have reading difficulties as a result of a low education level or of language-based learning disabilities or because the language of the text is not their native language. Moreover, the breakdown of doctor-patient communication is one of the most frequent cause of adverse events. Research questions: We conducted a preliminary investigation to assess the readability of a corpus of informed consent forms used before a clinical procedure in the hospitals of a Regional Healthcare Service. Secondary goals include the comparison of readability across specialties and healthcare trusts. Methods: Providing complex scientific information in a way that is comprehensible to a lay person is a challenge that nowadays can be addressed by resorting to advanced Natural Language Processing (NLP) techniques, which make it possible to monitor the linguistic complexity of texts at the syntactic and lexical levels and to support their simplification, whenever needed. The study has been carried out by combining NLP-enabled feature extraction and state-of-the-art machine learning algorithms. To this end we used READ-IT, the first NLP-based readability assessment tool for Italian. Results: We analysed 584 documents, covering 29 specialties, for a total of 607.790 word tokens, currently used at the 36 public hospitals in Tuscany. 
Although the readability level of all documents in the corpus is low, both at the lexical and syntactic level, significant differences can be observed between specialties and healthcare trust releasing the forms. With the readability level ranging between 0 (easy-to-read) and 100 (difficult-to-read), it resulted that the pediatric informed consent documents are the most easy-to-read forms (with an average score of 75) while the most difficult-to read documents are documents of the surgical area (whose average score is 80) (standard deviation 2). Discussion: The state of the art resulting from this preliminary study shows that NLP-based readability assessment tools can help to measure the linguistic complexity of informed consent forms and guide the editor to identify linguistically complex passages that need to be simplified, either syntactically or lexically. The use of an assessment tool designed for the general language is the main limitation of the study and should be addressed through the customization of the tool to assess the readability of the healthcare jargon. 
A further step of the research consider also the design of a guidance to prepare readable informed consent forms.}, KEYWORDS = {Readability assessment, health-related information}, URL = {http://static1.squarespace.com/static/561c0d01e4b0b5ad2e65cc48/t/561d44dfe4b089431662d174/1444758751213/LibrettoProgramma.pdf}, CONFERENCE_NAME = {ISCOME 2015 Conference: "The Golden Bridge: Communication and Patient Safety"}, CONFERENCE_PLACE = {Montecatini Terme}, CONFERENCE_DATE = {15-16 giugno 2015}, } @TECHREPORT{CUCURULLO_2015_TECHREPORT_C_353217, AUTHOR = {Cucurullo, S.}, TITLE = {Sviluppo di funzioni software per il recupero di testi dell'Archivio Testuale dell'ILC e conversione in un formato di rappresentazione XML/TEI}, YEAR = {2015}, ABSTRACT = {Il presente rapporto documenta le attività svolte nell'ambito della Convenzione Operativa relativa allo sviluppo di funzioni software per il recupero di testi dell'Archivio Testuale dell'ILC e la conversione in un formato di rappresentazione XML/TEI, stipulata all'interno dell'Accordo di Collaborazione Scientifica ILC-CNR - Accademia della Crusca. In particolare, il rapporto si focalizza sui seguenti punti oggetto della Convenzione Operativa: 1. definizione di un formato di rappresentazione XML/TEI che tenga conto da un lato della tipologia di annotazioni presenti nei testi di partenza e dall'altro delle analisi ed elaborazioni a cui i testi convertiti dovranno essere sottoposti; 2. sviluppo di procedure di conversione dal formato dei "Periodici Milanesi" al formato XML/TEI e verifica dei risultati mediante parsing XML. 
Il Rapporto ripercorre le diverse fasi del lavoro, con particolare attenzione all'analisi dell'archivio testuale di partenza e ai risultati raggiunti, per arrivare a una discussione delle questioni che rimangono al momento aperte e degli sviluppi che possono prospettarsi per tali attività.}, KEYWORDS = {banca-dati testuale, Periodici Milanesi}, PAGES = {43}, URL = {https://publications.cnr.it/doc/353217}, } @TECHREPORT{CUCURULLO_2015_TECHREPORT_C_353218, AUTHOR = {Cucurullo, S.}, TITLE = {Sviluppo di funzioni software per il recupero di testi dell'Archivio Testuale dell' ILC e conversione in un formato di rappresentazione XML/TEI Fase 2}, YEAR = {2015}, ABSTRACT = {Il presente rapporto documenta le attività svolte nell'ambito della Convenzione Operativa relativa allo sviluppo di funzioni software per il recupero di testi dell'Archivio Testuale dell'ILC e la conversione in un formato di rappresentazione XML/TEI, stipulata all'interno dell'Accordo di Collaborazione Scientifica ILC-CNR - Accademia della Crusca. In particolare, questo rapporto si focalizza sul seguente oggetto della Convenzione Operativa: "sviluppo di procedure di conversione dal formato DBT al formato XML/TEI, secondo le indicazioni contenute nel Report relativo alla Fase 1 e verifica dei risultati mediante parsing XML costituito dal corpus dell'800 e '900 di testi estratto dal Patrimonio Testuale ILC, la cui composizione è stata concordata con l'Accademia della Crusca". Buona parte delle soluzioni adottate per quella tipologia di testi è stata scelta per essere utilizzata anche in testi non lemmatizzati, come nel caso di opere di autori italiani dell'Ottocento e del Novecento. 
La strutturazione generale del documento XML TEI in header e body e la definizione dei principali TAG utilizzati è condivisa sia dai testi di questo Corpus che da quelli lemmatizzati dei Periodici Milanesi, da cui siamo partiti perché presentavano una maggiore casistica di codifiche e contemporaneamente un formato di origine più lontano nel tempo. Si tratta infatti di formati e supporti di memorizzazione che precedono l'era del Personal Computer e che hanno già subito la trasformazione da EBCDIC ad ASCII.}, KEYWORDS = {Archivi Testuali}, PAGES = {21}, URL = {https://publications.cnr.it/doc/353218}, } @TECHREPORT{MARZI_2015_TECHREPORT_M_330235, AUTHOR = {Marzi, C.}, TITLE = {Word knowledge and word usage-Representations and processes in the mental lexicon}, YEAR = {2015}, ABSTRACT = {The final NetWordS Conference, held on the 30th and 31st of March, and 1st of April 2015 in Pisa, was convened by Prof. Pier Marco Bertinetto, Dr. Vito Pirrelli and Dr. Claudia Marzi, and brought together 91 participants (scholars, Post-Docs, PhD students) from numerous European, and some non-European, countries. A 3-day schedule involved all participants in a focused, cross-disciplinary discussion on representations and processes in the mental lexicon. People are known to understand, memorise and parse words in a context-sensitive, opportunistic way, by caching their most habitual and productive processing patterns into routinized behavioural schemes, similarly to what we observe for sequences of coordinated motor acts. Speakers, however, do not only take advantage of token-based information such as frequency of individual, holistically stored words, or episodic memories of word usage, but they are also able to organise stored word forms through abstract paradigmatic structures (or word families) whose overall size and distribution are important determinants of lexical categorisation, inference and productivity. 
Lexical organisation is, in fact, not necessarily functional to descriptive economy and minimisation of storage, but appears to be influenced by more dynamic, communication-oriented functions such as memorisation, prediction-based recognition and production. Lending support to this view, usage-based approaches to word processing have recently offered novel explanatory frameworks that capitalise on the stable correlation patterns between lexical representations on the one hand and process-based operations that make representations functional to communicative exchanges on the other hand. By focusing on the battery of cognitive functions supporting verbal communication (ranging from input recoding to rehearsal, access, recall and coactivation) and by exploring their psycholinguistic correlates and neuroanatomical substrates, these approaches promote a new view of language architecture as an emergent property of the interaction between language-specific input conditions and low-level, domain-specific cognitive predispositions.}, KEYWORDS = {word knowledge, word usage, mental lexicon, interdisciplinary approach}, PAGES = {2--12}, URL = {http://www.networds-esf.eu/uploads/NetWordS/Science_Meeting_Scientific_Report_5810.pdf}, } @MISC{BARONI_2015_MISC_B_349786, AUTHOR = {Baroni, P.}, TITLE = {2015-1-IT02-KA204-015090 DLDP: Interactive Web Site}, YEAR = {2015}, ABSTRACT = {Sito Web interattivo di DLDP - Digital Language Diversity Project (Programma Erasmus+ | Accordo di Sovvenzione N° 2015-1-IT02-KA204-015090), realizzato con Drupal, sviluppato in inglese, italiano, basco, finlandese, francese, tedesco e spagnolo}, KEYWORDS = {Sito web}, URL = {http://www.dldp.eu}, } @MISC{BARONI_2015_MISC_B_483772, AUTHOR = {Baroni, P.}, TITLE = {CLARIN-IT Web Site}, YEAR = {2015}, ABSTRACT = {Sito Web del Consorzio Nazionale CLARIN-IT, realizzato con Drupal, sviluppato in inglese e italiano}, KEYWORDS = {CLARIN, National Consortium}, URL = {https://www.clarin-it.it}, } 
@MISC{BARONI_2015_MISC_B_483785, AUTHOR = {Baroni, P.}, TITLE = {LaRI Web Site}, YEAR = {2015}, ABSTRACT = {Sito Web del Gruppo di Ricerca del CNR-ILC "LaRI - Risorse e Infrastrutture Linguistiche", realizzato con WordPress, sviluppato in italiano e inglese}, KEYWORDS = {risorse linguistiche, infrastrutture linguistiche}, URL = {http://lari.ilc.cnr.it}, } @MISC{CININI_2015_MISC_CCM_390864, AUTHOR = {Cinini, A. and Cutugno, P. and Marconi, L.}, TITLE = {Sviluppo di una banca dati strutturata di trascrizioni di parlato di singoli soggetti anziani monitorati nel tempo}, YEAR = {2015}, ABSTRACT = {L'Istituto di Linguistica Computazionale "Antonio Zampolli" del Consiglio Nazionale delle Ricerche, ILC-CNR, in qualità di partner del progetto Ninfa "iNtelligent Integrated Network For Aged people" e nell'ambito del WP3 "Analisi e test di implementazione di deficit cognitivo attraverso l'analisi del linguaggio", ha realizzato una banca dati strutturata costituita da un corpus di registrazioni e di trascrizioni dei singoli soggetti anziani monitorati nel tempo.}, KEYWORDS = {Natural Language Processing, Cognitive Impairment, trattamento automatico del linguaggio, analisi del linguaggio}, URL = {https://publications.cnr.it/doc/390864}, } @MISC{DANCONA_2015_MISC_DBNFCBDM_390659, AUTHOR = {D'Ancona, C. and Bozzi, A. and Nahli, O. and Farina, M. and Coda, E. and Boschetti, F. and Del Grosso, A. M. and Marchi, S.}, TITLE = {Banca dati testuale Greek into Arabic}, YEAR = {2015}, ABSTRACT = {Banca dati testuale con la codifica XML della pericopatura dei testi Greco-Arabo di alcuni trattati delle Enneadi di Plotino.}, KEYWORDS = {Digital Humanities, Computational Philology, Greek into Arabic, http://g2a.ilc.cnr.it}, URL = {http://g2a.ilc.cnr.it/}, } @MISC{DELGROSSO_2015_MISC_D_390562, AUTHOR = {Del Grosso, A. M.}, TITLE = {Una applicazione Web per lo studio specialistico dei testi. 
Il modello adottato e i risultati fino ad oggi ottenuti}, YEAR = {2015}, ABSTRACT = {Il lavoro si concentra sulla realizzazione di una libreria di moduli software relativi ad una applicazione Web per la Textual Scholarship. Tale libreria è basata su un modello che considera la molteplicità degli approcci per analizzare un testo, soprattutto, ma non esclusivamente, antico. In questo quadro il modello prende in considerazione elementi di carattere filologico e linguistico fra i quali anche quelli che derivano dalla Linguistica computazionale. Dal momento che è impensabile che un sistema, sia pure complesso e articolato in sottosistemi, possa ambire a intervenire in qualunque punto della filiera delle analisi scientifiche sui testi, il lavoro è particolarmente apprezzabile perché impostato sulla base di una architettura informatica multi-modulare. In tal modo è possibile inserire nel sistema un cospicuo numero di moduli software ed è aperto alla possibilità di inserirne (o farne inserire) molti altri a seconda di specifiche necessità. Fra i moduli più significativi, sono già attivi quelli per: 1) l'annotazione di parti di testo (dalla singola parola ad una espressione completa, ecc.); 2) la classificazione delle annotazioni stesse secondo una tipologia indicata dall'utente (per esempio, annotazione di tipo semantico, morfologico, ontologico, ecc.); 3) la produzione di indici e concordanze; 4) l'allineamento fra testo e eventuale traduzione (antica o moderna); 5) l'estrazione di named entity (NER). Il modello e i moduli realizzati hanno mostrato grande efficacia in almeno 3 progetti: 1) Progetto PRIN 2008 "Edizione digitale dei manoscritti di F. de Saussure"; 2) Progetto ERC advanced grant "Greek into Arabic: Philosophical Concepts and Linguistic Bridges"; 3) Progetto "Traduzione Italiana del Talmud Babilonese". 
L'applicazione parzialmente già realizzata prevede un auspicabile sviluppo nei prossimi anni con sperimentazioni su testi manoscritti di autori moderni e contemporanei, oltre che su opere antiche e medievali per finalità di critica testuale, è interamente open source e sviluppata con l'utilizzo di standard internazionali, quali, tra l'altro, il sistema di mark-up TEI.}, KEYWORDS = {computational philology, digital humanities, software engineering}, URL = {https://publications.cnr.it/doc/390562}, } @MISC{MANCINI_2015_MISC_MPDL_390658, AUTHOR = {Mancini, L. and Pedretti, I. and Del Grosso, A. M. and Luzzi, D.}, TITLE = {Banca dati testuale codifica delle lettere Cristoforo Clavius}, YEAR = {2015}, ABSTRACT = {Banca dati testuale delle lettere di Cristoforo Clavio derivante dal lavoro di codifica fatto adottando il vocabolario XML e le linee guida della text encoding initiative (TEI). L'attività è frutto del progetto Clavius on The Web.}, KEYWORDS = {digital humanities, Cristoforo Clavio, computational philology, TEI-XML}, URL = {http://claviusontheweb.it}, } @MISC{MARCHI_2015_MISC_MD_390657, AUTHOR = {Marchi, S. and Del Grosso, A. M.}, TITLE = {Greek into Arabic philological Web platform}, YEAR = {2015}, ABSTRACT = {Piattaforma filologico-computazionale sviluppata nell'ambito del progetto ERC 2009 Advanced Grant n. 249431. Titolo: Greek into Arabic. Philosophical concepts and linguistic bridges.}, KEYWORDS = {computational philology, digital humanities, ERC, Greek into Arabic}, URL = {http://g2a.ilc.cnr.it/}, } @MISC{NAHLI_2015_MISC_N_390722, AUTHOR = {Nahli, O.}, TITLE = {Banca dati dell'analisi morfo-sintattica del testo "Aflūṭīn ʻinda al-ʻArab", ʻAbd al-Raḥmān Badawī, Cairo 1955, 1966}, YEAR = {2015}, ABSTRACT = {Banca dati testuali con l'analisi morfo-sintattica del testo "Aflūṭīn ʻinda l-ʻArab"; editore ʻA. 
Badawī, Dār al-Nahḍat al-ʻarabiyya, Cairo 1966}, KEYWORDS = {analisi morfo-sintattica, Lingua araba, Greek Into Arabic}, URL = {http://g2a.ilc.cnr.it:8080/Teologia_Wapp/Home.xhtml?centerPage=teologia}, } @MISC{NAHLI_2015_MISC_N_390727, AUTHOR = {Nahli, O.}, TITLE = {Aggiornamenti banca dati del Motore morfologico Aramorph}, YEAR = {2015}, ABSTRACT = {AraMorph's components are essentially two: the rule engine for morphological analysis and a repository of linguistic resources mainly composed of three lexicons: i) the dictStems lexicon, which contains 38.600 lemmas; ii) the dictPrefixes lexicon, which consists of sequences of proclitics and inflectional prefixes; iii) the dictSuffixes lexicon, which consists of sequences of inflectional suffixes and enclitics. These lexica are accompanied by three compatibility tables used for checking combinations of A (proclitics+prefixes), B (stems) and C (suffixes+enclitics). To cut down on arabic parse overgeneration, one has to enforce further restrictions in compatibility tables, e.g. the verb's ability to accept nominative and accusative pronouns, and to select a rational subject. We then augmented verb entries with subcategorization information such as case assignment and the restriction on rational subjects. At the same time, it was necessary to update compatibility tables.}, KEYWORDS = {analisi morfo-sintattica, Lingua araba, Aramorph}, URL = {http://hdl.handle.net/20.500.11752/ILC-94}, } @ARTICLE{BRUNATO_2014_ARTICLE_BV_311157, AUTHOR = {Brunato, D. and Venturi, G.}, TITLE = {Le tecnologie linguistico-computazionali nella misura della leggibilità di testi giuridici}, YEAR = {2014}, ABSTRACT = {Il presente contributo illustra una innovativa metodologia per il calcolo della leggibilità di un testo giuridico basata su strumenti di Trattamento Automatico del Linguaggio ed espressamente rivolta alla sua semplificazione. 
Inserendoci nel più ampio filone di ricerche che affronta il tema dell'accessibilità della lingua del diritto, discutiamo con esempi tratti da testi reali, il caso specifico della prosa burocratico-amministrativa dal momento che l'accessibilità a tali documenti costituisce un elemento chiave della comunicazione istituzioni-cittadini. A nostra conoscenza, tale studio rappresenta il primo tentativo volto a mostrare come tecnologie linguistico-computazionali allo stato dell'arte per la lingua italiana incomincino ad essere mature per costituire non solo un ausilio per definire automaticamente la leggibilità di testi giuridici ma anche una guida per una loro stesura semplificata. Tali funzionalità saranno illustrate grazie a READ-IT, il primo e al momento unico strumento di valutazione della leggibilità oggi esistente per la lingua italiana basato su strumenti di Trattamento Automatico del Linguaggio.}, PAGES = {111-142}, URL = {https://publications.cnr.it/doc/311157}, VOLUME = {XXIII}, PUBLISHER = {Edizioni Scientifiche Italiane (Firenze, Italia)}, ISSN = {0390-0975}, JOURNAL = {Informatica e diritto}, } @ARTICLE{CHERSI_2014_ARTICLE_CFPP_283372, AUTHOR = {Chersi, F. and Ferro, M. and Pezzulo, G. and Pirrelli, V.}, TITLE = {Topological Self-Organization and Prediction Learning Support Both Action and Lexical Chains in the Brain}, YEAR = {2014}, ABSTRACT = {A growing body of evidence in cognitive psychology and neuroscience suggests a deep interconnection between sensory-motor and language systems in the brain. Based on recent neurophysiological findings on the anatomo-functional organization of the fronto-parietal network, we present a computational model showing that language processing may have reused or co-developed organizing principles, functionality, and learning mechanisms typical of premotor circuit. The proposed model combines principles of Hebbian topological self-organization and prediction learning. 
Trained on sequences of either motor or linguistic units, the network develops independent neuronal chains, formed by dedicated nodes encoding only context-specific stimuli. Moreover, neurons responding to the same stimulus or class of stimuli tend to cluster together to form topologically connected areas similar to those observed in the brain cortex. Simulations support a unitary explanatory framework reconciling neurophysiological motor data with established behavioral evidence on lexical acquisition, access, and recall.}, KEYWORDS = {Motor chains, Lexical chains, Serial working memory, Computational modeling, Self-organizing maps, Somatotopic organization, Prediction}, PAGES = {476-491}, URL = {http://onlinelibrary.wiley.com/doi/10.1111/tops.12094/abstract?deniedAccessCustomisedMessage=\&userIsAuthenticated=false}, VOLUME = {6}, DOI = {10.1111/tops.12094}, PUBLISHER = {Cognitive Science Society, Inc (Hoboken, NJ, Stati Uniti d'America)}, ISSN = {1756-8757}, JOURNAL = {Topics in cognitive science (Print)}, } @ARTICLE{CUTUGNO_2014_ARTICLE_CMMCM_282735, AUTHOR = {Cutugno, P. and Marconi, L. and Morgavi, G. and Chiarella, D. and Morando, M.}, TITLE = {Analysis of new collaborative writing within Web 2.0}, YEAR = {2014}, ABSTRACT = {In recent years, the transition from Web 1.0 to Web 2.0 enabled the creation of content by the users of the Network: social networks, blogs, forums, chats and wikis have arisen. Phenomena, such as collaborative/collective writing, already born at the beginning of the 20th century, found their natural setting, a wide audience of reference of writers and readers in multiple languages within the Web 2.0. In this paper our goal is to verify if and how the characteristics of the textual analysis of narrative plots can be used for the analysis of collaborative narrative texts. 
In particular, we will check if features like correctness, completeness, consistency and coherence together with tools for statistical analysis of language suitable for analysing the new collaborative writing 2.0.}, PAGES = {91-97}, URL = {https://publications.cnr.it/doc/282735}, VOLUME = {22}, PUBLISHER = {WSEAS Press (Wisconsin (Stati Uniti d'America), Stati Uniti d'America)}, ISSN = {1790-5109}, JOURNAL = {Recent Advances in Computer Engineering A Series of Reference Books and Textbooks}, } @ARTICLE{DEFELICE_2014_ARTICLE_D_285274, AUTHOR = {De Felice, I.}, TITLE = {«Possibilities of action» in language: affordances and verbal polysemy}, YEAR = {2014}, PAGES = {179-191}, URL = {https://publications.cnr.it/doc/285274}, VOLUME = {1}, PUBLISHER = {Il Mulino (Italia, Italia)}, ISSN = {2279-7777}, JOURNAL = {Reti, Saperi, Linguaggi. Italian Journal of Cognitive Sciences}, } @ARTICLE{DEFELICE_2014_ARTICLE_D_285275, AUTHOR = {De Felice, I.}, TITLE = {La sinestesia linguistica nella poesia latina}, YEAR = {2014}, ABSTRACT = {The main purpose of this study is to explore linguistic synaesthesia in Latin poetic language. Through the analysis of a poetic corpus, which consists of works of Catullus, Horace, Lucretius, Ovid, Vergil, all occurrences of twenty Latin synaesthetic adjectives (previously extracted by Aeneid and De Rerum Natura) were retrieved; all lemmas co-occurring with these adjectives in nominal phrases were then classified into the following categories according to their meaning in context: monoaesthetic, synaesthetic (touch, temperature perception, taste, smell, sight, motion perception, hearing), pseudo-synaesthetic (i.e. psycho-moral), abstract. 
The research not only shows how much linguistic synaesthesia is present in Latin poetry, but also demonstrates that Latin synaesthesias comply with cross-linguistic tendencies, especially with regard to the hypothesis of directional hierarchy.}, PAGES = {61-107}, URL = {https://publications.cnr.it/doc/285275}, VOLUME = {52}, PUBLISHER = {ETS (Pisa, Italia)}, ISSN = {0085-6827}, JOURNAL = {Studi e saggi linguistici}, } @ARTICLE{DEFELICE_2014_ARTICLE_D_289638, AUTHOR = {De Felice, I.}, TITLE = {From hands to handles: How objects' orientation affects grasp descriptions}, YEAR = {2014}, PAGES = {109-115}, URL = {http://www.neapolisanit.eu/neascience/wp-content/uploads/2014/12/ATTI_AISC_2014_ROMA2.pdf}, VOLUME = {5}, ISSN = {2282-6009}, JOURNAL = {Nea Science-Giornale Italiano di neuroscienze, psicologia e riabilitazione}, } @ARTICLE{DELLORLETTA_2014_ARTICLE_DMV_285640, AUTHOR = {Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Assessing document and sentence readability in less resourced languages and across textual genres}, YEAR = {2014}, ABSTRACT = {In this paper, we tackle three underresearched issues of the automatic readability assessment literature, namely the evaluation of text readability in less resourced languages, with respect to sentences (as opposed to documents) as well as across textual genres. Different solutions to these issues have been tested by using and refining READ-IT, the first advanced readability assessment tool for Italian, which combines traditional raw text features with lexical, morpho-syntactic and syntactic information. In READ-IT readability assessment is carried out with respect to both documents and sentences, with the latter constituting an important novelty of the proposed approach: READ-IT shows a high accuracy in the document classification task and promising results in the sentence classification scenario. 
By comparing the results of two versions of READ-IT, adopting a classification- versus ranking-based approach, we also show that readability assessment is strongly influenced by textual genre; for this reason a genre-oriented notion of readability is needed. With classification-based approaches, reliable results can only be achieved with genre-specific models: Since this is far from being a workable solution, especially for less resourced languages, a new ranking method for readability assessment is proposed, based on the notion of distance.}, KEYWORDS = {readability assessment, less resourced languages, multi-level linguistic annotation, textual genres}, PAGES = {163-193}, URL = {http://www.ingentaconnect.com/content/jbp/itl/2014/00000165/00000002/art00005}, VOLUME = {165}, DOI = {10.1075/itl.165.2.03del}, PUBLISHER = {Peeters Publishers (Leuven, Belgio)}, ISSN = {1783-1490}, JOURNAL = {ITL. Internationaler technischer Literaturanzeiger (Online)}, } @ARTICLE{GOGGI_2014_ARTICLE_GPGB_284602, AUTHOR = {Goggi, S. and Pardelli, G. and Giannini, S. and Biagioni, S.}, TITLE = {Grey Literature in European Commission Projects}, YEAR = {2014}, ABSTRACT = {The survey is focused on the documentation produced by the European Commission (EC) projects involved in the Framework Programme for Research and Technological Development (hereafter FP7) and managed by the Italian National Research Council (hereafter CNR). In particular, the Grey Literature (GL) available on CORDIS and European Projects websites was analysed. In order to verify how it is managed and whether it is compliant with EC recommendations, some categories were introduced to identify, measure and evaluate the usability and availability of projects production. 
Data was obtained from a sample of European projects websites.}, KEYWORDS = {Grey Literature, European Commission Projects}, PAGES = {133-144}, URL = {http://www.greynet.org/thegreyjournal/previousissues.html}, VOLUME = {10}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{MARZI_2014_ARTICLE_MFK_288212, AUTHOR = {Marzi, C. and Ferro, M. and Keuleers, E.}, TITLE = {Perception of typicality in the lexicon: Wordlikeness, lexical density and morphonotactic constraints}, YEAR = {2014}, ABSTRACT = {The extent to which a symbolic time-series (a sequence of sounds or letters) is a typical word of a language, referred to as WORDLIKENESS, has been shown to have effects in speech perception and production, reading proficiency, lexical development and lexical access, short-term and long-term verbal memory. Two quantitative models have been suggested to account for these effects: serial phonotactic probabilities (the likelihood for a given symbolic sequence to appear in the lexicon) and lexical density (the extent to which other words can be obtained from a target word by changing, deleting or inserting one or more symbols in the target). The two measures are highly correlated and thus easy to be confounded in measuring their effects in lexical tasks. In this paper, we propose a computational model of lexical organisation, based on Self-Organising Maps with Hebbian connections defined over a temporal layer (TSOMs), providing a principled algorithmic account of effects of lexical acquisition, processing and access, to further investigate these issues. 
In particular, we show that (morpho-)phonotactic probabilities and lexical density, though correlated in lexical organisation, can be taken to focus on different aspects of speakers' word processing behaviour and thus provide independent cognitive contributions to our understanding of the principles of perception of typicality that govern lexical organisation.}, KEYWORDS = {wordlikeness, lexical access, word processing, frequency, memory}, PAGES = {171-191}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84919701117\&origin=inward}, VOLUME = {40}, PUBLISHER = {Zavod za lingvistiku Filozofskog fakulteta (Zagreb, Croazia)}, ISSN = {0586-0296}, JOURNAL = {Suvremena lingvistika}, } @ARTICLE{MARZI_2014_ARTICLE_MFP_287289, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Morphological structure through lexical parsability}, YEAR = {2014}, ABSTRACT = {The emergence of morphological structure in lexical acquisition is analysed in the computational framework of Temporal Self-Organising Maps (TSOMs), to provide an explanatory basis for both psycholinguistic and linguistic accounts of lexical parsability. The investigation we propose is grounded on the hypothesis that perception of morphological structure (parsability) and frequency strongly correlate in the acquisition of inflectional paradigms. Analysis of experimental results of word acquisition obtained by artificially varying training conditions, allows us to understand developmental competition between fully-inflected word forms, and to investigate a hierarchy of frequency effects. 
The computational and theoretical implications of such a memory-based view of the relationship between frequency and perception, and its potential to account}, KEYWORDS = {inflectional paradigms, morphological structure, token/type frequency, word processing}, PAGES = {263-290}, URL = {http://www.rivisteweb.it/doi/10.1418/78410}, VOLUME = {XIII}, DOI = {10.1418/78410}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{PIRRELLI_2014_ARTICLE_P_288043, AUTHOR = {Pirrelli, V.}, TITLE = {Review of "Computational Paralinguistics: Emotion, Affect and Personality in Speech and Language Processing" (by Schuller \& Batliner, Wiley Publishing 2013)}, YEAR = {2014}, KEYWORDS = {Paralinguistics, Pragmatics, Language usage}, URL = {http://www.computingreviews.com/review/review_review.cfm?review_id=142608}, PUBLISHER = {Association for Computing Machinery (New York, N. Y, Stati Uniti d'America)}, ISSN = {1530-6585}, JOURNAL = {Computing reviews (Online)}, } @ARTICLE{SASSI_2014_ARTICLE_SBP_280559, AUTHOR = {Sassi, M. and Biagioni, S. and Pardelli, G.}, TITLE = {A Linguistic and Gender Approach to 1841 Tuscany Population Census}, YEAR = {2014}, ABSTRACT = {The Census of 1841 in Tuscany was the first official data registry which tried to describe Tuscan population as a whole on the basis of the Granducato's territory. With the use of special ad-hoc created forms, all demographic and socioeconomic characteristics of families and single persons in "Granducato di Toscana" were described. 
Work is developed in five points: (1) informatics retrieval of linguistic information from Tuscany of 1800 focused by the arts and craftsmanship more in use in families of that time; (2) gender division of works and craftsmanship; (3) observation of lexical disparity in the four communities and terminological curiosities of that historical period; (4) actually no longer existing craftsmanship; and (5) diachronic analysis of communities, where possible. In this scenario, the authors will introduce the methodology they used for data analysis. Tables and figures will be used to better focus different moments and results of the work. A Glossary in Appendix will contain the English translation of the Italian terms extracted from the Corpus.}, KEYWORDS = {Tuscany Population Census, sociological analysis, gender analysis, the 19th work terminology, linguistic statistics}, PAGES = {318-329}, URL = {http://www.davidpublishing.com/show.html?16049}, VOLUME = {12}, PUBLISHER = {USA-China Business Review (Journal), Inc (New York, NY, Stati Uniti d'America)}, ISSN = {1539-8080}, JOURNAL = {US-China foreign language}, } @ARTICLE{SORIA_2014_ARTICLE_SCMQBCMOP_285553, AUTHOR = {Soria, C. and Calzolari, N. and Monachini, M. and Quochi, V. and Bel, N. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.}, TITLE = {The language resource Strategic Agenda: the FLaReNet synthesis of community recommendations}, YEAR = {2014}, ABSTRACT = {The main purpose of this paper is to serve as a landmark for future research and in particular for future strategic, infrastructural and coordination initiatives. It presents a preliminary plan for actions and infrastructures that could become the basis for future initiatives in the sector of Language Resources and Technologies (LRTs). 
The FLaReNet Language Resource Strategic Agenda presents a set of recommendations for the development and progress of LRT in Europe, as issued from a three-year consultation of the FLaReNet European project. 
Recommendations cover a broad range of topics and activities, spanning over production and use of language resources, licensing, maintenance and preservation issues, infrastructures for language resources, resource identification and sharing, evaluation and validation, interoperability and policy issues. The intended recipients belong to a large set of players and stakeholders in LRT, ranging from individuals to research and education institutions, to policy-makers, funding agencies, SMEs and large companies, service and media providers}, KEYWORDS = {Strategic agenda, Language resources planning, Recommended priority actions}, PAGES = {753-775}, URL = {https://publications.cnr.it/doc/285553}, VOLUME = {48}, DOI = {10.1007/s10579-014-9279-y}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @ARTICLE{WIELING_2014_ARTICLE_WMNB_285543, AUTHOR = {Wieling, M. and Montemagni, S. and Nerbonne, J. and Baayen, R. H.}, TITLE = {Lexical differences between Tuscan dialects and standard Italian: Accounting for geographic and socio-demographic variation using generalized additive mixed modeling}, YEAR = {2014}, ABSTRACT = {This study uses a generalized additive mixed-effects regression model to predict lexical differences in Tuscan dialects with respect to standard Italian. We used lexical information for 170 concepts used by 2,060 speakers in 213 locations in Tuscany. In our model, geographical position was found to be an important predictor, with locations more distant from Florence having lexical forms more likely to differ from standard Italian. In addition, the geographical pattern varied significantly for low- versus high-frequency concepts and older versus younger speakers. Younger speakers generally used variants more likely to match the standard language. Several other factors emerged as significant. 
Male speakers as well as farmers were more likely to use lexical forms different from standard Italian. In contrast, higher-educated speakers used lexical forms more likely to match the standard. The model also indicates that lexical variants used in smaller communities are more likely to differ from standard Italian. The impact of community size, however, varied from concept to concept. For a majority of concepts, lexical variants used in smaller communities are more likely to differ from the standard Italian form. For a minority of concepts, however, lexical variants used in larger communities are more likely to differ from standard Italian. Similarly, the effect of the other community- and speaker-related predictors varied per concept. These results clearly show that the model succeeds in teasing apart different forces influencing the dialect landscape and helps us to shed light on the complex interaction between the standard Italian language and the Tuscan dialectal varieties. In addition, this study illustrates the potential of generalized additive mixed-effects regression modeling applied to dialect data.*}, KEYWORDS = {Tuscan dialects, lexical variation, generalized additive modeling, mixed-effects regression modeling, geographical variation}, PAGES = {669-692}, URL = {http://www.linguisticsociety.org/files/wieling.pdf}, VOLUME = {90}, PUBLISHER = {Linguistic Society of America [etc. ] (Washington, DC [etc. ], Stati Uniti d'America)}, ISSN = {0097-8507}, JOURNAL = {Language (Baltimore)}, } @INCOLLECTION{BOSCHETTI_2014_INCOLLECTION_B_288045, AUTHOR = {Boschetti, F.}, TITLE = {Corpus Linguistics and Greek}, YEAR = {2014}, PAGES = {391-394}, URL = {https://publications.cnr.it/doc/288045}, ISBN = {9789004225978}, BOOKTITLE = {Encyclopedia of Ancient Greek Language and Linguistics (3 vols)}, EDITOR = {Giannakis, G. 
K.}, } @INCOLLECTION{BOSCHETTI_2014_INCOLLECTION_B_308246, AUTHOR = {Boschetti, F.}, TITLE = {Strumenti on-line per l'analisi e l'annotazione di testi letterari ed epigrafici bilingui}, YEAR = {2014}, ABSTRACT = {Il presente contributo illustra alcuni metodi e strumenti per l'allineamento di testi bilingui e descrive in particolare il sistema sviluppato presso l'Istituto di Linguistica Computazionale «A. Zampolli» del Consiglio Nazionale delle Ricerche di Pisa, corredato di funzioni specifiche per l'epigrafia digitale. Lo strumento informatico facilita l'interrogazione e la visualizzazione dei passi in parallelo, oltre a permettere allo studioso di annotare singole parole o porzioni più estese di testo che si corrispondono, a giudizio dello studioso stesso, in modo più o meno fedele nelle due lingue.}, KEYWORDS = {Digital Epigraphy, Cooperative Philology}, PAGES = {1-9}, URL = {http://www.edizionicafoscari.unive.it/col/exp/30/59/Archivistica/3}, VOLUME = {3}, PUBLISHER = {Edizioni Ca' Foscari (Venezia, ITA)}, ISBN = {978-88-97735-94-6}, BOOKTITLE = {Memoria poetica e poesia della memoria-La versificazione epigrafica dall'antichità all'umanesimo}, EDITOR = {Pistellato, A.}, } @INCOLLECTION{BOZZI_2014_INCOLLECTION_B_322719, AUTHOR = {Bozzi, A.}, TITLE = {Computer-assisted Scholarly Editing of Manuscripts Sources}, YEAR = {2014}, ABSTRACT = {The contribution will concentrate on the specific aspect of textual criticism. I realize this is a discipline that could be defined as being 'very exclusive,' as the scholars are not numerically equivalent to the community of people working in other Humanities disciplines; for example historians, philosophers, or those dealing with the history and criticism of literature. 
However, if we consider that textual criticism covers a very large period (Ancient, Medieval and Modern times) and many languages, there is also an increase in the population of specialists.}, KEYWORDS = {Computer-aided Textual Scholarship, Textual Criticism, Computational Philology}, PAGES = {99-115}, URL = {http://www.oapen.org/search?identifier=515678}, PUBLISHER = {Amsterdam University Press (Amsterdam, NLD)}, ISBN = {978-90-896-4564-7}, BOOKTITLE = {New Publication Cultures in the Humanities. Exploring the Paradigm Shift}, EDITOR = {Dávidházi, P.}, } @INCOLLECTION{CALZOLARI_2014_INCOLLECTION_CNMQST_286868, AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {Lexicons, Terminologies, Ontologies: Reflections from Experiences in Resource Construction}, YEAR = {2014}, ABSTRACT = {This contribution aims at highlighting the strong interconnection between lexicons, terminologies and ontologies and especially the fundamental role that ontologies and lexica mutually play. Our view is that lexical resources are evolving in nature, from ontologically based lexicons we are going towards lexically based ontologies. We explore different instantiations of the current trend of using formal ontologies as a core module of computational lexicons, presenting the advantages especially in multilingual and terminological contexts. We present work showing that the lexical knowledge already present in non formal computational lexicons can be exploited to derive or enrich a formal ontology without much manual effort. In the terminology domain, we describe the construction of a resource for biology, directly linked to a parallel domain-ontology, that combines characteristics of both lexicons and terminologies, so that it can allow for intelligent access to content. 
Finally, we describe our experience in two projects in which formal ontologies play a central role in the context of multilingual computational lexicons, where the ontology is what acts as the glue among the different monolingual lexicons and what provides cross-lingual reasoning capabilities.}, KEYWORDS = {Computational Lexicons, Ontology, Terminology, Interoperability, Standards}, PAGES = {103-121}, URL = {http://www.springer.com/computer/ai/book/978-3-642-45326-7}, VOLUME = {8003}, DOI = {10.1007/978-3-642-45327-4_7}, PUBLISHER = {Springer (Berlin Heidelberg, DEU)}, ISBN = {978-3-642-45326-7}, BOOKTITLE = {Language, Culture, Computation. Computational Linguistics and Linguistics. Essays Dedicated to Yaacov Choueka on the Occasion of His 75th Birthday, Part III}, EDITOR = {Dershowitz, N. and Nissan, E.}, } @INCOLLECTION{RUSSO_2014_INCOLLECTION_RC_288041, AUTHOR = {Russo, I. and Caselli, T.}, TITLE = {Converging evidences on the eventivity of Italian nouns}, YEAR = {2014}, ABSTRACT = {This paper aims at shedding lights on the complex semantic concept of "event noun". Starting with the working hypothesis that linguistic context and corpus-based distributional information can be decisive, we propose a measure for eventivity that relies on syntagmatic cues. By means of a comparison between syntagmatic evidence obtained from a corpus study and speakers' judgments, we have identified a measure of eventivity for nouns. The comparison with annotated data proves its soundness.}, KEYWORDS = {event nominals, syntagmatic cues, degree of eventivity}, PAGES = {179-200}, URL = {https://publications.cnr.it/doc/288041}, PUBLISHER = {Düsseldorf University Press (Düsseldorf, DEU)}, ISBN = {978-3-943460-87-2}, BOOKTITLE = {Meaning, frames, and conceptual representation}, EDITOR = {Gamerschlag, T. and Gerland, D. and Osswald, R. and Wiebke, P.}, } @EDITORIAL{BOSCO_2014_EDITORIAL_BCDFMS_330112, AUTHOR = {Bosco, C. and Cosi, P. and Dell'Orletta, F. and Falcone, M. and Montemagni, S. 
and Simi, M.}, TITLE = {Proceedings of the Fourth International Workshop EVALITA 2014}, YEAR = {2014}, KEYWORDS = {Trattamento Automatico del Linguaggio, Speech Processing, Lingua Italiana}, PAGES = {167}, URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-EVALITA-2014.pdf}, PUBLISHER = {Pisa University Press (Pisa, ITA)}, ISBN = {978-88-67414-72-7}, } @EDITORIAL{ELMOHAJIR_2014_EDITORIAL_EACAEPZE_330677, AUTHOR = {El Mohajir, M. and Al Achhab, M. and Chahhou, M. and Arioua, M. and El Mohajir, B. and Pirrelli, V. and Zarghili, A. and El Far, M.}, TITLE = {Proceedings of IEEE-CiST14-Third IEEE International Colloquium in Information Science and Technology (CIST)}, YEAR = {2014}, ABSTRACT = {The 3rd international IEEE Colloquium on Information Science and Technology (CIST'14) is part of the IEEE CONFERENCE SERIES that are held in Morocco, and is sponsored by the IEEE Morocco Section and the IEEE Morocco Computer \& Communication Joint Chapter, and the UAE IEEE Student Branch. The 2014 edition was organized in collaboration with the Faculty of Sciences of Tetuan, the national school of applied sciences of Tetuan and the University of Abdelmalek Essaadi. IEEE CIST is emerging as a key annual event that aims to serve as a forum to promote the exchange of the latest advances achieved by IT researchers, IT decision makers, IT managers, application designers and software engineers in the domain of information science and related technology. Computing challenges, models, applications and IT solutions will be discussed from the perspectives of academia, industry and government. In addition to the main conference topics, IEEE CIST will also provide a platform for supporting innovative and original contributions in three complementary disciplines that are: Arabic natural language processing, Information and multimedia processing and Internet of Things. 
We would like to extend our most sincere thanks and gratitude to the keynote speakers of IEEE CIST'14 for their important added value to this edition and to the Scientific Committee Members who helped us in the review process. We would like also to express our thanks to the IEEE Computer Society for their support through their Distinguished Lecturers Programs. We are also very glad to express our most sincere gratitude for the organizing committee members for their full dedication and professional organization of this edition. The success of this colloquium will be mainly attributed to the authors who contributed with their posters and talks. We hope that CIST will continue to offer a privileged context for participants to develop new ways and methods to achieve our objectives in advancing our research and projects. We can together achieve more and face more efficiently the challenges of the current millennium.}, PAGES = {440}, URL = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6996097}, VOLUME = {CFP1467R-ART}, DOI = {10.1109/CIST.2014.7016582}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-4799-5979-2}, } @EDITORIAL{FRANCESCONI_2014_EDITORIAL_FMPVW_310637, AUTHOR = {Francesconi, E. and Montemagni, S. and Peters, W. and Venturi, G. and Wyner, A.}, TITLE = {Proceedings of the Fourth Workshop on Semantic Processing of Legal Texts}, YEAR = {2014}, PAGES = {33}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/workshops/LREC2014Workshop-SPLeT%20Proceedings.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, } @EDITORIAL{PIRRELLI_2014_EDITORIAL_PR_300048, AUTHOR = {Pirrelli, V. and Raffaelli, I.}, TITLE = {Special Issue of Suvremena Lingvistika}, YEAR = {2014}, PAGES = {127-235}, URL = {https://publications.cnr.it/doc/300048}, PUBLISHER = {Croatian Philological Society (Zagreb, HRV)}, } @EDITORIAL{PRETORIUS_2014_EDITORIAL_PSB_285396, AUTHOR = {Pretorius, L. and Soria, C. 
and Baroni, P.}, TITLE = {Proceedings of the Workshop on Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)}, YEAR = {2014}, ABSTRACT = {Proceedings del Workshop su Collaborazione e Computazione per le Lingue con Risorse Insufficienti nell'era dei Dati Aperti Collegati (CCURL 2014 | Reykjavik, 26/05/2014)}, KEYWORDS = {under-resourced languages}, PAGES = {107}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, } @INPROCEEDINGS{ABRATE_2014_INPROCEEDINGS_ADGLLMMPP_282569, AUTHOR = {Abrate, M. and Del Grosso, A. M. and Giovannetti, E. and Lo Duca, A. and Luzzi, D. and Mancini, L. and Marchetti, A. and Pedretti, I. and Piccini, S.}, TITLE = {Sharing Cultural Heritage: the Clavius on the Web Project}, YEAR = {2014}, ABSTRACT = {In the last few years the amount of manuscripts digitized and made available on the Web has been constantly increasing. However, there is still a considerable lack of results concerning both the explicitation of their content and the tools developed to make it available. The objective of the Clavius on the Web project is to develop a Web platform exposing a selection of Christophorus Clavius letters along with three different levels of analysis: linguistic, lexical and semantic. The multilayered annotation of the corpus involves a XML-TEI encoding followed by a tokenization step where each token is univocally identified through a CTS urn notation and then associated to a part-of-speech and a lemma. The text is lexically and semantically annotated on the basis of a lexicon and a domain ontology, the former structuring the most relevant terms occurring in the text and the latter representing the domain entities of interest (e.g. people, places, etc.). Moreover, each entity is connected to linked and non linked resources, including DBpedia and VIAF. 
Finally, the results of the three layers of analysis are gathered and shown through interactive visualization and storytelling techniques. A demo version of the integrated architecture was developed.}, KEYWORDS = {language technologies for digital cultural heritage, lexica and ontologies, data visualization}, PAGES = {8}, URL = {https://publications.cnr.it/doc/282569}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {LREC 2014-The 9th edition of the Language Resources and Evaluation Conference}, CONFERENCE_PLACE = {Reykjavik}, CONFERENCE_DATE = {26-31 maggio 2014}, } @INPROCEEDINGS{ANTICO_2014_INPROCEEDINGS_AQMM_286882, AUTHOR = {Antico, G. and Quochi, V. and Monachini, M. and Martinelli, M.}, TITLE = {Marrying Technical Writing with LRT}, YEAR = {2014}, ABSTRACT = {In the last years the Technical Writer operational scenarios and the workflow sensibly changed; specifically,"free style" writing - or manual writing - has become outdated and technical writing is now much more concerned with structured management of content than in the past. Technical writing has become more demanding due to a number of factors among which the rise and spread of mobile devices usage. This paper discusses the new needs of technical writing and content management business and how LRT can help it improve quality and productivity.}, KEYWORDS = {controlled language, technical writing, content management systems}, PAGES = {19-25}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation (LREC)}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may 2014}, EDITOR = {Isahara, H. and Lee, K. C. S. and Nam, S.}, } @INPROCEEDINGS{BARBAGLI_2014_INPROCEEDINGS_BLDMV_294078, AUTHOR = {Barbagli, A. and Lucisano, P. and Dell'Orletta, F. and Montemagni, S. 
and Venturi, G.}, TITLE = {Tecnologie del linguaggio e monitoraggio dell'evoluzione delle abilità di scrittura nella scuola secondaria di primo grado}, YEAR = {2014}, ABSTRACT = {L'ultimo decennio ha visto l'affermarsi a livello internazionale dell'uso di tecnologie del linguaggio per lo studio dei processi di apprendimento. Questo contributo, che si colloca all'interno di una ricerca più ampia di pedagogia sperimentale, riporta i primi e promettenti risultati di uno studio finalizzato al monitoraggio dell'evoluzione del processo di apprendimento della lingua italiana condotto a partire dalle produzioni scritte degli studenti con strumenti di annotazione linguistica automatica e di estrazione di conoscenza.}, PAGES = {23-27}, URL = {http://www.italianlp.it/wp-content/uploads/2014/12/Tecnologie-del-linguaggio-per-la-scuola.pdf}, DOI = {10.12871/CLICIT201415}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-8-86741-472-7}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics (CLiC-it 2014)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 dicembre 2014}, BOOKTITLE = {Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{BARTOLINI_2014_INPROCEEDINGS_BQDRM_286944, AUTHOR = {Bartolini, R. and Quochi, V. and De Felice, I. and Russo, I. and Monachini, M.}, TITLE = {From Synsets to Videos: Enriching ItalWordNet Multimodally}, YEAR = {2014}, ABSTRACT = {The paper describes the multimodal enrichment of ItalWordNet action verbs' entries by means of an automatic mapping with a conceptual ontology of action types instantiated by video scenes (ImagAct). The two resources present significative differences as well as interesting complementary features, such that a mapping of these two resources can lead to an enrichment of IWN, through the connection between synsets and videos apt to illustrate the meaning described by glosses. 
Here, we describe an approach inspired by ontology matching methods for the automatic mapping of ImagAct video scenes onto ItalWordNet. The experiments described in the paper are conducted on Italian, but the same methodology can be extended to other languages for which WordNets have been created, since ImagAct is available also for English, Chinese and Spanish. This source of multimodal information can be exploited to design second language learning tools, as well as for language grounding in action recognition in video sources and potentially for robotics.}, KEYWORDS = {Action ontology, Multimodality, WordNet}, PAGES = {3110-3117}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation (LREC 2014)}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may 2014}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{BELLANDI_2014_INPROCEEDINGS_BABBG_311736, AUTHOR = {Bellandi, A. and Albanesi, D. and Bellusci, A. and Bozzi, A. and Giovannetti, E.}, TITLE = {The Talmud System: a Collaborative Web Application for the Translation of the Babylonian Talmud Into Italian}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/311736}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-8-86741-472-7}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics (CLiC-it 2014)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 dicembre 2014}, BOOKTITLE = {Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{BELLANDI_2014_INPROCEEDINGS_BBCG_282568, AUTHOR = {Bellandi, A. and Bellusci, A. and Cappelli, A. 
and Giovannetti, E.}, TITLE = {Graphic Visualization in Literary Text Interpretation}, YEAR = {2014}, ABSTRACT = {We here illustrate a possible approach combining existing technologies for Natural Language Processing (NLP), Knowledge Representation and Reasoning (KRR) and Data Visualization in a coherent Decision Support System (DSS). The approach to the development of the system we are working on can be articulated in two main steps: the customization and integration of existing tools for automatic text annotation (at least linguistic, lexicographic and semantic) and the construction of a user-friendly and highly expressive GUI. The interface should allow a user to: upload her/his own text, run the desired annotation tools, visually interact with the resulting multilayered network to: i) proof-read the results of the automatic annotations, ii) manually add missing elements and/or relations between elements and, finally, iii) formulate and verify specific interpretative hypotheses.}, KEYWORDS = {Computational Hermeneutics, Text processing, Knowledge Representation and Reasoning, Data Visualization}, PAGES = {392-397}, URL = {https://publications.cnr.it/doc/282568}, DOI = {10.1109/IV.2014.62}, PUBLISHER = {IEEE (New York, USA)}, CONFERENCE_NAME = {18th International Conference on Information Visualisation}, CONFERENCE_PLACE = {Parigi}, CONFERENCE_DATE = {15, 16, 17, 18 luglio 2014}, BOOKTITLE = {Information Visualisation}, EDITOR = {Banissi, E. and Bannatyne, M. W. M. and Marchese, F. T. and Sarfraz, M. and Ursyn, A. and Venturini, G. and Wyeld, T. G. and Cvek, U. and Trutschl, M. and Grinstein, G. and Geroimenko, V. and Kenderdine, S. and Bouali, F.}, } @INPROCEEDINGS{BELLANDI_2014_INPROCEEDINGS_BBCG_282571, AUTHOR = {Bellandi, A. and Bellusci, A. and Carniani, E. 
and Giovannetti, E.}, TITLE = {Content Elicitation: Towards a New Paradigm for the Analysis and Interpretation of Texts}, YEAR = {2014}, ABSTRACT = {In this paper we show how semantic technologies can be exploited, with the help of user friendly interfaces, to identify and structure the knowledge embedded in literary texts. The proposed approach, that we have called Content Elicitation, supports the experts in defining hierarchical and associative relationships between semantically annotated chunks of text denoting relevant entities, allowing visual structuring of knowledge, which can be edited by different experts in a collaborative way. This knowledge, formally coded as an ontology, can then be used by scholars and students as a guide for the analysis of the text and for the discovery of potential novel interpretations. We are testing and evaluating this approach on the Babylonian Talmud, due to its historical, linguistic, semantic and structural richness.}, KEYWORDS = {semantic annotation, knowledge representation, text ontology, content elicitation, literary computing, data visualization}, URL = {https://publications.cnr.it/doc/282571}, DOI = {10.2316/P.2014.810-031}, PUBLISHER = {Acta press (Calgary, CAN)}, CONFERENCE_NAME = {The 13th IASTED International Conference on Software Engineering}, CONFERENCE_PLACE = {Innsbruck}, CONFERENCE_DATE = {17-19 febbraio 2014}, EDITOR = {Hamza, M. H.}, } @INPROCEEDINGS{BELLUSCI_2014_INPROCEEDINGS_BBBCGM_311735, AUTHOR = {Bellusci, A. and Bellandi, A. and Benotto, G. and Cappelli, A. and Giovannetti, E. and Marchi, S.}, TITLE = {Towards a Decision Support System for Text Interpretation}, YEAR = {2014}, ABSTRACT = {This article illustrates the first steps towards the implementation of a Decision Support System aimed to recreate a research environment for scholars and provide them with computational tools to assist in the processing and interpretation of texts. 
While outlining the general characteristics of the system, the paper presents a minimal set of user requirements and provides a possible use case on Dante's Inferno.}, KEYWORDS = {DSS, XML, text interpretation, literary computing}, PAGES = {58-62}, URL = {http://clic.humnet.unipi.it/proceedings/vol1/CLICIT2014112.pdf}, VOLUME = {I}, DOI = {10.12871/CLICIT2014112}, ISBN = {9788867414727}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics (CLiC-it 2014)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 dicembre 2014}, BOOKTITLE = {Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{BIZZONI_2014_INPROCEEDINGS_BBDDMC_286958, AUTHOR = {Bizzoni, Y. and Boschetti, F. and Diakoff, H. and Del Gratta, R. and Monachini, M. and Crane, G.}, TITLE = {The Making of Ancient Greek WordNet}, YEAR = {2014}, ABSTRACT = {This paper describes the process of creation and review of a new lexico-semantic resource for the classical studies: Ancient Greek WordNet. The candidate sets of synonyms (synsets) are extracted from Greek-English dictionaries, on the assumption that Greek words translated by the same English word or phrase have a high probability of being synonyms or at least semantically closely related. The process of validation and the web interface developed to edit and query the resource are described in detail. The lexical coverage of Ancient Greek WordNet is illustrated and the accuracy is evaluated. Finally, scenarios for exploiting the resource are discussed.}, KEYWORDS = {Ancient Greek, Multilingualism, Classical Philology}, PAGES = {1140-1147}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, 
CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation (LREC 2014)}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may 2014}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_B_288048, AUTHOR = {Boschetti, F.}, TITLE = {Acquisizione e Creazione di Risorse Plurilingui per gli Studi di Filologia Classica in Ambienti Collaborativi}, YEAR = {2014}, ABSTRACT = {Questo articolo illustra metodi e strumenti per l'acquisizione e l'estensione di risorse digitali plurilingui per gli studi classici, sviluppati in collaborazione tra il CoPhiLab dell'Ilc-Cnr e il Perseus Project della Tufts University. Si descrivono tre linee di intervento: a) la progettazione e l'implementazione di un sistema di correzione dell'output dell'Ocr applicato al Greco antico; b) la creazione e la valutazione di un nucleo di synsets per Ancient Greek WordNet e c) l'allineamento di un campione di testi greci e latini con le relative traduzioni italiane.}, KEYWORDS = {Greco Antico, OCR, WordNet, Allineamento}, PAGES = {55-67}, URL = {https://publications.cnr.it/doc/288048}, PUBLISHER = {CLEUP (Padova, ITA)}, ISBN = {9788867872602}, CONFERENCE_NAME = {AIUCD 2013}, CONFERENCE_PLACE = {Padova}, CONFERENCE_DATE = {11-12 dicembre 2013}, BOOKTITLE = {Collaborative Research Practices and Shared Infrastructures for Humanities Computing-2nd Aiucd Annual Conference, Aiucd 2013 Padua, Italy, 11-12 December 2013-Proceedings of Revised Papers}, EDITOR = {Agosti, M. 
and Tomasi, F.}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_B_288052, AUTHOR = {Boschetti, F.}, TITLE = {La localizzazione del Perseus Project in lingua italiana}, YEAR = {2014}, ABSTRACT = {Si illustra il progetto di localizzazione in lingua italiana dell'infrastruttura per lo studio dei classici greci e latini costituita dal Perseus Project (Tufts University, Medford, MA), usando risorse per l'analisi della nostra lingua sviluppate presso l'ILC-CNR di Pisa e mettendo a disposizione nuovi componenti software per la visualizzazione e l'annotazione di testi bilingui.}, KEYWORDS = {filologia computazionale, allineamento, lessico dinamico}, PAGES = {221-234}, URL = {http://digilab2.let.uniroma1.it/ojs/index.php/Quaderni_DigiLab/issue/view/12}, VOLUME = {24}, DOI = {10.13133/978-88-98533-27-5}, ISBN = {978-88-98533-27-5}, CONFERENCE_NAME = {AIUCD 2012}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {13-14 dicembre 2012}, BOOKTITLE = {Digital Humanities: progetti italiani ed esperienze di convergenza multidisciplinare-Atti del convegno annuale dell'Associazione per l'Informatica Umanistica e la Cultura Digitale (AIUCD) Firenze, 13-14 dicembre 2012}, EDITOR = {Ciotti, F.}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BCDLPPVML_288050, AUTHOR = {Boschetti, F. and Cimino, A. and Dell'Orletta, F. and Lebani, G. E. and Passaro, L. and Picchi, P. and Venturi, G. and Montemagni, S. and Lenci, A.}, TITLE = {Computational Analysis of Historical Documents: An Application to Italian War Bulletins in World War I and II}, YEAR = {2014}, ABSTRACT = {World War (WW) I and II represent crucial landmarks in the history on mankind: They have affected the destiny of whole generations and their consequences are still alive throughout Europe. In this paper we present an ongoing project to carry out a computational analysis of Italian war bulletins in WWI and WWII, by applying state-of-the-art tools for NLP and Information Extraction. 
The annotated texts and extracted information will be explored with a dedicated Web interface, allowing for multidimensional access and exploration of historical events through space and time.}, KEYWORDS = {World War I}, PAGES = {70-75}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/workshops/LREC2014Workshop-LRT4HDA%20Proceedings.pdf}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, CONFERENCE_NAME = {LREC 2014}, CONFERENCE_PLACE = {Reykjavik}, CONFERENCE_DATE = {26 May 2014}, BOOKTITLE = {Proceedings of workshop on Language resources and technologies for processing and linking historical documents and archives-Deploying Linked Open Data in Cultural Heritage-LREC 2014, 26 May, Reykjavik, Iceland}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BDL_288070, AUTHOR = {Boschetti, F. and Del Gratta, R. and Lamé, M.}, TITLE = {Computer Assisted Annotation of Themes and Motifs in Ancient Greek Epigrams: First Steps}, YEAR = {2014}, ABSTRACT = {This paper aims at illustrating some tools to assist the manual annotation of themes and motifs in literary and epigraphic epigrams for the PRIN 2010/2011 Memorata Poetis Project.}, KEYWORDS = {Filologia collaborativa}, PAGES = {83-86}, URL = {http://clic.humnet.unipi.it/it/atti.html}, VOLUME = {1}, DOI = {10.12871/CLICIT2014158}, PUBLISHER = {Pisa University Press (Pisa, ITA)}, ISBN = {978-8-86741-472-7}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 dicembre 2014}, BOOKTITLE = {The First Italian Conference on Computational Linguistics-Proceedings}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{BRUNATO_2014_INPROCEEDINGS_B_311792, AUTHOR = {Brunato, D.}, TITLE = {Complessità necessaria o stereotipi del "burocratese"? 
Un'indagine sulla leggibilità del linguaggio amministrativo da una prospettiva linguistico-computazionale}, YEAR = {2014}, ABSTRACT = {Questo contributo intende presentare una metodologia di ricostruzione del profilo linguistico di un corpus di testi amministrativi basata sull'uso delle tecnologie linguistico-computazionali e finalizzata alla specializzazione di un indice di leggibilità "avanzato" sulle caratteristiche di questi testi. Tale metodologia, documentata in [3], si propone di indagare la variazione linguistica tramite il monitoraggio di parametri estratti automaticamente dal testo sottoposto ad analisi linguistica multi-livello. La complessità della lingua della pubblica amministrazione, soprattutto nei documenti rivolti al cittadino, è un problema ben noto e, malgrado le molteplici iniziative in favore di un linguaggio più chiaro ed efficace, tratti tipici del "burocratese" continuano a persistere, anche quando non imposti da requisiti di legittimità e precisione. Un ausilio alla semplificazione può venire dai sistemi per la misurazione della leggibilità del testo, come suggerito anche dai manuali di stile ispirati alla letteratura del Plain Language. Tuttavia le formule tradizionali, quali Gulpease [2], si limitano ad approssimare la complessità testuale, in quanto considerano esclusivamente parametri del testo superficiali, come la lunghezza della frase e della parola. Più recentemente, è emersa una nuova generazione di indici di leggibilità, fondati su metodologie di Trattamento Automatico del Linguaggio, che riescono a intercettare i luoghi di complessità del testo in maniera più granulare, computando un ampio spettro di parametri linguistici, che risultano anche maggiormente implicati nei processi di comprensione. È il caso di READ-IT [1], lo strumento utilizzato in questo studio. 
L'analisi linguistica ha esplorato la distribuzione di caratteristiche lessicali, morfo-sintattiche e sintattiche, estratte automaticamente da un "corpus parallelo monolingue" di testi amministrativi, ovvero internamente suddiviso in due sotto-corpora: uno costituito da testi autentici delle pubbliche amministrazioni e uno dalle relative versioni semplificate, frutto di un lavoro di riscrittura coordinato da linguisti. Queste caratteristiche, già risultate predittive del livello di leggibilità di testi giornalistici, sono state selezionate allo scopo di verificare l'incidenza delle peculiarità della scrittura amministrativa nella caratterizzazione della leggibilità. I risultati hanno permesso infatti di discriminare tra aspetti di complessità "ineliminabile" e tratti tipici del burocratese: i primi sono rappresentati da quei parametri che, pur indicativi di maggior complessità nella lingua comune, risultano similmente distribuiti nelle due sottovarietà; i secondi sono invece quelli che contraddistinguono solo la varietà dei testi originali, dunque quelli su cui hanno agito le riscritture. Questi dati potranno supportare tanto lo sviluppo di indici di leggibilità adattati alle peculiarità del linguaggio amministrativo, quanto le ricerche più attuali sulla semplificazione semiautomatica del testo.}, URL = {http://www.csfls.it/silfi2014/wp-content/uploads/2014/08/Atti-SILFI-2014-Riassunti.pdf}, ISBN = {978-88-96312-56-8}, CONFERENCE_NAME = {XIII Congresso della SILFI (Società Internazionale di Linguistica e Filologia Italiana)}, CONFERENCE_PLACE = {Palermo}, CONFERENCE_DATE = {22-24 settembre 2014}, BOOKTITLE = {La lingua variabile nei testi letterari, artistici e funzionali contemporanei. Analisi, interpretazione, traduzione}, EDITOR = {{Centro di Studi Filologici e Linguistici Siciliani} and {Dipartimento di Scienze Umanistiche} and {Università degli Studi di Palermo}}, } @INPROCEEDINGS{BRUNATO_2014_INPROCEEDINGS_BDVM_294073, AUTHOR = {Brunato, D. and Dell'Orletta, F. and Venturi, G. 
and Montemagni, S.}, TITLE = {Defining an annotation scheme with a view to automatic text simplification}, YEAR = {2014}, ABSTRACT = {This paper presents the preliminary steps of ongoing research in the field of automatic text simplification. In line with current approaches, we propose here a new annotation scheme specifically conceived to identify the typologies of changes an original sentence undergoes when it is manually simplified. Such a scheme has been tested on a parallel corpus available for Italian, which we have first aligned at sentence level and then annotated with simplification rules.}, PAGES = {87-92}, URL = {http://www.italianlp.it/wp-content/uploads/2014/12/Text-simplification.pdf}, DOI = {10.12871/CLICIT2014118}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-8-86741-472-7}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics (CLiC-it 2014)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 dicembre 2014}, BOOKTITLE = {Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{CHIARELLA_2014_INPROCEEDINGS_CCMMM_282739, AUTHOR = {Chiarella, D. and Cutugno, P. and Marconi, L. and Morando, M. and Morgavi, G.}, TITLE = {La pesca, la caza, la agricultura y el bosque: una organización de datos lingüísticos por un léxico con referencia al mundo fang}, YEAR = {2014}, ABSTRACT = {El lenguaje es una forma privilegiada de desarrollo de ideas, expresión de sentimientos y conocimientos, enfrentarse con otros y también de cohesión social; el saber es generalmente procesado y transmitido a través del lenguaje. Cada persona está tan acostumbrada a vivir en su propia realidad cultural que la cree universal; cada cultura tiende a ponerse al centro del mundo y considerarse como un punto de referencia y medida de todas las otras. 
Un fenómeno evidente de la lengua fang es la falta de "palabras - conceptos" en sentido occidental, o palabras que abarcan una cantidad de objetos que tienen características en común. En la estructuración del diccionario hemos elegido algunos elementos de la cultura fang como organización y jerarquía social, constitución de la familia, ámbito económico, ámbito artístico, ámbito literario, ámbito de la medicina; además hemos examinado unos aspectos de la representación del tiempo y del espacio, algunas topologías de danza y juegos y unos elementos descriptivos del bosque.}, KEYWORDS = {Fang, Spagnolo, lessici specifici}, PAGES = {16}, URL = {https://publications.cnr.it/doc/282739}, PUBLISHER = {Centro Cultural Africano "Fernando Ortiz" (Santiago de Cuba, CUB)}, ISBN = {9789592840195}, CONFERENCE_NAME = {XIII° Conferencia Internacional de Cultura Africana y Afroamericana}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {12-16 Aprile 2014}, } @INPROCEEDINGS{CIGNONI_2014_INPROCEEDINGS_CGMF_288032, AUTHOR = {Cignoni, L. and Giuffra, V. and Minozzi, S. and Fornaciari, G.}, TITLE = {CLIL Funerary Archaeology Courses for First-Cycle and Second-Cycle Degree Students}, YEAR = {2014}, ABSTRACT = {This paper reports on the differences between two specialized funerary archaeology courses conducted by a native language teacher from the Institute for Computational Linguistics of the National Research Council in Pisa and a subject specialist in paleopathology and funerary archaeology from the Division of Palaeopathology, Department of Translational Research on New Technologies in Medicine and Surgery of Pisa University. Lessons addressed to first cycle three-year Bachelor's degree undergraduates who were studying archaeology, art history, natural and environmental sciences took place in the second semester of the year 2012-2013. 
Classes in the same discipline and addressed to students from the same faculties had been held a year earlier for a second cycle two-year Master's degree course. The classes were delivered in English using CLIL (exploitation of a vehicular foreign language to teach a special subject) associated with blended learning methodology (combination of face-to-face instructor-led training with web-based technology). Appropriate teaching materials selected by the two teachers covered a wide range of topics, from the study of death to ancient burials, rites, and dynamics of human settlements, as well as evidence of past human societies recovered by excavations. In particular, ancient Roman funerary customs (inhumation, cremation) and Medieval mortuary practices and burials were studied, alongside artifacts such as weapons, jewellery, and pottery vessels recovered from archaeological sites both in Italy and in Britain. Collaboration between language teacher and subject specialist was crucial for the selection of the reading and listening materials, for the correction of the oral and written work assigned to the students, and for the intervention on the part of the subject teacher to clarify points that had been raised, to assist the students during the individual presentations, pairwork or group discussions, and to encourage their work. Two researchers collaborating with the subject specialist also contributed to the lessons by presenting studies they had performed in their area of expertise and by assisting the students during the discussions. These student-centred tasks were aimed at accomplishing important educational goals such as student motivation, improved cognitive and academic performance, enhanced access to online learning resources, peer learning and collaboration. 
The 2012-2013 course proved to be much more interactive and challenging than the previous one, owing to the major emphasis given to the more practical aspects, in preparation for the fieldwork in archaeology and bioarchaeology, which was carried out in the summer of 2013, working with their peers from Ohio State University and other Universities in the USA, Canada and Australia. Particular attention was devoted to the language of funerary archaeology, and the trainees extracted definitions from the texts they were using to enrich an ongoing English-Italian glossary of funerary archaeology terms. The most important items and sentence structures of the English language were studied and revised, and an English grammar containing contextualized examples drawn from specialized works in that domain was enriched with new material. Student exchanges under different European and international programmes have emphasized on the need for specialist knowledge in specific thematic areas, alongside an oral and written command of a foreign language.}, KEYWORDS = {funerary archaeology, CLIL, Roman and Medieval archaeology, University education, collaborative learning}, URL = {https://publications.cnr.it/doc/288032}, PUBLISHER = {International Association of Technology, Education and Development (IATED) Academy (Valencia, ESP)}, ISBN = {978-84-616-8412-0}, CONFERENCE_NAME = {INTED (International Technology, Education and Development Conference)}, CONFERENCE_PLACE = {Valencia}, CONFERENCE_DATE = {10-12 March 2014}, } @INPROCEEDINGS{CIGNONI_2014_INPROCEEDINGS_CMS_281790, AUTHOR = {Cignoni, L. and Marinelli, R. and Spadoni, G. 
P.}, TITLE = {A CLIL/blended learning approach for cruise tourism courses in Italy using lexical/semantic databases and information technology resources}, YEAR = {2014}, ABSTRACT = {In this paper we discuss the possibility of exploiting specialized texts for cruise shipping, hotel and catering management courses in English to be held in nautical and other senior high schools in Italy, more and more involved in the promising and strongly developing field of tourism. The courses will be carried out using a content and language integrated (CLIL) approach, and will be run by native language teachers working alone or in collaboration with instructional supporting experts in the different sectors. The aims of the courses are to study the maritime terminology related to ships and navigation (crew members, safety and security systems on board, etc.) and, in particular, to the cruise ship industry and hospitality operations environment (passenger mobility on board and ashore during excursions, etc.). Students will become acquainted with the language of routine operations, giving directions, understanding commands in emergency situations, reporting on weather forecasts, and with the terms and definitions belonging to the tourist activity specialized in the management of cruise ships and passengers. The scarce number of text books available for maritime English makes it necessary to supply Italian students with a variety of material in paper and computer format, so as to help them expand their vocabulary in the foreign language with greater confidence and proficiency. The texts will include shipping and cruise shipping books and magazines, manuals, contracts, technical documents, cruise line and tour operator websites, passenger blogs, and other texts of the cruise community. 
The trainees, constantly exposed to the language, will work individually, in pairs and in groups, at the presence of English teachers and operators in the field of cruise tourism, and will perform activities that cover the four communication skills of reading, writing, listening and speaking (gap filling, matching, summarizing, etc.). They will use modern technological equipment including computers, ipads, and other devices, incorporated in the classrooms according to a blended learning approach, which combines face-to-face and on-line education. Students can explore the meanings of single words by consulting the English lexical semantic database WordNet implemented at Princeton University, alongside the Italian terminological database Mariterm containing data belonging to the navigation and sea transport domains, as well as visualized images. Both databases are managed by user-friendly tools that can be easily accessed by teachers and students. Mariterm can be constantly enriched and updated with new information in the different sectors of maritime English. Finally, a grammar illustrating the most important items of the English language will be made available to the students, who can copy it on a file and expand it with contextualized examples extracted from the texts they will be reading and share the outcomes with their peers. English has been internationally accepted as the language for communication, and is therefore particularly important for exchanges among those who wish to work in the tourism industry.}, KEYWORDS = {maritime English, tourism, CLIL, lexical semantic databases, blended learning}, PAGES = {6552-6559}, URL = {https://publications.cnr.it/doc/281790}, VOLUME = {1}, PUBLISHER = {International Association of Technology, Education and Development (IATED) (Valencia, ESP)}, ISBN = {978-84-616-8412-0}, CONFERENCE_NAME = {INTED2014. 
8th International Technology, Education and Development Conference}, CONFERENCE_PLACE = {Valencia (Spain)}, CONFERENCE_DATE = {10th-12th of March, 2014}, BOOKTITLE = {INTED2014. 8th International Technology, Education and Development Conference. Valencia (Spain), 10th-12th of March, 2014. Proceedings}, EDITOR = {Chova, L. G. and Martínez, A. L. and Torres, I. C.}, } @INPROCEEDINGS{CIMINO_2014_INPROCEEDINGS_CCDT_294105, AUTHOR = {Cimino, A. and Cresci, S. and Dell'Orletta, F. and Tesconi, M.}, TITLE = {Linguistically-motivated and Lexicon Features for Sentiment Analysis of Italian Tweets}, YEAR = {2014}, ABSTRACT = {In this paper we describe our approach to EVALITA 2014 SENTIment POLarity Classification (SENTIPOLC) task. We participated only in the Polarity Classification sub-task. By resorting to a wide set of general-purpose features qualifying the lexical and grammatical structure of a text, automatically created ad-hoc lexicons and existing free available resources, we achieved the second best accuracy.}, KEYWORDS = {Lexicons resources}, URL = {https://publications.cnr.it/doc/294105}, CONFERENCE_NAME = {The 4th Conference for Evaluation of NLP and Speech Tools for Italian (EVALITA)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {2014}, } @INPROCEEDINGS{DEFELICE_2014_INPROCEEDINGS_DBRQM_291282, AUTHOR = {De Felice, I. and Bartolini, R. and Russo, I. and Quochi, V. and Monachini, M.}, TITLE = {Evaluating ImagAct-WordNet mapping for English and Italian through videos}, YEAR = {2014}, ABSTRACT = {In this paper we present the results of the evaluation of an automatic mapping between two lexical resources, WordNet/ItalWordNet and ImagAct, a conceptual ontology of action types instantiated by video scenes. Results are compared with those obtained from a previous experiment performed only on Italian data. 
Differences between the two evaluation strategies, as well as between the quality of the mappings for the two languages considered in this paper, are discussed.}, KEYWORDS = {Language Resources (LRs)}, PAGES = {128-131}, URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-CLICit-2014.pdf}, DOI = {10.12871/CLICIT2014126}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-88-67-41472-7}, CONFERENCE_NAME = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 \& the Fourth International Workshop EVALITA 2014}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 December 2014, Pisa}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{DEFELICE_2014_INPROCEEDINGS_DDM_292073, AUTHOR = {De Felice, I. and Donati, M. and Marotta, G.}, TITLE = {CLaSSES: a new digital resource for Latin epigraphy}, YEAR = {2014}, ABSTRACT = {CLaSSES (Corpus for Latin Sociolinguistic Studies on Epigraphic textS) is an annotated corpus for quantitative and qualitative sociolinguistic analyses on Latin inscriptions. It allows specific researches on phonological and morphophonological phenomena of non-standard Latin forms with crucial reference to the typology of the text, its origin and chronological collocation. This paper presents the first macrosection of CLaSSES, focused on the inscriptions from the archaic-early period.}, PAGES = {132-137}, URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-EVALITA-2014.pdf}, PUBLISHER = {Pisa University Press (Pisa, ITA)}, CONFERENCE_NAME = {CLiC-it. La Prima Conferenza Italiana di Linguistica Computazionale}, CONFERENCE_PLACE = {Pisa, Italy}, CONFERENCE_DATE = {9-10/12/2014}, BOOKTITLE = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 and the Fourth International Workshop EVALITA 2014}, EDITOR = {Basili, R. and Lenci, A. 
and Magnini, B.}, } @INPROCEEDINGS{DELGRATTA_2014_INPROCEEDINGS_DFKMS_285395, AUTHOR = {Del Gratta, R. and Frontini, F. and Khan, F. and Mariani, J. and Soria, C.}, TITLE = {The LREMap for Under-Resourced Languages}, YEAR = {2014}, ABSTRACT = {A complete picture of currently available language resources and technologies for the under-resourced languages of Europe is still lacking. Yet this would help policy makers, researchers and developers enormously in planning a roadmap for providing all languages with the necessary instruments to act as fully equipped languages in the digital era. In this paper we introduce the LRE Map and show its utility for documenting available language resources and technologies for under-resourced languages. The importance of the serialization of the LREMap into (L)LOD along with the possibility of its connection to a wider world is also introduced.}, KEYWORDS = {language resources, less-resourced languages, linguistic linked open data}, PAGES = {78-83}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, CONFERENCE_NAME = {Workshop on Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)}, CONFERENCE_PLACE = {Reykjavik}, CONFERENCE_DATE = {26/05/2014}, BOOKTITLE = {Proceedings of the Workshop on Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)}, EDITOR = {Pretorius, L. and Soria, C. and Baroni, P.}, } @INPROCEEDINGS{DELGRATTA_2014_INPROCEEDINGS_DN_318313, AUTHOR = {Del Gratta, R. and Nahli, O.}, TITLE = {Enhancing Arabic WordNet with the use of Princeton WordNet and a bilingual dictionary}, YEAR = {2014}, ABSTRACT = {This paper describes an heuristic-based approach to enhance existing WordNets with freely available bilingual resources. The approach has been applied to the Arabic WordNet using the AraMorph bilingual dictionary as bilingual resource, but its guidelines are quite general to be effectively applied to other languages. 
The English words extracted from the bilingual resource are checked against Princeton WordNet in order to quantify their coverage and to select only those words which share the same set of synsets. This strongly reduces the number of Arabic words of the pairs. These latter are then checked against the Arabic WordNet to make new words emerge and -possibly- add new synonyms.}, KEYWORDS = {WordNet, Arabic, English, Bilingual Resource, Enhancement}, PAGES = {278-284}, URL = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=\&arnumber=7016632}, DOI = {10.1109/CIST.2014.7016632}, PUBLISHER = {IEEE Communications Society (Piscataway, USA)}, ISBN = {978-1-4799-5978-5}, CONFERENCE_NAME = {ANLP IEEE CIST14}, CONFERENCE_PLACE = {Tetuan, Morocco}, CONFERENCE_DATE = {20-22/10/2014}, BOOKTITLE = {3rd International IEEE Colloquium on Information Science and Technology; From 20th to 22nd of October 2014 Tetuan-Chefchaouen Morocco}, } @INPROCEEDINGS{DELGRATTA_2014_INPROCEEDINGS_DPS_281039, AUTHOR = {Del Gratta, R. and Pardelli, G. and Goggi, S.}, TITLE = {The LRE Map disclosed}, YEAR = {2014}, ABSTRACT = {This paper describes a serialization of the LRE Map database according to the RDF model. Due to the peculiar nature of the LRE Map, many ontologies are necessary to model the map in RDF, including newly created and reused ontologies. The importance of having the LRE Map in RDF and its connections to other open resources is also addressed.}, KEYWORDS = {Language Resource, LOD, Metadata}, PAGES = {3534-3541}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {EUROPEAN LANGUAGE RESOURCES ASSOC-ELRA FRANCE (Parigi, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {Ninth International Conference on Language Resources and Evaluation (LREC'14)}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may 2014}, BOOKTITLE = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)}, EDITOR = {Calzolari, N. 
and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{DELGROSSO_2014_INPROCEEDINGS_DMMP_288069, AUTHOR = {Del Grosso, A. M. and Marchi, S. and Murano, F. and Pesini, L.}, TITLE = {A collaborative tool for philological research: experiments on Ferdinand de Saussure's manuscripts}, YEAR = {2014}, ABSTRACT = {The present paper describes a philological-computational tool developed by the Istituto di Linguistica Computazionale (ilc - cnr) of Pisa, aimed at creating a digital edition of Ferdinand de Saussure's unpublished manuscripts. Since the use of a digital edition and of the most modern computer technology allows a more in-depth research, the ilc is developing a set of digital tools in order to take advantage of both the documents and the related information added by the scientific community. The integration exploits the Java enterprise platform by organizing the different features in modules. Thus, the tool meets the following requirements: (i) converting legacy digital resources into valid Xml documents (tei compliant); (ii) parallel visualization among imported texts and related images; (iii) search and indexing; (iv) handling of variant readings; and (v) collaborative annotation.}, KEYWORDS = {Computational and collaborative philology}, PAGES = {163-175}, URL = {https://publications.cnr.it/doc/288069}, PUBLISHER = {CLEUP (Padova, ITA)}, ISBN = {978-88-6787-260-2}, CONFERENCE_NAME = {Aiucd}, CONFERENCE_PLACE = {Padova}, CONFERENCE_DATE = {11-12 december 2013}, BOOKTITLE = {Collaborative Research Practices and Shared Infrastructures for Humanities Computing}, EDITOR = {Agosti, M. and Tomasi, F.}, } @INPROCEEDINGS{DELGROSSO_2014_INPROCEEDINGS_DN_295187, AUTHOR = {Del Grosso, A. M. 
and Nahli, O.}, TITLE = {Towards a flexible open-source software library for multi-layered scholarly textual studies: An Arabic case study dealing with semi-automatic language processing}, YEAR = {2014}, ABSTRACT = {This paper presents both the general model and a case study of the Computational and Collaborative Philology Library (CoPhiLib), an ongoing initiative underway at the Institute for Computational Linguistics (ILC) of the National Research Council (CNR), Pisa, Italy. The library, designed and organized as a reusable, abstract and open-source software component, aims at solving the needs of multi-lingual and cross-lingual analysis by exposing common Application Programming Interfaces (APIs). The core modules, coded by the Java programming language, constitute the groundwork of a Web platform designed to deal with textual scholarly needs. The Web application, implemented according to the Java Enterprise specifications, focuses on multi-layered analysis for the study of literary documents and related multimedia sources. This ambitious challenge seeks to obtain the management of textual resources, on the one hand by abstracting from current language, on the other hand by decoupling from the specific requirements of single projects. This goal is achieved thanks to methodologies declared by the "agile process", and by putting into effect suitable use case modeling, design patterns, and component-based architectures. The reusability and flexibility of the system have been tested on an Arabic case study: the system allows users to choose the morphological engine (such as AraMorph or Al-Khalil), along with linguistic granularity (i.e. with or without declension). 
Finally, the application enables the construction of annotated resources for further statistical engines (training set).}, KEYWORDS = {Design, Information Engineering, Design Patterns, Text Processing, Arabic Natural Language Processing}, PAGES = {285-290}, URL = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?tp=\&arnumber=7016633\&queryText%3Ddel+grosso+philology}, DOI = {10.1109/CIST.2014.7016633}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-4799-5978-5}, CONFERENCE_NAME = {Third IEEE International Colloquium in Information Science and Technology (CIST)}, CONFERENCE_PLACE = {Tetuan, Morocco}, CONFERENCE_DATE = {20-22/10/2014}, BOOKTITLE = {IEEE Conference Publications-Catalog Number: CFP1467R-ART}, EDITOR = {El Mohajir, M. and Al Achhab, M. and Chahhou, M. and Mounir, A. and El Mohajir, B. and Pirrelli, V. and Zarghili, A. and Elfar, M.}, } @INPROCEEDINGS{DELLORLETTA_2014_INPROCEEDINGS_DVCM_285670, AUTHOR = {Dell'Orletta, F. and Venturi, G. and Cimino, A. and Montemagni, S.}, TITLE = {T2K: a System for Automatically Extracting and Organizing Knowledge from Texts}, YEAR = {2014}, ABSTRACT = {In this paper, we present T2K, a suite of tools for automatically extracting domain-specific knowledge from collections of Italian and English texts. T2K (Text-To-Knowledge v2) relies on a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine learning which are dynamically integrated to provide an accurate and incremental representation of the content of vast repositories of unstructured documents. Extracted knowledge ranges from domain-specific entities and named entities to the relations connecting them and can be used for indexing document collections with respect to different information types. T2K also includes "linguistic profiling" functionalities aimed at supporting the user in constructing the acquisition corpus, e.g. 
in selecting texts belonging to the same genre or characterized by the same degree of specialization or in monitoring the "added value" of newly inserted documents. T2K is a web application which can be accessed from any browser through a personal account which has been tested in a wide range of domains.}, KEYWORDS = {Natural Language Processing, Information Extraction, Knowledge Management}, PAGES = {2062-2070}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/590_Paper.pdf}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {International Conference on Language Resources and Evaluation (LREC)}, CONFERENCE_PLACE = {Reykjavik}, CONFERENCE_DATE = {26-31 maggio 2014}, } @INPROCEEDINGS{DELLORLETTA_2014_INPROCEEDINGS_DWCVM_294084, AUTHOR = {Dell'Orletta, F. and Wieling, M. and Cimino, A. and Venturi, G. and Montemagni, S.}, TITLE = {Assessing the readability of sentences: which corpora and features?}, YEAR = {2014}, ABSTRACT = {The paper investigates the problem of sentence readability assessment, which is modelled as a classification task, with a specific view to text simplification. In particular, it addresses two open issues connected with it, i.e. the corpora to be used for training, and the identification of the most effective features to determine sentence readability. An existing readability assessment tool developed for Italian was specialized at the level of training corpus and learning algorithm. 
A maximum entropy-based feature selection and ranking algorithm (grafting) was used to identify the most relevant features: it turned out that assessing the readability of sentences is a complex task, requiring a high number of features, mainly syntactic ones.}, PAGES = {163-173}, URL = {http://acl2014.org/acl2014/W14-18/pdf/W14-1820.pdf}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-941643-03-7}, CONFERENCE_NAME = {9th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2014)}, CONFERENCE_PLACE = {Baltimore, Maryland, USA}, CONFERENCE_DATE = {26 giugno 2014}, BOOKTITLE = {Proceedings of 9th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2014)}, } @INPROCEEDINGS{FERRARI_2014_INPROCEEDINGS_FDSG_294419, AUTHOR = {Ferrari, A. and Dell'Orletta, F. and Spagnolo, G. O. and Gnesi, S.}, TITLE = {Measuring and improving the completeness of natural language requirements}, YEAR = {2014}, ABSTRACT = {[Context and motivation] System requirements specifications are normally written in natural language. These documents are required to be complete with respect to the input documents of the requirements definition phase, such as preliminary specifications, transcripts of meetings with the customers, etc. In other terms, they shall include all the relevant concepts and all the relevant interactions among concepts expressed in the input documents. [Question/Problem] Means are required to measure and improve the completeness of the requirements with respect to the input documents. [Principal idea/results] To measure this completeness, we propose two metrics that take into account the relevant terms of the input documents, and the relevant relationships among terms. 
Furthermore, to improve the completeness, we present a natural language processing tool named Completeness Assistant for Requirements (CAR), which supports the definition of the requirements: the tool helps the requirements engineer in discovering relevant concepts and interactions. [Contribution] We have performed a pilot test with CAR, which shows that the tool can help improving the completeness of the requirements with respect to the input documents. The study has also shown that CAR is actually useful in the identification of specific/alternative system behaviours that might be overseen without the tool. © 2014 Springer International Publishing Switzerland.}, KEYWORDS = {natural language processing, relation extraction, Requirements analysis}, PAGES = {23-38}, URL = {https://link.springer.com/chapter/10.1007%2F978-3-319-05843-6_3#citeas}, VOLUME = {8396}, DOI = {10.1007/978-3-319-05843-6_3}, ISBN = {978-3-319-05843-6}, CONFERENCE_NAME = {REFSQ 2014, Requirements Engineering: Foundation for Software Quality. 20th International Working Conference}, CONFERENCE_PLACE = {Essen, Germany}, CONFERENCE_DATE = {7-10 April 2014}, BOOKTITLE = {Requirements Engineering: Foundation for Software Quality 20th International Working Conference, REFSQ 2014, Essen, Germany, April 7-10, 2014. Proceedings}, EDITOR = {Salinesi, C. and Van De Weerd, I.}, } @INPROCEEDINGS{FRONTINI_2014_INPROCEEDINGS_FQM_291452, AUTHOR = {Frontini, F. and Quochi, V. 
and Monachini, M.}, TITLE = {Polysemy alternations extraction using the PAROLE SIMPLE CLIPS Italian lexicon}, YEAR = {2014}, ABSTRACT = {This paper presents the results of an experiment of polysemy alternations induction from a lexicon (Utt and Padó, 2011; Frontini et al., 2014), discussing the results and proposing an amendment in the original algorithm.}, KEYWORDS = {Language Resources and Technologies}, PAGES = {175-179}, URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-CLICit-2014.pdf}, DOI = {10.12871/CLICIT2014134}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-88-67-41472-7}, CONFERENCE_NAME = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 \& the Fourth International Workshop EVALITA 2014}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 December 2014, Pisa}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{FRONTINI_2014_INPROCEEDINGS_FQPUM_286984, AUTHOR = {Frontini, F. and Quochi, V. and Padó, S. and Utt, J. and Monachini, M.}, TITLE = {Polysemy Index for Nouns: an Experiment on Italian using the PAROLE SIMPLE CLIPS Lexical Database}, YEAR = {2014}, ABSTRACT = {An experiment is presented to induce a set of polysemous basic type alternations (such as ANIMAL-FOOD, or BUILDING-INSTITUTION) by deriving them from the sense alternations found in an existing lexical resource. The paper builds on previous work and applies those results to the Italian lexicon PAROLE SIMPLE CLIPS. The new results show how the set of frequent type alternations that can be induced from the lexicon is partly different from the set of polysemy relations selected and explicitly applied by lexicographers when building it. 
The analysis of mismatches shows that frequent type alternations do not always correspond to prototypical polysemy relations, nevertheless the proposed methodology represents a useful tool offered to lexicographers to systematically check for possible gaps in their resource.}, KEYWORDS = {Polysemy, lexical resources, semantics}, PAGES = {2955-2963}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, BOOKTITLE = {LREC 2014 Ninth International Conference on Language Resources and Evaluation Proceedings}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{GAGGIOLI_2014_INPROCEEDINGS_GCSPTBCFCTDGTRR_283374, AUTHOR = {Gaggioli, A. and Cipresso, P. and Serino, S. and Pioggia, G. and Tartarisco, G. and Baldus, G. and Corda, D. and Ferro, M. and Carbonaro, N. and Tognetti, A. and De Rossi, D. and Giakoumis, D. and Tzovaras, D. and Riera, A. and Riva, G.}, TITLE = {A decision support system for real-time stress detection during virtual reality exposure}, YEAR = {2014}, ABSTRACT = {Virtual Reality (VR) is increasingly being used in combination with psycho-physiological measures to improve assessment of distress in mental health research and therapy. However, the analysis and interpretation of multiple physiological measures is time consuming and requires specific skills, which are not available to most clinicians. To address this issue, we designed and developed a Decision Support System (DSS) for automatic classification of stress levels during exposure to VR environments. 
The DSS integrates different biosensor data (ECG, breathing rate, EEG) and behavioral data (body gestures correlated with stress), following a training process in which self-rated and clinical-rated stress levels are used as ground truth. Detected stress events for each VR session are reported to the therapist as an aggregated value (ranging from 0 to 1) and graphically displayed on a diagram accessible by the therapist through a web-based interface.}, KEYWORDS = {Psychological Stress, Psychophysiology, Virtual Reality, Decision Support System, Biosensors}, PAGES = {114-120}, URL = {https://publications.cnr.it/doc/283374}, VOLUME = {196}, DOI = {10.3233/978-1-61499-375-9-114}, PUBLISHER = {IOS Press (Tokyo, Paesi Bassi)}, ISSN = {0926-9630}, CONFERENCE_NAME = {Medicine Meets Virtual Reality (MMVR21)}, BOOKTITLE = {Medicine Meets Virtual Reality}, EDITOR = {Westwood, J. D.}, } @INPROCEEDINGS{GOGGI_2014_INPROCEEDINGS_GPGB_280394, AUTHOR = {Goggi, S. and Pardelli, G. and Giannini, S. and Biagioni, S.}, TITLE = {Grey Literature in European Commission Projects}, YEAR = {2014}, ABSTRACT = {The survey is focused on the documentation produced by the European Commission (EC) projects involved in the Framework Programme for Research and Technological Development (hereafter FP7) and managed by the Italian National Research Council (hereafter CNR). In particular, the Grey Literature (GL) available on CORDIS and European Projects websites was analysed. In order to verify how it is managed and whether it is compliant with EC recommendations, some categories were introduced to identify, measure and evaluate the usability and availability of projects production. Data was obtained from a sample of European projects websites.}, KEYWORDS = {Grey Literature. European Commission Projects, A. 
1 INTRODUCTORY AND SURVEY}, PAGES = {98-109}, URL = {https://publications.cnr.it/doc/280394}, VOLUME = {15}, ISBN = {978-90-77484-22-7}, CONFERENCE_NAME = {GL15-Fifteenth International Conference on Grey Literature. The Grey Audit: a Field Assessment in Grey Literature}, CONFERENCE_PLACE = {Bratislava, Slovakia}, CONFERENCE_DATE = {2-3 December 2013}, BOOKTITLE = {The Grey Audit: a Field Assessment in Grey Literature}, EDITOR = {Farace, D. J. and Frantzen, J. and Service, G. I. L. N.}, } @INPROCEEDINGS{KHAN_2014_INPROCEEDINGS_KBF_286824, AUTHOR = {Khan, F. and Boschetti, F. and Frontini, F.}, TITLE = {Using lemon to Model Lexical Semantic Shift in Diachronic Lexical Resources}, YEAR = {2014}, ABSTRACT = {In this paper we propose a model, called lemonDIA, for representing lexical semantic change using the lemon framework and based on the ontological notion of the perdurant. Namely we extend the notion of sense in lemon by adding a temporal dimension and then define a class of perdurant entities that represents a shift in meaning of a word and which contains different related senses. We start by discussing the general problem of semantic shift and the utility of being able to easily access and represent such information in diachronic lexical resources. We then describe our model and illustrate it with examples.}, KEYWORDS = {lemon, linked data, OWL, ontologies, perdurants, semantic shift}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/workshops/LREC2014Workshop-LDL2014%20Proceedings.pdf}, CONFERENCE_NAME = {3rd Workshop on Linked Data in Linguistics: Multilingual Knowledge Resources and Natural Language Processing (LDL2014)}, CONFERENCE_PLACE = {Reykjavik}, CONFERENCE_DATE = {May 27th, 2014}, BOOKTITLE = {Proceedings of the 3rd Workshop on Linked Data in Linguistics (LDL-2014)}, EDITOR = {Chiarcos, C. and McCrae, J. P. and Osenova, P. and Vertan, C.}, } @INPROCEEDINGS{LAM_2014_INPROCEEDINGS_LT_319595, AUTHOR = {Lamé, M. 
and Tanca, C.}, TITLE = {Hi-storytelling: Street Museum \& Speaking Stones! A Study Case}, YEAR = {2014}, ABSTRACT = {How could museums go down the streets, taking advantage of the historical primary sources, scattered everywhere, impossible to bring back inside the collections, such as inscriptions in situ or reused? We explore the inscription as a dispositive of information and communication and we apply its message to a fictional story telling on contemporary social networks.}, KEYWORDS = {epigrafia, epigrafica digitale, dispositivo epigrafico, social network, musei, storytelling}, URL = {http://mwf2014.museumsandtheweb.com/paper/hi-storytelling-street-museum-speaking-stones-a-study-case/}, CONFERENCE_NAME = {MWF2014: Museums and the Web Florence 2014}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {18-21 febbraio 2014}, EDITOR = {Cherry, R. and Proctor, N.}, } @INPROCEEDINGS{LYDING_2014_INPROCEEDINGS_LSBBCDDLP_289308, AUTHOR = {Lyding, V. and Stemle, E. and Borghetti, C. and Brunello, M. and Castagnoli, S. and Dell'Orletta, F. and Dittmann, H. and Lenci, A. and Pirrelli, V.}, TITLE = {The PAISÀ Corpus of Italian Web Texts}, YEAR = {2014}, ABSTRACT = {PAISÀ is a Creative Commons licensed, large web corpus of contemporary Italian. We describe the design, harvesting, and processing steps involved in its creation.}, KEYWORDS = {Corpus annotation, Tree-bank, Corpus design, Corpus harvesting}, PAGES = {36-43}, URL = {http://aclweb.org/anthology/W14-04}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, CONFERENCE_NAME = {9th Web as Corpus Workshop (WaC-9)}, CONFERENCE_PLACE = {Gothenburg, Sweden}, CONFERENCE_DATE = {April 26, 2014}, BOOKTITLE = {Proceedings of the 9th Web as Corpus Workshop (WaC-9)}, EDITOR = {Bildhauer, F. 
and Schäfer, R.}, } @INPROCEEDINGS{MARCONI_2014_INPROCEEDINGS_M_286627, AUTHOR = {Marconi, L.}, TITLE = {La tecnología como auxilio en la creación de un diccionario de una lengua oral de Africa}, YEAR = {2014}, ABSTRACT = {Questo studio, riferito alla lingua fang della Guinea Equatoriale, intende mostrare come la tecnologia possa fornire un valido aiuto nello sviluppo di strumenti per la salvaguardia di lingue orali. Il fang, parlato da più di 1000000 di persone, è parlato soprattutto in Gabon, Camerun, Guinea Equatoriale e nell'estremo nordest del Congo e a Sao Tomé. Si considera suddiviso in sei dialetti principali: ntumu, okak, achí, meké, mveñ, nzaman. In Guinea Equatoriale risulta la più parlata tra le lingue nazionali e quella più omogenea, ciò nonostante si possono individuare due varietà: l'oka e l'ntumu. Il fang, lingua quasi esclusivamente orale, nonostante i numerosi parlanti è in pericolo di estinzione poiché non sta realizzando una evoluzione autonoma e usa lo spagnolo per descrivere elementi della società attuale; il fang è la lingua della comunicazione quotidiana, usata nelle comunicazioni private, in famiglia e nelle situazioni di lavoro non intellettuale. Il fattore più potente nella salvaguardia di una lingua è quello di utilizzarla, anche se in modo non perfetto, la sua introduzione in ogni occasione della vita fino a che il suo uso sia percepito come qualcosa di naturale e non percepito come artificiale; è essenziale quindi dirigere gli sforzi della trasmissione della lingua e della cultura alle generazioni più giovani realizzando strumenti (dizionari, file audio, ecc.) per proteggere la lingua e la cultura. Nell'articolo vengono fatte considerazioni sui dizionari cartacei ed elettronici, descritti gli scopi per la realizzazione di un dizionario elettronico, individuati i possibili destinatari del dizionario bilingue e descritte le linee guida per la progettazione, l'implementazione e la consultazione. 
Viene infine descritto sommariamente il dizionario realizzato fornendo informazioni sull'alfabeto adottato, sulla categorizzazione delle parole, sul tono, sulle varietà considerate e su elementi inclusi nella parte elettronica quali il suono e infine una descrizione degli ingressi sia per la parte fang-spagnolo che per la parte spagnolo-fang.}, KEYWORDS = {diccionarios electrónicos, lengua fang, español}, URL = {https://publications.cnr.it/doc/286627}, PUBLISHER = {Ediciones Catedra 2014 (Santiago de Cuba, CUB)}, ISBN = {9789592840195}, CONFERENCE_NAME = {XIII° Conferencia Internacional de Cultura Africana y Afroamericana}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {12-16 Aprile 2014}, } @INPROCEEDINGS{MARZI_2014_INPROCEEDINGS_MNF_295178, AUTHOR = {Marzi, C. and Nahli, O. and Ferro, M.}, TITLE = {Word Processing for Arabic Language: A reappraisal of morphology induction through adaptive memory self-organisation strategies}, YEAR = {2014}, ABSTRACT = {Modelling the mental lexicon focuses on processing and storage dynamics, since lexical organisation relies on the process of input recoding and adaptive strategies for long-term memory organisation. A fundamental issue in word processing is represented by the emergence of the morphological organisation level in the lexicon, based on paradigmatic relations between fully-stored word forms. Morphology induction can be defined as the task of identifying morphological formatives within morphologically complex word forms. In the computational framework we propose here (TSOMs), based on Self-Organising Maps with Hebbian connections defined over a temporal layer, the identification/perception of surface morphological relations involves the alignment of recoded representations of morphologically-related input words. 
Facing a non-concatenative morphology such as the Arabic inflectional system prompts a reappraisal of morphology induction through adaptive organisation strategies, which affect both lexical representations and long-term storage. We will show how a strongly adaptive self-organisation during training is conducive to emergent relations between stored word forms, and to high accuracy rates in generalising knowledge of stored words to unknown forms.}, KEYWORDS = {Non-concatenative morphological structure, lexical storage and access, SOMs, word recoding and processing, adaptive strategies, morphology}, PAGES = {241-247}, URL = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=7016626\&punumber%3D6996097}, DOI = {10.1109/CIST.2014.7016626}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-4799-5979-2}, CONFERENCE_NAME = {Third IEEE International Colloquium in Information Science and Technology (CIST)}, CONFERENCE_PLACE = {Tetuan (Morocco)}, CONFERENCE_DATE = {20-22/10/2014}, BOOKTITLE = {IEEE Conference Publications-Catalog Number: CFP1467R-ART}, EDITOR = {El Mohajir, M. and Al Achhab, M. and Chahhou, M. and Mounir, A. and El Mohajir, B. and Pirrelli, V. and Zarghili, A. and Elfar, M.}, } @INPROCEEDINGS{MONEGLIA_2014_INPROCEEDINGS_MBFGKMP_286990, AUTHOR = {Moneglia, M. and Brown, S. and Frontini, F. and Gagliardi, G. and Khan, F. and Monachini, M. and Panunzi, A.}, TITLE = {The IMAGACT Visual Ontology. an Extendable Multilingual Infrastructure for the Representation of Lexical Encoding of Action}, YEAR = {2014}, ABSTRACT = {Action verbs have many meanings, covering actions in different ontological types. Moreover, each language categorizes action in its own way. One verb can refer to many different actions and one action can be identified by more than one verb. The range of variations within and across languages is largely unknown, causing trouble for natural language processing tasks. 
IMAGACT is a corpus-based ontology of action concepts, derived from English and Italian spontaneous speech corpora, which makes use of the universal language of images to identify the different action types extended by verbs referring to action in English, Italian, Chinese and Spanish. This paper presents the infrastructure and the various linguistic information the user can derive from it. IMAGACT makes explicit the variation of meaning of action verbs within one language and allows comparisons of verb variations within and across languages. Because the action concepts are represented with videos, extension into new languages beyond those presently implemented in IMAGACT is done using competence-based judgments by mother-tongue informants without intense lexicographic work involving underdetermined semantic description}, KEYWORDS = {Lexicon, Lexical Database, Ontologies}, PAGES = {3425-3432}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. 
and Piperidis, S.}, } @INPROCEEDINGS{MONTEMAGNI_2014_INPROCEEDINGS_M_330111, AUTHOR = {Montemagni, S.}, TITLE = {DH@ILC: linee di attività e ricerca}, YEAR = {2014}, ABSTRACT = {Le principali linee di ricerca e sviluppo dell'ILC nel settore delle DH possono essere ricondotte ai seguenti filoni: acquisizione e conservazione di testi; progettazione e sviluppo di risorse e strumenti per il trattamento automatico di lingue classiche e varietà storiche della lingua; progettazione e sviluppo di strumenti per l'analisi del testo; costruzione di un'infrastruttura italiana per la ricerca nell'ambito delle scienze umane e sociali.}, KEYWORDS = {Digital Humanities, Trattamento Automatico del Linguaggio, Risorse Linguistiche}, PAGES = {101-111}, URL = {https://publications.cnr.it/doc/330111}, PUBLISHER = {CLEUP (Padova, ITA)}, ISBN = {9788867872602}, CONFERENCE_NAME = {2nd AIUCD Annual Conference}, CONFERENCE_PLACE = {Padova, Italy}, CONFERENCE_DATE = {11-12 December 2013}, BOOKTITLE = {Collaborative Research Practices and Shared Infrastructures for Humanities Computing}, EDITOR = {Agosti, M. and Tomasi, F.}, } @INPROCEEDINGS{MORGAVI_2014_INPROCEEDINGS_MNMCFCM_312489, AUTHOR = {Morgavi, G. and Nerino, R. and Marconi, L. and Cutugno, P. and Ferraris, C. and Cinini, A. and Morando, M.}, TITLE = {NINFA iNtelligent Integrated Network For Aged people}, YEAR = {2014}, ABSTRACT = {In this paper we present the NINFA project outline and its preliminary developments. The project is based on a service platform suited for elder people called the Virtual Village Network, whose user interface allows to deliver at home different services. i.e.: user supervision, communication and interaction among users for social inclusion, exergame delivering, monitoring of the wellness status. The preliminary work done on ICT technologies acceptability issues and on the implementation of the User Interface (UI) and of the Human Computer Interface (HCI) is presented. 
The HCI we developed is particularly suited for elderly people and motor impaired patients because the interaction is managed only by finger/hand gestures and vocal control. Furthermore, the relationship between a set of motor, linguistic and cognitive parameters evaluated during exergame execution and the wellness status of the user is investigated.}, KEYWORDS = {service platform, elder people, at home services, exergame delivering, tele-monitoring, wellness status, human computer interface}, URL = {https://publications.cnr.it/doc/312489}, VOLUME = {11}, DOI = {10.1007/978-3-319-18374-9_25}, PUBLISHER = {Springer (London, GBR)}, ISBN = {978-3-319-18374-9}, CONFERENCE_NAME = {5° Forum Italiano per l'Ambient Assisted Living-ForitAAL}, CONFERENCE_PLACE = {Catania, Italy}, CONFERENCE_DATE = {2-5 Settembre 2014}, BOOKTITLE = {Ambient assisted living, Italian Forum 2014}, EDITOR = {Andò, P. B. and Siciliano, P. P.}, } @INPROCEEDINGS{PALLOTTI_2014_INPROCEEDINGS_PFAMF_287029, AUTHOR = {Pallotti, G. and Frontini, F. and Affè, F. and Monachini, M. and Ferrari, S.}, TITLE = {Presenting a System of Human-Machine Interaction for Performing Map Tasks}, YEAR = {2014}, ABSTRACT = {A system for human machine interaction is presented, that offers second language learners of Italian the possibility of assessing their competence by performing a map task, namely by guiding a virtual follower through a map with written instructions in natural language. 
The underlying natural language processing algorithm is described, and the map authoring infrastructure is presented.}, KEYWORDS = {Language learning, human machine interaction, map tasks}, PAGES = {3963-3966}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{PANUNZI_2014_INPROCEEDINGS_PDGJMMQR_285381, AUTHOR = {Panunzi, A. and De Felice, I. and Gregori, L. and Jacoviello, S. and Monachini, M. and Moneglia, M. and Quochi, V. and Russo, I.}, TITLE = {Translating action verbs using a dictionary of images: the IMAGACT ontology}, YEAR = {2014}, ABSTRACT = {Action verbs have many meanings, covering actions in different ontological types. Moreover, each language categorizes action in its own way. One verb can refer to many different actions and one action can be identified by more than one verb. The range of variations within and across languages is largely unknown, causing trouble in all translation tasks. IMAGACT is a corpus-based ontology of action concepts, derived from English and Italian spontaneous speech corpora, which makes use of the universal language of images to identify the different action types extended by verbs referring to action in English, Italian, Chinese and Spanish. This paper presents the IMAGACT search interface and the various kinds of linguistic information the user can derive from it. IMAGACT makes explicit the variation of meaning of action verbs within one language and allows comparisons of verb variations within and across languages. 
Because the action concepts are represented with videos, extension into new languages beyond those presently implemented in IMAGACT is done using competence-based judgments by mother-tongue informants, without intense lexicographic work involving underdetermined semantic descriptions.}, KEYWORDS = {Action verbs, Image ontology, Multilingual dictionary, Computer-aided translation}, PAGES = {1163-1170}, URL = {http://euralex2014.eurac.edu/en/callforpapers/Documents/EURALEX%202014_gesamt.pdf}, DOI = {10.13140/2.1.3719.2320}, PUBLISHER = {EURAC (Bolzano, ITA)}, ISBN = {978-88-88906-97-3}, CONFERENCE_NAME = {XVI EURALEX International Congress: The User in Focus}, CONFERENCE_PLACE = {Bolzano}, CONFERENCE_DATE = {15-19/07/2014}, BOOKTITLE = {Proceedings of the XVI EURALEX International Congress: The User in Focus}, EDITOR = {Abel, A. and Vettori, C. and Ralli, N.}, } @INPROCEEDINGS{PEDRETTI_2014_INPROCEEDINGS_PDGMPALM_340200, AUTHOR = {Pedretti, I. and Del Grosso, A. and Giovannetti, E. and Mancini, L. and Piccini, S. and Abrate, M. and Lo Duca, A. and Marchetti, A.}, TITLE = {The Clavius on the Web Project: Digitization, Annotation and Visualization of Early Modern Manuscripts}, YEAR = {2014}, ABSTRACT = {This paper describes the full procedure adopted in the context of the Clavius on the Web project, which aims to help Web users to appraise the importance of specific manuscripts by going beyond their digital reproduction. The proposed approach is based on the multilayered explication of linguistic, lexical and semantic data representing the innermost nature of the analyzed manuscripts. The final purpose of the project is to gather and display the results of the three layers of analysis through interactive visualization techniques and export them as Linked Data. All the analyses rely on the XML/TEI encoding of the text, followed by a CTS-based tokenization. 
As a working example for this paper, the analysis of a portion of a manuscript provided by Historical Archives of the Pontifical Gregorian University will be illustrated. The text is a letter written in Latin and sent by Botvitus Nericius to Christophorus Clavius in 1598 from Madrid.}, KEYWORDS = {Clavius, Data Visualization, Early Modern Manuscripts, Lexica and Ontologies, Linked Open Data, NLP for Latin, Promotion of Cultural Heritage}, PAGES = {7}, URL = {http://dl.acm.org/citation.cfm?id=2802636}, DOI = {10.1145/2802612.2802636}, PUBLISHER = {ACM, Association for computing machinery (New York, USA)}, ISBN = {978-1-4503-3295-8}, CONFERENCE_NAME = {Third AIUCD Annual Conference on Humanities and Their Methods in the Digital Ecosystem (AIUCD '14)}, CONFERENCE_PLACE = {Bologna}, CONFERENCE_DATE = {18-19/09/2014}, BOOKTITLE = {Proceedings of the Third AIUCD Annual Conference on Humanities and Their Methods in the Digital Ecosystem (AIUCD '14)}, EDITOR = {Tomasi, F. and Del Turco, R. R. and Tammaro, A. M.}, } @INPROCEEDINGS{PICCINI_2014_INPROCEEDINGS_PRG_282583, AUTHOR = {Piccini, S. and Ruimy, N. and Giovannetti, E.}, TITLE = {Il primo lessico elettronico della terminologia saussuriana}, YEAR = {2014}, ABSTRACT = {Il lavoro che intendiamo presentare si iscrive all'interno di un Progetto di Ricerca Nazionale finanziato dal governo italiano, intitolato "Per una edizione digitale dei manoscritti di Ferdinand de Saussure", e volto a creare un prototipo di edizione digitale degli scritti autografi del grande linguista ginevrino. A tal fine, ogni manoscritto è stato digitalizzato, classificato e dotato di un link ipertestuale che rimanda alla sua trascrizione. Un sistema di gestione dei testi permette di consultare e annotare il singolo manoscritto e di effettuare studi filologici e critici sul corpus digitalizzato. Le concordanze per forma e per lemma prodotte per l'insieme dei testi forniscono un insieme di termini caratteristici dei quali viene descritta la semantica. 
Accanto alla realizzazione di tale piattaforma filologica digitale, uno degli aspetti innovativi del progetto consiste nella creazione del primo thesaurus-lessico elettronico della terminologia linguistica saussuriana. Quest'ultima riceve per la prima volta una rappresentazione strutturata, con una definizione del contenuto semantico di ciascuno dei termini chiave del pensiero del maestro ginevrino ed un quadro esplicito della natura e dell'importanza dei legami che li uniscono. Tale fascio di informazioni dovrebbe contribuire in modo significativo a meglio conoscere ed interpretare il pensiero del padre della linguistica moderna. L'architettura del lessico è ispirata al modello lessicale SIMPLE. Tra i modelli lessicali di maggior rilievo (WordNet, EuroWordNet, ItalWordNet, FrameNet, Pattern Dictionary, SIMPLE e Brandeis Semantic Ontology) esso è apparso essere il più adeguato, in quanto si distingue per alcuni importanti ed innovati aspetti. Il modello SIMPLE, infatti, ha permesso la realizzazione di lessici elettronici multilivello armonizzati per dodici lingue europee, imponendosi così come standard de facto nell'ambito della Lessicografia Computazionale. Successivamente ha fortemente ispirato lo standard ISO per i lessici del TAL Lexical Markup Framework. La strutturazione del lessico ha necessitato anzitutto della creazione di una ontologia lessicale di dominio. A tal fine è stato adottato un approccio centrifugo: in un primo momento sono stati identificati i concetti centrali del dominio di interesse, i quali sono stati poi generalizzati o specificati. L'ontologia così modellizzata è attualmente costituita da 43 tipi semantici ed ha una profondità di 4 livelli. Alcune classi semantiche sono state poi ulteriormente "specificate" in termini di tratti e/o relazioni semantiche obbligatorie (definitorie). 
Un insieme di cinquantotto relazioni semantiche permette di collegare le istanze delle differenti classi ontologiche, mentre trentadue tratti semantici codificano informazioni tipiche di una classe semantica nella sua interezza o di una specifica istanza. La semantica lessicale di ciascuna delle istanze di una classe ontologica è rappresentata in una entrata lessicale, nella quale una ed una sola accezione di un termine, semplice o complesso, viene riccamente definita attraverso un vasto insieme di informazioni formalizzate ed altamente strutturate, che coprono un ampio ventaglio di aspetti semantici. Il senso analizzato è anzitutto associato alla definizione data da Saussure stesso, e laddove non disponibile, a quelle di R. Godel e R. Engler. Per ogni lemma vengono specificati anche il periodo di attestazione, le fonti nelle quali occorre, la sua frequenza di occorrenza e le collocazioni nelle quali appare. La classificazione ontologica del termine rappresenta un dato essenziale al quale si aggiungono l'informazione concernente il dominio d'uso, il tipo di evento denotato (qualora si tratti di un evento), dei tratti semantici distintivi e una vasta rete di relazioni semantico-lessicali. Il modello lessicale adottato, infatti, conferisce una attenzione particolare ai legami che esistono tra i differenti termini. La multidimensionalità intrinseca al senso di ogni lemma è colta e formalizzata attraverso un insieme di relazioni semantiche specifiche del modello SIMPLE e ispirate alla Struttura Qualia della teoria del Lessico Generativo. Esse offrono un quadro preciso della natura dei legami (intra ed extra categoriali) che sussistono tra le unità lessicali contenute nella base di dati sia sull'asse paradigmatico (iperonimia, iponimia, meronimia ed olonimia) sia sull'asse sintagmatico (fornendo in particolare informazioni sull'origine e la funzione dell'entità denotata). 
Inoltre, accanto alle classiche relazioni di sinonimia, antonimia e di derivazione morfologica, sono state create delle relazioni specifiche, al fine di formalizzare nel modo più preciso possibile i legami particolari esistenti tra i termini del dominio della conoscenza e più generalmente la sua organizzazione concettuale. Dei termini predicativi contenuti nel lessico viene descritta anche la struttura argomentale con indicazioni sul ruolo semantico e sulle restrizioni semantiche degli argomenti introdotti. Allo stato attuale, la popolazione del lessico è costituita da 500 entrate lessicali (379 nomi, 113 aggettivi e 8 verbi): si tratta principalmente dei termini proposti da Godel e Engler e di alcune parole-chiave estratte dagli Écrits de linguistique générale. In una fase successiva verrà integrata la nuova terminologia dei manoscritti attualmente studiati. In una prima fase i dati lessicali sono stati gestiti in una piattaforma MS ACCESS. Più recentemente, per esigenze di standardizzazione ed interoperabilità, è stata effettuata una migrazione sulla piattaforma Protégé-OWL. Owl è, infatti, il linguaggio standard del W3C per la rappresentazione e la condivisione di ontologie sul Web. Il sistema di gestione Access, tuttavia, non è stato abbandonato: il lessico, attualmente, è ospitato in entrambe le piattaforme grazie ad un software che consente la perfetta sincronizzazione dei dati. Questa provvisoria "doppia ubicazione" permette, intanto, di sfruttare in modo ottimale le potenzialità dei due sistemi di gestione. Una tale strutturazione informatizzata dei dati lessicali offre numerosi vantaggi. In fase di creazione del lessico, essa permette uno sviluppo collaborativo rigoroso, anche a distanza, e lo stoccaggio di una grande quantità di dati; favorisce una rappresentazione sistematica dei fenomeni linguistici ed assicura la coerenza e la completezza dell'informazione codificata. 
Inoltre, consente di operare costanti controlli di coerenza formale dell'informazione e di effettuare delle modifiche o delle correzioni sull'insieme dei dati. La strutturazione multidimensionale dei concetti del dominio di conoscenza e la rappresentazione semantica altamente strutturata, ricca, diversificata ed espressiva ne fanno uno strumento di ricerca lessicale particolarmente performante. Varie tipologie di ricerca possono essere effettuate molto semplicemente e rapidamente su qualsiasi dato memorizzato, sia esso una relazione, un tratto o una unità semantica e questi dati possono essere interrogati sia singolarmente sia in combinazione. Si ha quindi la possibilità di estrarre gruppi di unità lessicali che condividono una data proprietà o una combinazione di proprietà, secondo dei criteri stabiliti dallo studioso in relazione alle esigenze della sua ricerca. A titolo di esempio, verranno presentate alcune delle numerose ricerche che possono essere effettuate sui dati lessicali all'interno della base di dati. A nostro parere, il thesaurus-lessico semantico della terminologia saussuriana costituisce, per i motivi esposti, uno strumento di grande utilità per gli esperti del settore. Inoltre, l'organizzazione strutturata della conoscenza lessicale, favorendo la ricerca semantica, dovrebbe contribuire in modo significativo ad una più approfondita conoscenza del vocabolario del maestro e pertanto degli aspetti più complessi ed originali del suo pensiero.}, KEYWORDS = {Saussure, SIMPLE, lessici computazionali}, URL = {https://publications.cnr.it/doc/282583}, CONFERENCE_NAME = {XXXVII Convegno della Società Italiana di Glottologia, "Il lessico nella teoria e nella storia linguistica"}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {25-27/10/2012}, } @INPROCEEDINGS{PIPERIDIS_2014_INPROCEEDINGS_PPSRCHCDMG_288462, AUTHOR = {Piperidis, S. and Papageorgiou, H. and Spurk, C. and Rehm, G. and Choukri, K. and Hamon, O. and Calzolari, N. and Del Gratta, R. and Magnini, B. 
and Girardi, C.}, TITLE = {META-SHARE: One year after}, YEAR = {2014}, ABSTRACT = {This paper presents META-SHARE (www.meta-share.eu), an open language resource infrastructure, and its usage since its Europe-wide deployment in early 2013. META-SHARE is a network of repositories that store language resources (data, tools and processing services) documented with high-quality metadata, aggregated in central inventories allowing for uniform search and access. META-SHARE was developed by META-NET (www.meta-net.eu) and aims to serve as an important component of a language technology marketplace for researchers, developers, professionals and industrial players, catering for the full development cycle of language technology, from research through to innovative products and services. The observed usage in its initial steps, the steadily increasing number of network nodes, resources, users, queries, views and downloads are all encouraging and considered as supportive of the choices made so far. In tandem, take-up activities like direct linking and processing of datasets by language processing services as well as metadata transformation to RDF are expected to open new avenues for data and resources linking and boost the organic growth of the infrastructure while facilitating language technology deployment by much wider research communities and industrial sectors.}, KEYWORDS = {Infrastructures, language resources identification, language resources documentation, metadata, language resources sharing, language resources licensing}, PAGES = {1532-1538}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {LREC'14}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {May, 26-31}, BOOKTITLE = {Proceedings of the Ninth International Conference on Language Resources and Evaluation}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. 
and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{PIRRELLI_2014_INPROCEEDINGS_PMF_290601, AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M.}, TITLE = {Two-dimensional Wordlikeness Effects in Lexical Organisation}, YEAR = {2014}, ABSTRACT = {The main focus of research on wordlikeness has been on how serial processing strategies affect perception of similarity and, ultimately, the global network of associative relations among words in the mental lexicon. Comparatively little effort has been put so far, however, into an analysis of the reverse relationship: namely, how global organisation effects influence the speakers' perception of word similarity and of words' internal structure. In this paper, we explore the relationship between the two dimensions of wordlikeness (the "syntagmatic" and the "paradigmatic" one), to suggest that the same set of principles of memory organisation can account for both dimensions.}, KEYWORDS = {wordlikeness, lexical access, word processing, frequency, memory}, PAGES = {301-305}, URL = {http://clic.humnet.unipi.it/it/atti.html}, VOLUME = {1}, DOI = {10.12871/CLICIT2014158}, ISBN = {978-8-86741-472-7}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics CLiC-it 2014 \& Fourth International Workshop EVALITA 2014}, CONFERENCE_PLACE = {Pisa, Italy}, CONFERENCE_DATE = {9-11/12/2014}, BOOKTITLE = {The First Italian Conference on Computational Linguistics-Proceedings}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{RHEM_2014_INPROCEEDINGS_RUCM_287035, AUTHOR = {Rehm, G. and Uszkoreit, H. and Calzolari, N. 
and Monachini, M.}, TITLE = {The Strategic Impact of META-NET on the Regional, National and International Level}, YEAR = {2014}, ABSTRACT = {This article provides an overview of the dissemination work carried out in META-NET from 2010 until early 2014; we describe its impact on the regional, national and international level, mainly with regard to politics and the situation of funding for LT topics. This paper documents the initiative's work throughout Europe in order to boost progress and innovation in our field.}, KEYWORDS = {LR National/International Projects, Infrastructural/Policy Issues, Multilinguality, Machine Translation}, PAGES = {1517-1524}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{SIMI_2014_INPROCEEDINGS_SBM_329779, AUTHOR = {Simi, M. and Bosco, C. and Montemagni, S.}, TITLE = {Less is More? Towards a Reduced Inventory of Categories for Training a Parser for the Italian Stanford Dependencies}, YEAR = {2014}, ABSTRACT = {Stanford Dependencies (SD) represent nowadays a de facto standard as far as dependency annotation is concerned. The goal of this paper is to explore pros and cons of different strategies for generating SD annotated Italian texts to enrich the existing Italian Stanford Dependency Treebank (ISDT). This is done by comparing the performance of a statistical parser (DeSR) trained on a simpler resource (the augmented version of the Merged Italian Dependency Treebank or MIDT+) and whose output was automatically converted to SD, with the results of the parser directly trained on ISDT. 
Experiments carried out to test reliability and effectiveness of the two strategies show that the performance of a parser trained on the reduced dependencies repertoire, whose output can be easily converted to SD, is slightly higher than the performance of a parser directly trained on ISDT. A non-negligible advantage of the first strategy for generating SD annotated texts is that semi-automatic extensions of the training resource are more easily and consistently carried out with respect to a reduced dependency tagset. Preliminary experiments carried out for generating the collapsed and propagated SD representation are also reported.}, KEYWORDS = {Italian Treebank, Harmonization and Merging of Resources, Stanford Dependencies}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/818_Paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {Ninth International Conference on Language Resources and Evaluation (LREC'14)}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 May 2014}, BOOKTITLE = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{ABRATE_2014_INPROCEEDINGS_ADGLMMPP_282564, AUTHOR = {Abrate, M. and Del Grosso, A. M. and Giovannetti, E. and Lo Duca, A. and Marchetti, A. and Mancini, L. and Pedretti, I. and Piccini, S.}, TITLE = {Il Progetto Clavius on the Web: tecnologie linguistico-semantiche al servizio del patrimonio documentale e degli archivi storici}, YEAR = {2014}, ABSTRACT = {L'obiettivo del progetto Clavius on the Web è quello di valorizzare una parte dei manoscritti conservati dall'Archivio storico della Pontificia Università Gregoriana (APUG). 
Il progetto prende in esame alcuni manoscritti relativi a Christophorus Clavius (1538-1612), matematico e astronomo gesuita. I manoscritti sono digitalizzati, trascritti, tradotti e analizzati dal punto di vista linguistico, lessicale e semantico. La terminologia e le entità di dominio individuate nel testo sono strutturate in un lessico e una ontologia, e collegate a risorse già disponibili sulla Rete secondo i principi dei Linked Data. La loro visualizzazione sul Web è implementata mediante tecniche di Data Visualization: la prima si basa sulla corrispondenza tra testo, traduzione e immagine mentre la seconda ha lo scopo di mostrare i contenuti delle analisi, con particolare enfasi alla navigazione delle risorse lessicali e ontologiche prodotte durante le varie fasi.}, URL = {https://publications.cnr.it/doc/282564}, CONFERENCE_NAME = {AIUCD 2014-Terzo convegno annuale: La metodologia della ricerca umanistica nell'ecosistema digitale}, CONFERENCE_PLACE = {Bologna}, CONFERENCE_DATE = {18-19 Settembre 2014}, BOOKTITLE = {AIUCD2014-La metodologia della ricerca umanistica nell'ecosistema digitale-Abstracts 3rd annual conference, 18-19 settembre 2014}, EDITOR = {Rossi, F. and Tomasi, F.}, } @INPROCEEDINGS{BELEFFI_2014_INPROCEEDINGS_BS_319421, AUTHOR = {Beleffi, E. and Sassi, M.}, TITLE = {La sicurezza del paziente sui quotidiani in Italia: indagine preliminare sui termini e l'andamento degli eventi}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/319421}, CONFERENCE_NAME = {FORUM RISK MANAGEMENT IN SANITÀ 2014}, CONFERENCE_PLACE = {Arezzo}, CONFERENCE_DATE = {25-28 novembre 2014}, } @INPROCEEDINGS{BIAGIONI_2014_INPROCEEDINGS_BDGP_280391, AUTHOR = {Biagioni, S. and Deluca, R. and Giannini, S. 
and Pardelli, G.}, TITLE = {I sistemi informativi della Biblioteca dell'Area della Ricerca di Pisa}, YEAR = {2014}, ABSTRACT = {Description of the CNR Library, (Pisa, Italy) and its services.}, KEYWORDS = {Sistemi informativi per biblioteche, Servizi bibliotecari}, URL = {https://publications.cnr.it/doc/280391}, CONFERENCE_NAME = {Seminario rivolto agli alunni dell'Istituto Tecnico Economico "F. Carrara" di Lucca, organizzato dall'Istituto di Linguistica Computazionale "A. Zampolli" del CNR di Pisa}, CONFERENCE_PLACE = {Pisa, Area della Ricerca CNR}, CONFERENCE_DATE = {31 marzo 2014}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_B_288065, AUTHOR = {Boschetti, F.}, TITLE = {OCR: instruments linguistiques pour améliorer la précision de la reconnaissance optique des caractères dans le cas du grec ancien et de l'arabe}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/288065}, CONFERENCE_NAME = {Workshop International en Traitement Automatique de la Langue Arabe}, CONFERENCE_PLACE = {Fes, Marocco}, CONFERENCE_DATE = {8 maggio 2014}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_B_288071, AUTHOR = {Boschetti, F.}, TITLE = {L'edizione scientifica digitale del testo letterario e del testo epigrafico: convergenze e divergenze-Parte I}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/288071}, CONFERENCE_NAME = {Risorse digitali e strumenti collaborativi per le Scienze dell'Antichità}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {2-3 ottobre 2014}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_B_288073, AUTHOR = {Boschetti, F.}, TITLE = {Extracting Information Related To Writings From Traditional Paper Corpora}, YEAR = {2014}, URL = {http://bit.ly/11ps2tD}, CONFERENCE_NAME = {EAGLE 2014 International Conference}, CONFERENCE_PLACE = {Paris}, CONFERENCE_DATE = {29-30 settembre 2014}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BCD_288061, AUTHOR = {Boschetti, F. and Caruso, L. and Del Grosso, A. 
M.}, TITLE = {Euporia: Un'esperienza di Filologia Collaborativa a scuola, per promuovere il circolo virtuoso fra ricerca e didattica}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/288061}, CONFERENCE_NAME = {Internet Festival}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {11 ottobre 2014}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BDMNP_288372, AUTHOR = {Boschetti, F. and Del Gratta, R. and Marzi, C. and Nahli, O. and Pirrelli, V.}, TITLE = {Modelli, metodi e strumenti per il trattamento automatico della lingua araba e per l'editing in ambienti collaborativi}, YEAR = {2014}, ABSTRACT = {La linguistica computazionale ha portato negli ultimi vent'anni a un profondo mutamento nello studio delle lingue e delle loro testimonianze scritte, spostando l'accento della ricerca da aspetti linguistico-formali all'uso linguistico in contesti comunicativi reali. Il presente contributo illustra l'impatto di questo cambio di prospettiva sullo studio della lingua araba, attraverso una rassegna di alcune attività di ricerca in corso presso l'Istituto di Linguistica Computazionale del CNR di Pisa: I. acquisizione dei testi arabi tramite Optical Character Recognition (OCR) e sviluppo di strumenti per la correzione manuale del testo in ambienti collaborativi; II. sviluppo di algoritmi e strumenti per l'analisi morfologica della lingua araba; III. analisi delle dinamiche di acquisizione del lessico arabo mediante architetture bio-computazionali; IV. sviluppo della WordNet dell'Arabo collegata a Princeton WordNet, ItalWordNet, LatinWordNet e alla nascente AncientGreek WordNet. Queste attività sono rivolte sia all'analisi delle caratteristiche linguistiche dell'arabo che allo studio della produzione letteraria araba e dei suoi rapporti storico-culturali con altre lingue. 
In particolare, il contributo intende illustrare la fertilità di un approccio metodologico che metta in relazione le dinamiche di acquisizione del lessico arabo, con la messa a punto di procedure di analisi ed edizione critica del testo e con i principi di organizzazione ontologica di una lingua ad alta produttività derivazionale.}, URL = {http://aiucd2014.unibo.it/book-of-abstracts.pdf}, CONFERENCE_NAME = {AIUCD 3rd annual conference}, CONFERENCE_PLACE = {Bologna}, CONFERENCE_DATE = {18-19 settembre 2014}, BOOKTITLE = {La metodologia della ricerca umanistica nell'ecosistema digitale-AIUCD 2014 Terzo convegno annuale}, EDITOR = {Rossi, F. and Tomasi, F.}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BD_288063, AUTHOR = {Boschetti, F. and Del Grosso, A. M.}, TITLE = {Progetto Euporia: Gli studenti incontrano la Filologia Collaborativa}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/288063}, CONFERENCE_NAME = {Annotazione Linguistica e Retorica di Testi Greci con Traduzione a Fronte}, CONFERENCE_PLACE = {San Marco dei Cavoti (BN)}, CONFERENCE_DATE = {23 Maggio 2014}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BDKLN_288058, AUTHOR = {Boschetti, F. and Del Grosso, A. M. and Khan, A. F. and Lamé, M. and Nahli, O.}, TITLE = {A top-down approach to the design of components for the philological domain}, YEAR = {2014}, ABSTRACT = {This paper focuses on the methodology applied to the development of components in the domain of collaborative philology in the Memorata Poetis Project. This initiative, led by the University of Venice, coordinates eight units sharing the same cyber-infrastructure and is co-funded by the Italian Ministry of Instruction, University and Research (PRIN 2010/11). The project aims to study the multilingual intertextuality between epigraphic texts and literary epigrams, the transmission of themes, motives, etc. between different communicative situations (epigraphic versus literary) and different civilisations (Greek, Latin and Italian). 
As a control group, we analyse a corpus of epigraphic and literary texts in Arabic which do not belong to the same tradition as the others. The study of intertextuality affects both the reconstruction of the text (constitutio textus), by providing variants from the indirect tradition, and its interpretation (interpretatio), by widening the contexts in which the text has been reused.}, URL = {https://publications.cnr.it/doc/288058}, CONFERENCE_NAME = {DH2014}, CONFERENCE_PLACE = {Lausanne}, CONFERENCE_DATE = {8-12 july 2014}, BOOKTITLE = {Digital Humanities 2014-Book of Abstracts}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BF_288068, AUTHOR = {Boschetti, F. and Fortunato, L.}, TITLE = {Correcting Multiple Editions of Aeschylus}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/288068}, CONFERENCE_NAME = {Open Philology Meeting}, CONFERENCE_PLACE = {Leipzig}, CONFERENCE_DATE = {15 luglio 2014}, } @INPROCEEDINGS{BOZZI_2014_INPROCEEDINGS_BM_316572, AUTHOR = {Bozzi, A. and Marchi, S.}, TITLE = {"Greek into Arabic Web App" as a Research Infrastructure for the History of Philosophy and Science}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/316572}, CONFERENCE_NAME = {Plotinus, East and West-The Enneads in Arabic and Latin}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {November 3-6, 2014}, } @INPROCEEDINGS{CHIARELLA_2014_INPROCEEDINGS_CCMM_304969, AUTHOR = {Chiarella, D. and Cutugno, P. and Marconi, L. and Morgavi, G.}, TITLE = {Trame narrative nel social publishing}, YEAR = {2014}, PAGES = {13-13}, URL = {https://publications.cnr.it/doc/304969}, CONFERENCE_NAME = {IV Congresso Nazionale CKBG "Tecnologia e Leggerezza" ovvero come rimuovere 'peso' dagli individui, organizzazioni, comunità, città}, CONFERENCE_PLACE = {Pavia}, CONFERENCE_DATE = {29-31 gennaio 2014}, } @INPROCEEDINGS{CUTUGNO_2014_INPROCEEDINGS_CCLMN_304809, AUTHOR = {Cutugno, P. and Chiarella, D. and Lucentini, R. and Marconi, L. 
and Noberini, S.}, TITLE = {EMOCHA: an everlasting temporary exhibition}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/304809}, CONFERENCE_NAME = {RENEWAL, INNOVATION AND CHANGE: HERITAGE AND EUROPEAN SOCIETY (RICHES) First International Conference}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {4-5 dicembre 2014}, } @INPROCEEDINGS{DEFELICE_2014_INPROCEEDINGS_D_300629, AUTHOR = {De Felice, I.}, TITLE = {Quanti oggetti possiamo "prendere" e come? Un solo verbo, tante azioni e tante traduzioni}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/300629}, CONFERENCE_NAME = {Seminario rivolto agli alunni dell'Istituto Tecnico Economico "F. Carrara" di Lucca, organizzato dall'Istituto di Linguistica Computazionale "A. Zampolli" del CNR di Pisa}, CONFERENCE_PLACE = {Pisa, Italy}, CONFERENCE_DATE = {31/03/2014}, } @INPROCEEDINGS{DEFELICE_2014_INPROCEEDINGS_D_300631, AUTHOR = {De Felice, I.}, TITLE = {Pragmatics and Semantics in Grasp Events}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/300631}, CONFERENCE_NAME = {AISV 2014}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {22-24/01/2014}, } @INPROCEEDINGS{DEFELICE_2014_INPROCEEDINGS_D_287461, AUTHOR = {De Felice, I.}, TITLE = {The Affordance of grasping. A Psycholinguistic Study}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/287461}, CONFERENCE_NAME = {Experimental Psycholinguistics Conference}, CONFERENCE_PLACE = {Madrid}, CONFERENCE_DATE = {3-5/10/2014}, } @INPROCEEDINGS{DELGROSSO_2014_INPROCEEDINGS_DN_288053, AUTHOR = {Del Grosso, A. M. 
and Nahli, O.}, TITLE = {Towards a flexible open-source software library for multi-layered scholarly textual studies-An Arabic use-case dealing with semi-automatic language processing}, YEAR = {2014}, ABSTRACT = {This paper presents both the general model and a case study of the Computational and Collaborative Philology Library (CoPhiLib), an ongoing initiative underway at the Institute for Computational Linguistics (ILC) of the National Research Council (CNR), Pisa, Italy. The library, designed and organized as a reusable, abstract and open-source software component, aims at solving the needs of multi-lingual and cross- lingual analysis by exposing common Application Programming Interfaces (APIs). The core modules, coded by the Java programming language, constitute the groundwork of a Web platform designed to deal with textual scholarly needs. The Web application, implemented according to the Java Enterprise specifications, focuses on multi-layered analysis for the study of literary documents and related multimedia sources. This ambitious challenge seeks to obtain the management of textual resources, on the one hand by abstracting from current language, on the other hand by decoupling from the specific requirements of single projects. This goal is achieved thanks to methodologies declared by the "agile process", and by putting into effect suitable use case modeling, design patterns, and component- based architectures. The reusability and flexibility of the system have been tested on an Arabic case study: the system allows users to choose the morphological engine (such as AraMorph or Al- Khalil), along with linguistic granularity (i.e. with or without declension). 
Finally, the application enables the construction of annotated resources for further statistical engines (training set).}, KEYWORDS = {Computational and collaborative philology, API}, URL = {https://publications.cnr.it/doc/288053}, CONFERENCE_NAME = {ANLP IEEE CIST14}, CONFERENCE_PLACE = {Tetuan, Morocco}, CONFERENCE_DATE = {20-22/10/ 2014}, } @INPROCEEDINGS{FRONTINI_2014_INPROCEEDINGS_F_315438, AUTHOR = {Frontini, F.}, TITLE = {La mappa delle opinioni e dei sentimenti estratte dai social media}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/315438}, CONFERENCE_NAME = {Seminario rivolto agli alunni dell'Istituto Tecnico Economico "F. Carrara" di Lucca, organizzato dall'Istituto di Linguistica Computazionale "A. Zampolli" del CNR di Pisa}, CONFERENCE_PLACE = {Pisa, Area della Ricerca del CNR}, CONFERENCE_DATE = {31 marzo 2014}, } @INPROCEEDINGS{GIANNINI_2014_INPROCEEDINGS_GBGP_291438, AUTHOR = {Giannini, S. and Biagioni, S. and Goggi, S. and Pardelli, G.}, TITLE = {Mapping Italian Grey Communities: What is There Beyond the Academy?}, YEAR = {2014}, ABSTRACT = {The following title was published on an influential Italian newspaper, La Stampa, on November 7, 2013. "Tra i tesori della 'Letteratura Grigia' un'Eneide in napoletano del '600". The article is about the presentation of the "Fondo De Mauro" on the Italian Network of Popular Culture: this fund originates from a private collection built up in several decades and donated to the Network by Tullio De Mauro and his wife Silvana Ferreri in 2011; it is made up of thousands of books, brochures, pamphlets relating to Italian dialects and minority languages. In the field of healthcare, while browsing on the web, we found the title of a PhD thesis of last year: "Letteratura Grigia nelle meta-analisi delle prove ripartite con scelta casuale degli interventi di sanità". 
In the legal environment, the title of a Seminar at the University of Siena emerges from the web: "La "centralità" della legge e la letteratura grigia. Profili di politica del diritto in Italia tra Otto e Novecento" . From these findings, the idea of a survey on the wide variety of grey material available on Italian web portals arises. A first analysis shows that this material is available in different forms and dissemination is carried out through various means such as thematic bibliographies, newspapers articles, various types of documents published in pdf format or simple descriptions on web sites. The following are a few examples excerpted from some home pages belonging to our corpus: ? Collection of grey literature. The Historical Archive of Women candidates for becoming the repository where the memories about these themes will be stored... ? Grey literature ... Master copies · Reprints · Unreleased copies · Grey Literature. Archives for the history of Education... ? Grey literature. The high quality brand of parks... ? ... International grey literature; national and international legislative data on the topic of drug addiction and related themes; documentary archive ... ? Besides literature in German, there is literature in other languages and grey literature as well - in particular catalogues of museums and exhibitions... ? It is about a few thousand of books, brochures and documents of grey literature concerning two topics, Italian dialects and minority languages... Given this scenario, the research aims at verifying whether - and eventually how much - the grey literature available on the web is actually structured, accessible or even managed by systems dealing with its organization and aiming at its retrieval and storing. 
The utmost goal is to build up a map of non-academic communities and their mechanisms for managing, presenting and disseminating this type of material: a sort of journey among the streams of the web which channel meeting minutes, invites, manifests, fliers, pictures, newspapers articles, journalistic services and audio/video material on various topics. These "grey" products - by conveying basic information about social and popular culture - store, represent and spread knowledge. Significant examples could be identified in the web sites presenting the following matters: history of women's culture and of their movement in some specific Italian regions; projects dealing with the sustainability of urban environment with respect to childhood and adolescence; parks and other natural protected environments; archaeological documentation such as draft reports, diaries from the site, letters and miscellaneous documentation; nursing and health-related disciplines which produce guidelines, diagnostic and therapeutic courses, informative material for patients and their families. Also the theatrical culture is nourished by "grey products" as video archives, collections of music LPs and CDs, brochures, scripts, autograph manuscripts (i.e.. letters, correspondences, fliers, musical scores). In substance, a heterogeneous set of material which could reveal especially interesting to both researchers, scientists, professionals and simple fans and lovers of the various subjects if ever made available and usable. Focus Analysis of the documentation and production of taxonomies finalized at the creation of a map of non-academic communities and stakeholders involved in the management of grey material. 
Material and methods The survey examines the several disciplines, the typology, the institutional nature and the fields to which these grey communities belong, noticing as well the variety of documentation provided, the structure of information and the presentation and access modalities through the following steps: 1) Selection of the web portals as resulted from the query "letteratura grigia" OR "letteratura non convenzionale" OR "documentazione grigia" OR "materiale grigio". 2) Creation of a corpus made up of 28,000 occurrences. 3) Analysis of the various communities and of the grey material retrieved. 4) Statistical elaboration of the data. Conclusions: reflections on the communities and stakeholders involved in the management of grey literature and on the various ways of presenting the documentation provided by the web sites selected for this survey.}, KEYWORDS = {Italian Grey Literature}, PAGES = {21-25}, URL = {http://greyguide.isti.cnr.it/linkdoc.php?idcode=2014-G01-019\&authority=GLConference\&collection=GL16\&langver=en}, VOLUME = {16}, ISBN = {978-90-77484-24-1}, CONFERENCE_NAME = {Sixteenth International Conference on Grey Literature Grey Literature Lobby: Engines and Requesters for Change}, CONFERENCE_PLACE = {Library of Congress Washington D. C., USA}, CONFERENCE_DATE = {December 8-9, 2014}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{GOGGI_2014_INPROCEEDINGS_GMFBPDBM_291816, AUTHOR = {Goggi, S. and Monachini, M. and Frontini, F. and Bartolini, R. and Pardelli, G. and De Mattei, M. and Bustaffa, F. and Manzella, G.}, TITLE = {Marine Planning and Service Platform (MAPS): An Advanced Research Engine for Grey Literature in Marine Science}, YEAR = {2014}, ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is a development of the Marine project (Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013) aiming at building a computer platform for supporting Operative Oceanography in its activities. 
One of the main objectives of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. Community and Requirements. Operative Oceanography is the branch of marine research which deals with the development of integrated systems for examining and modeling the ocean monitoring and forecast. Experts need access to real-time data on the state of the sea such as forecasts on temperatures, streams, tides and the relevant scientific literature. This finds application in many areas, ranging from civilian and military safety to protection of off-shore and coastal infrastructures. The metadata. The set of metadata associated with marine data is defined in the CDI (Common Data Index) documented standard. They encode: the types of sizes which have been measured; the measurement tools the platform which has been employed; the geographic area where measures have been taken; the environmental matrix; the descriptive documentation. As concerns the scientific documentation, at the current stage of the CDI standard, a document is shaped around the following metadata: Title, Authors, Version, ISBN/DOI, Topic, Date of publication, Body/Institution, Abstract. The search engine. The query system (which is actually under development) has been designed for operating with structured data - the metadata - and raw data - the associated technical and scientific documentation. Full-text technologies are often unsuccessful when applied to this type of queries since they assume the presence of specific keywords in the text; in order to fix this problem, the MAPS project suggests to use different semantic technologies for retrieving the text and data and thus getting much more complying results. 
In the Poster we will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data. The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs/problems and their answers.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {93-94}, URL = {http://greyguide.isti.cnr.it/dfdownloadnew.php?ident=GLConference/GL16/2014-G01-015\&langver=en\&scelta=Metadata}, ISBN = {978-90-77484-24-1}, CONFERENCE_NAME = {Sixteenth International Conference on Grey Literature Grey Literature Lobby: Engines and Requesters for Change}, CONFERENCE_PLACE = {Library of Congress Washington D. C., USA}, CONFERENCE_DATE = {December 8-9, 2014}, EDITOR = {Farace, C. B. D. and Frantzen, J.}, } @INPROCEEDINGS{KHAN_2014_INPROCEEDINGS_KFM_291637, AUTHOR = {Khan, F. and Frontini, F. and Monachini, M.}, TITLE = {A Model for Representing Diachronic Semantic Information in Lexico-Semantic Resources on the Semantic Web}, YEAR = {2014}, ABSTRACT = {The Semantic Web offers a way of publishing structured data online that facilitates the interlinking of different datasets stored at different online locations; indeed one of the main aims of the Semantic Web movement is to actively encourage this enrichment of online datasets with information from other resources, in order to avoid the problem of so called 'data islands'. In contrast to conventional hyperlinks however the links between different resources on the Semantic Web can be given semantic types and classified hierarchically. Data published on the Semantic Web is referred to as Linked Data; 
if, in addition, this data is available with an open license then it can be referred to as Linked Open Data (Heath 2011).}, KEYWORDS = {Cultural resources, Heritage resources}, PAGES = {1-3}, URL = {http://www.dh.uni-leipzig.de/wo/wp-content/uploads/2014/11/Fahad-Khan-Francesca-Frontini-and-Monica-Monachini-A-Model-for-Representing.pdf}, CONFERENCE_NAME = {Greek and Latin in an age of Open Data. Open Philology Project}, CONFERENCE_PLACE = {University of Leipzig, GERMANY}, CONFERENCE_DATE = {December 1-4, 2014}, } @INPROCEEDINGS{LAM_2014_INPROCEEDINGS_LRMVTS_295129, AUTHOR = {Lamé, M. and Rouvière, G. and Marras, A. M. and Vassali, M. and Tanca, C. and Salvatori, E.}, TITLE = {Hi-storytelling: Street Museum \& Speaking Stones!}, YEAR = {2014}, ABSTRACT = {How could museums go down the streets, taking advantage of the historical primary sources, scattered everywhere, impossible to bring back inside the collections, such as inscriptions in situ or reused? We explore the inscription as a dispositive of information and communication and we apply its message to a fictional storytelling on contemporary social networks.}, KEYWORDS = {epigrafia, epigrafica digitale, dispositivo epigrafico, social network, musei, storytelling}, URL = {http://mwf2014.museumsandtheweb.com/proposals/hi-storytelling-street-museum-speaking-stones/}, CONFERENCE_NAME = {MWF2014: Museums and the Web Florence 2014}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {18-21 febbraio 2014}, EDITOR = {Cherry, R. and Proctor, N.}, } @INPROCEEDINGS{LUCENTINI_2014_INPROCEEDINGS_LCCMNC_304827, AUTHOR = {Lucentini, R. and Chiarella, D. and Cutugno, P. and Marconi, L. and Noberini, S. 
and Castelletti, S.}, TITLE = {A digital archive for cultural heritage: the case of the Luzzati Museum}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/304827}, CONFERENCE_NAME = {RENEWAL, INNOVATION AND CHANGE: HERITAGE AND EUROPEAN SOCIETY (RICHES) First International Conference}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {4-5 dicembre 2014}, } @INPROCEEDINGS{MARINELLI_2014_INPROCEEDINGS_M_281789, AUTHOR = {Marinelli, R.}, TITLE = {Database semantico-lessicali come strumento per l'apprendimento della lingua Inglese (crocieristica e turismo)}, YEAR = {2014}, ABSTRACT = {Partiamo dalla descrizione di Mariterm, database semantico lessicale di dominio marittimo. Mariterm dopo le fasi iniziali della sua costruzione è stato ampliato e arricchito con immagini che permettono di visualizzare il termine preso in considerazione e successivamente con l'inserimento di espressioni idiomatiche e modi di dire che appartengono al dominio marittimo con l'inserimento di termini che riguardano più specificatamente il settore della crocieristica e dell'attività turistica ad essa legata. Vogliamo qui parlare dell'uso di db semantico lessicali per l'insegnamento della lingua Inglese, per lo scopo specifico dell'Inglese Marittimo e, in particolare della terminologia di pertinenza del settore della crocieristica e dell'attività turistica ad essa legata.}, KEYWORDS = {database semantico lessicali, terminologia, turismo, crocieristica}, URL = {https://publications.cnr.it/doc/281789}, CONFERENCE_NAME = {Meeting annuale del Gruppo Web Semantico}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {Febbraio 2014}, } @INPROCEEDINGS{PIRRELLI_2014_INPROCEEDINGS_P_288013, AUTHOR = {Pirrelli, V.}, TITLE = {Psycho-computational modelling of lexical access and organisation: what can we learn from Arabic?}, YEAR = {2014}, ABSTRACT = {Arabic morphology raises a formidable challenge to Markovian computational models of word processing, based on fixed-order memory chaining. 
Computational models of human short-term and long-term memory can help us considerably to shed light on the issues involved at the level of peripheral word processing (access lexical representations). According to this view, word processing (short-term activation) and lexical representations (entrenchment of habitual activation chains) are two sides of the same coin, as they involve the same levels of brain circuitry on a different time scale. Such an "integrative" view of the lexicon as a dynamic system will be possible only if we are able to foster an increasing synergy of perspectives and scientific domains of inquiry: neurosciences, (psycho)linguistics and computing. Conventions of Arabic script are no accident (maliciously intended to trip up computer algorithms)! They rather reflect some fundamental dynamics of the way the human brain processes language.}, URL = {https://publications.cnr.it/doc/288013}, CONFERENCE_NAME = {IEEE CIST'14-Innovative Systems and Technologies for the Future}, CONFERENCE_PLACE = {Tetouan, Marocco}, CONFERENCE_DATE = {20-22 ottobre 2014}, } @INPROCEEDINGS{PIRRELLI_2014_INPROCEEDINGS_P_288044, AUTHOR = {Pirrelli, V.}, TITLE = {Modèles psycho-computationnels du lexique mentale}, YEAR = {2014}, ABSTRACT = {Over the last decades, a growing body of evidence on the mechanisms governing lexical storage, access, acquisition and processing has raised a considerable challenge to traditional models of language architecture and word usage. By pulling together cognitive, neurofunctional and psycho-computational implications of these mechanisms, a new view of the lexicon-grammar architecture emerges, based on the dynamic interaction between storage and processing. 
We call this an "integrative" view of the mental lexicon}, KEYWORDS = {Mental Lexicon, Language Acquisition, Memory}, URL = {https://publications.cnr.it/doc/288044}, CONFERENCE_NAME = {Workshop International en Traitement Automatique de la Langue Arabe}, CONFERENCE_PLACE = {Fès, Marocco}, CONFERENCE_DATE = {8 maggio 2014}, } @INPROCEEDINGS{RANDACCIO_2014_INPROCEEDINGS_RSZ_285389, AUTHOR = {Randaccio, S. and Soria, C. and Zoli, C.}, TITLE = {Standardized orthography: a shoe for barefoot}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/285389}, CONFERENCE_NAME = {Language Endangerment: Orthography Development for Language Maintenance and Revitalisation}, CONFERENCE_PLACE = {Cambridge, UK}, CONFERENCE_DATE = {04/07/2014}, } @INPROCEEDINGS{SAGRI_2014_INPROCEEDINGS_STMV_310539, AUTHOR = {Sagri, M. T. and Tiscornia, D. and Montemagni, S. and Venturi, G.}, TITLE = {Investigating the relationship between neuroscience and law: a case study on a corpus of Italian case law texts}, YEAR = {2014}, KEYWORDS = {Neuroscience linguistic and lexico-semantic analysis}, URL = {https://publications.cnr.it/doc/310539}, CONFERENCE_NAME = {Language and Law in Social Practice 3rd International Conference}, CONFERENCE_PLACE = {Florence}, CONFERENCE_DATE = {14-15-16-17 May 2014}, } @INPROCEEDINGS{SASSOLINI_2014_INPROCEEDINGS_SSCCS_319040, AUTHOR = {Sassolini, E. and Sassi, M. and Cucurullo, S. and Cinini, A. and Sbrulli, S.}, TITLE = {Industrial Philology: Problems and techniques of data and archives preservation for future generations}, YEAR = {2014}, ABSTRACT = {The main objective of digital archiving of texts is their re-use and preservation. The concept that guides these initiatives is linked to structural and organizational needs which heavily influence the definition of the format specifications that describe the organisation of the archives at various levels and consists of a more or less complex document. 
A format specification provides the details needed to build a file from a text, establishes the admitted encodings and software applications that can decode the file and make its content accessible. These structural specifications can have an extremely variable size and they depend on the complexity of the format. Although some format specifications are, for the most part, independent of the specific software (for example, ASCII and Unicode codes), many of them are related to the historical period in which the texts were acquired and also by dated software technologies. The file format specification should evolve hand in hand with the related software, and the fate of one is in fact often linked to that of the other. It is therefore appropriate to face the issue of obsolescence of software together with the obsolescence of file formats and of storage medium.}, KEYWORDS = {text management, text analysis}, PAGES = {168-172}, URL = {https://publications.cnr.it/doc/319040}, PUBLISHER = {TransAtlantic (Amsterdam, Paesi Bassi)}, ISSN = {1386-2316}, ISBN = {978-90-77484-22-7}, CONFERENCE_NAME = {GL15: Fifteenth International Conference on Grey Literature}, CONFERENCE_PLACE = {Bratislava}, CONFERENCE_DATE = {2, 3 december 2013}, BOOKTITLE = {The GL-conference series. 
Conference proceedings}, } @INPROCEEDINGS{SORIA_2014_INPROCEEDINGS_S_285384, AUTHOR = {Soria, C.}, TITLE = {Regional and Minority Languages of Italy, and the importance of terminology}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/285384}, CONFERENCE_NAME = {ELEN General Assembly}, CONFERENCE_PLACE = {Helsinki, Finlandia}, CONFERENCE_DATE = {04/10/2014}, } @INPROCEEDINGS{SORIA_2014_INPROCEEDINGS_S_285385, AUTHOR = {Soria, C.}, TITLE = {Towards a notion of "Digital Language Diversity": the role of technologies for preserving multilingualism}, YEAR = {2014}, KEYWORDS = {digital language diversity, NLP, less-resourced languages, regional languages, minority languages, digital rights}, URL = {https://publications.cnr.it/doc/285385}, CONFERENCE_NAME = {Seminario tenuto presso Department of English and Linguistics, University of Mainz}, CONFERENCE_PLACE = {Mainz}, CONFERENCE_DATE = {10/06/2014}, } @INPROCEEDINGS{SORIA_2014_INPROCEEDINGS_S_285388, AUTHOR = {Soria, C.}, TITLE = {Towards a notion of Digital Language Diversity}, YEAR = {2014}, KEYWORDS = {digital language diversity, NLP, less-resourced languages, regional languages, minority languages, digital rights}, URL = {https://publications.cnr.it/doc/285388}, CONFERENCE_NAME = {International Conference Linguistic Diversity in Cyberspace}, CONFERENCE_PLACE = {Yakutsk, Federazione Russa}, CONFERENCE_DATE = {28/06/2014-03/07/2014}, } @TECHREPORT{CHIARELLA_2014_TECHREPORT_CCM_306248, AUTHOR = {Chiarella, D. and Cutugno, P. and Marconi, L.}, TITLE = {D3. 1 Initial list of gestures and syntax}, YEAR = {2014}, PAGES = {1-17}, URL = {https://publications.cnr.it/doc/306248}, } @TECHREPORT{CIGNONI_2014_TECHREPORT_C_288034, AUTHOR = {Cignoni, L.}, TITLE = {Il mondo delle "Nursery rhymes" inglesi}, YEAR = {2014}, PAGES = {1-11}, URL = {https://publications.cnr.it/doc/288034}, } @TECHREPORT{DEMATTEI_2014_TECHREPORT_DMDMBF_335399, AUTHOR = {De Mattei, M. and Medone, D. and D'Angelo, P. and Monachini, M. and Bartolini, R. 
and Frontini, F.}, TITLE = {MAPS: Architettura del Sistema}, YEAR = {2014}, ABSTRACT = {PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitività Bando DLTM Azione 1.2.2 "Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012. Il presente documento è il deliverable "D3.1 - Architettura del Sistema" del progetto MAPS (Marine Planning and Service Platform). Il progetto MAPS è un'evoluzione del progetto precedente Marine. Tale evoluzione si articola su tre aspetti diversi: - Un meccanismo di federazione dei dati, che consenta di rendere disponibili ai propri utenti non soltanto i dati prodotti internamente da sistema Marine ma anche quelli resi disponibili da altri sistemi similari, soddisfacendo così un più ampio ambito di esigenze informative. Il deliverable D2.2, Modello della Soluzione specifica in dettaglio queste nuove funzionalità. - Un Catalogo dei Documenti che, conservando la documentazione tecnica e scientifica dei prodotti offerti, possa documentare in modo accurato le modalità di misurazione, elaborazione e controllo dei prodotti forniti e quindi i relativi ambiti di applicabilità. - Un sistema di ricerca capace di selezionare i dati necessari ad uno scopo determinato non soltanto sulla base della loro tipologia, della loro dislocazione territoriale o di altre informazioni simili contenute nei metadati associati come avviene oggi nella maggior parte dei sistemi esistenti, ma anche sulla base delle informazioni contenute nella documentazione tecnica e scientifica. Tali funzionalità sono specificate nel deliverable D1.3 - Modello della Soluzione.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {1-35}, URL = {https://publications.cnr.it/doc/335399}, } @TECHREPORT{DEMATTEI_2014_TECHREPORT_DMMFBM_335403, AUTHOR = {De Mattei, M. and Medone, D. and Maltese, M. and Frontini, F. and Bartolini, R. 
and Monachini, M.}, TITLE = {META: Report di progettazione degli algoritmi individuati}, YEAR = {2014}, ABSTRACT = {PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitività Bando DLTM Azione 1.2.2 "Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012. Il deliverable definisce l'architettura del Sistema di Estrazione Eventi Meteo realizzato dagli autori nell'ambito del progetto META. Il sistema estrae da contenuti online informazione su eventi meteo critici verificatesi in Liguria e nel nord della Toscana.}, KEYWORDS = {Ontology, Information Extraction, Taxonomy}, PAGES = {1--19}, URL = {https://publications.cnr.it/doc/335403}, } @TECHREPORT{DELGROSSO_2014_TECHREPORT_D_354195, AUTHOR = {Del Grosso, A. M.}, TITLE = {Processo di analisi e indicizzazione dei testi digitalizzati di Cristoforo Clavio}, YEAR = {2014}, ABSTRACT = {Il contributo descrive il rilascio del componente di analisi linguistica sviluppato dall'Istituto di Linguistica Computazionale in seno al progetto Clavius on the Web (CoW). Contestualmente vengono presentate le risorse utilizzate nonché i dati e i vincoli di input e quelli di output.}, KEYWORDS = {Clavius on the Web, Lemmatizzazione, Latino}, URL = {http://www.claviusontheweb.it}, } @TECHREPORT{FRONTINI_2014_TECHREPORT_FBM_335400, AUTHOR = {Frontini, F. and Bartolini, R. 
and Monachini, M.}, TITLE = {MAPS: Stato dell'Arte}, YEAR = {2014}, ABSTRACT = {PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitività Bando DLTM Azione 1.2.2 "Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012 Il documento descrive lo stato dell'arte delle tecnologie linguistiche applicate ai sistemi di ricerca semantica.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {1--21}, URL = {https://publications.cnr.it/doc/335400}, } @TECHREPORT{FRONTINI_2014_TECHREPORT_FBM_335402, AUTHOR = {Frontini, F. and Bartolini, R. and Monachini, M.}, TITLE = {META: Report sui modelli e tecniche linguistiche}, YEAR = {2014}, ABSTRACT = {PROGRAMMA OPERATIVO REGIONALE POR-FESR (2007-2013) Asse 1 Innovazione e Competitività Bando DLTM Azione 1.2.2 "Ricerca industriale e sviluppo sperimentale a favore delle imprese del Distretto Ligure per le Tecnologie Marine (DLTM) anno 2012. Il deliverable riassume lo stato dell'arte delle tecnologie semantiche che possono essere impiegate nella realizzazione del progetto META. Il progetto META è una progetto di ricerca e sviluppo tecnologico finanziato dalla Regione Liguria con i fondi POR-FESR 2007-2013 della Comunità Europea che mira alla realizzazione di un sistema per l'allerta di eventi meteo critici in Liguria e nel nord della Toscana. Nell'ambito del progetto META le tecnologie semantiche sono utilizzate per estrarre eventi meteo di interesse da articoli pubblicati in rete o sui social network.}, KEYWORDS = {Ontology, Information Extraction, Semantic Web, Search Engine}, PAGES = {1--20}, URL = {https://publications.cnr.it/doc/335402}, } @TECHREPORT{FRONTINI_2014_TECHREPORT_FBMPG_287039, AUTHOR = {Frontini, F. and Bartolini, R. and Monachini, M. and Pardelli, G. and Goggi, S.}, TITLE = {Stato dell'arte dei motori semantici. 
Progetto MAPS, programma operativo regionale POR-FESR (2007-2013)}, YEAR = {2014}, ABSTRACT = {Il presente documento è il deliverable "D1.1 - Stato dell'Arte dei motori semantici del progetto MAPS (Marine Planning and Service Platform). Il progetto MAPS è una evoluzione del progetto precedente Marine. Tramite il progetto Marine (Bando Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013 - pos n.1) è stata realizzata una piattaforma informatica di supporto all'Oceanografia Operativa capace di raccogliere dati marini per renderli poi disponibili ai ricercatori e alle organizzazioni interessate tramite protocolli standard. Lo scopo del progetto MAPS è quello di realizzare una Catalogo di Documenti contenente informazioni per la piattaforma Marine. Caratteristica di MAPS è di fornire accesso ai dati oceanografici sia attraverso la ricerca per metadati, sia attraverso la ricerca semantica contenuta nella manualistica tecnico scientifica di riferimento.}, PAGES = {1--22}, URL = {https://publications.cnr.it/doc/287039}, } @TECHREPORT{MARZI_2014_TECHREPORT_M_287135, AUTHOR = {Marzi, C.}, TITLE = {Lexical acquisition in bilingual contexts: aspects of (extra)linguistic and psycholinguistic modeling}, YEAR = {2014}, ABSTRACT = {The overall research goals of this bilateral action have been to focus on cognitive and extra-linguistic factors involved in bilingual word recognition, with a view to exploring implications, requirements and constraints on computational models of bilingual acquisition. 
In particular, the cross visits intended to investigate linguistic, extra-linguistic and psycholinguistic issues which are generally assumed to have a bearing on aspects of bilingual lexicon acquisition and word processing, with a view to better understanding their role and dynamic relationship with aspects more closely related to the language-specific input.}, KEYWORDS = {Cognitive factors, monolingual and bilingual word recognition}, URL = {http://www.esf.org/coordinating-research/research-networking-programmes/humanities-hu}, } @TECHREPORT{MARZI_2014_TECHREPORT_MPV_287137, AUTHOR = {Marzi, C. and Plag, I. and Vulchanova, M.}, TITLE = {Words: structure, meaning, acquisition, processing}, YEAR = {2014}, ABSTRACT = {By bringing together experts of various scientific domains and different theoretical inclinations, the second NetWordS Summer school contributed to advance the current awareness of theoretical, typological, psycholinguistic, computational and neurophysiological evidence on the structure and processing of words, with a view to fostering novel methods of research and assessment for grammar architecture and language physiology.}, KEYWORDS = {Mental lexicon, Lexical processing and acquisition, Interdisciplinary approach}, URL = {http://www.networds-esf.eu/uploads/NetWordS/2nd_SummerSchool_Trondheim_scientific%20r}, } @MISC{BARONI_2014_MISC_B_349785, AUTHOR = {Baroni, P.}, TITLE = {Sito Web dell'Istituto di Linguistica Computazionale «A. Zampolli»}, YEAR = {2014}, ABSTRACT = {Nuovo sito Web dell'Istituto di Linguistica Computazionale «A. Zampolli» del Consiglio Nazionale delle Ricerche (ILC-CNR), realizzato con Drupal, sviluppato in italiano e inglese}, KEYWORDS = {Sito web}, URL = {http://www.ilc.cnr.it}, } @MISC{BOSCHETTI_2014_MISC_BD_288059, AUTHOR = {Boschetti, F. and Del Grosso, A. 
M.}, TITLE = {La filologia computazionale e collaborativa al liceo}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/288059}, } @MISC{CIGNONI_2014_MISC_C_288040, AUTHOR = {Cignoni, L.}, TITLE = {English Grammar Slides}, YEAR = {2014}, URL = {http://www.paleopatologia.it}, } @MISC{CIGNONI_2014_MISC_CF_288033, AUTHOR = {Cignoni, L. and Fornaciari, G.}, TITLE = {Basics of English Grammar}, YEAR = {2014}, ABSTRACT = {This ongoing basic reference grammar was created to support the Italian students of the funerary archaeology courses held at the University of Pisa in the years 2011-2014 in the study of the English language. The courses conducted by a University Professor specialized in the discipline and by an expert English language teacher were delivered according to the CLIL (content and language integrated learning) methodology, which combines the teaching of a particular subject with the study of a vehicular language of instruction. In this online grammar resource the different parts of speech (nouns, pronouns, adjectives, etc.) and elements of the sentence or clause are explained in a simple and concise manner, so as to help the students develop their grammar skills. On the model of the same English grammar (2010), written in Italian and accompanied by short reading passages extracted from Henry Duday's The Archaeology of the Dead, this grammar written in English will be constantly enriched with contextualized examples illustrating the different grammar items. The subject teacher and the native language teacher will select the most appropriate examples extracted by the students from a number of acknowledged funerary archaeology texts used during the courses and dealing with the subjects of funerary archaeology and of other related research areas, in particular with the funerary customs and practices of specific historical periods (Anglo-Saxon, Roman, Medieval). 
The examples illustrating a particular grammar rule will not only show the use of the word in context, but will at the same time be rich in content, providing some useful notions about the discipline, so that the grammar can be studied not in isolation but in meaningful contexts and real-life situations. The CLIL classes fall within the framework of a wider project carried out at the Division of Palaeopathology - Department of Oncology, Transplants and Modern Technologies in Medicine of the University of Pisa, which is aimed to develop specific training for those (master and specialist level graduate students, post-doctoral scholars, etc.) specializing in funerary archaeology.}, URL = {http://www.paleopatologia.it}, } @MISC{DELGROSSO_2014_MISC_DB_390785, AUTHOR = {Del Grosso, A. M. and Boschetti, F.}, TITLE = {Progetto pilota EUPORIA. Annotazione linguistica e retorica di testi greci con traduzione a fronte dell'Elena di Euripide}, YEAR = {2014}, ABSTRACT = {Il contributo illustra i risultati del progetto Euporia.}, KEYWORDS = {digital philology, computational philology, digital humanities}, URL = {https://publications.cnr.it/doc/390785}, } @MISC{LAM_2014_MISC_L_330094, AUTHOR = {Lamé, M.}, TITLE = {Writings In Contexts: some Echoes of Epigraphic Dispositive}, YEAR = {2014}, ABSTRACT = {How writings employed in epigraphic dispositives of all kinds (often approximatively translated with the word "device") challenge digital tools, repeating similar details that make one thinks of something else? What are the redundant patterns and characteristics, as well as the differences of those writings and does digital tools could contribute to harvest and to give sense to those writings that are often echoes of earlier epigraphic communication and are still echoing through ages among epigraphies of all time? 
By addressing those questions within the EAGLE international conferences, this participation also presents the international panel "Technology \& Tradition: a Synergic Approach to Deciphering, Analyzing and Annotating Epigraphic Writings", their authors and their standpoints linked to their full texts.}, KEYWORDS = {Epigraphic transmission (echoes), Epigraphic dispositive, Digital Epigraphy, Writings in contexts, Deciphering, Digital Autoptic Process, Archaic and iconographic writings, Encoding epigraphic writings, 3D reconstruction and analysis}, URL = {http://eer.hypotheses.org/technology-tradition-a-synergic-approach-to-deciphering-analyzing-and-annotating-epigraphic-writings/unit-1-computer-graphics-digital-epigraphy-and-computational-linguistics-from-cnr-pisa/marion-lame-writings-in-contexts}, } @MISC{MARZI_2014_MISC_M_289271, AUTHOR = {Marzi, C.}, TITLE = {Models and dynamics of the morphological lexicon in mono- and bilingual acquisition}, YEAR = {2014}, ABSTRACT = {Aim of this work is to define an explanatory model of the morphological lexicon as a dynamic system of word acquisition and storage in both mono- and multi-lingual contexts. The main focus is on exploring some aspects of the paradigmatic organisation of the mental lexicon in language acquisition, based on a dynamic analysis of mono- and bilingual contexts. An interdisciplinary approach to the lexical acquisition combines theoretical-motivated accounts, psycho-cognitive evidence and methodologies, and machine learning technologies. 
In particular, the thesis deals with those basic psychological and cognitive mechanisms considered as crucial in language acquisition: (i) the ability to perceive recurrent morphological structures (invariances) in varying temporal contexts, (ii) the capability to access/activate time series of symbols in the short term memory and to selectively integrate them with long term memory expectations, (iii) the attitude towards building novel forms through analogical extension of intra- and inter-paradigmatic relations (generalisation). This investigation is pursued through a computational model based on Self-Organising Maps with Hebbian connections defined over a temporal layer (Temporal Self-Organising Maps - TSOMs), providing a principled algorithmic account of effects of lexical acquisition, processing and access. Computational simulation of biologically inspired neural architecture of the mental lexicon offers the possibility to reproduce a wide range of conditions of mono- and multi-lingual input exposure, and to illustrate the dynamic of word acquisition and the emergence of morphological organisation. The proposed model provides an adaptive multifactorial account of morphology acquisition affected by input factors, such as word frequency distributions, paradigm regularity and wordlikeness, whereby lexical perception and organisation are grounded on memory-based processing strategies.}, KEYWORDS = {word acquisition, morphological lexicon, bio-computational model}, URL = {https://publications.cnr.it/doc/289271}, } @ARTICLE{BOSCHETTI_2013_ARTICLE_B_276310, AUTHOR = {Boschetti, F.}, TITLE = {Annotations in collaborative environments}, YEAR = {2013}, ABSTRACT = {This article discusses methodological aspects of the Greek into Arabic Web Application related to the annotation system. Collaborative environments for the philological studies manage multiple versions both of the reference edition with digital variants and of linguistic and exegetical annotations. 
The system must verify and maintain the consistency of interrelated information, which can change asynchronously. Strategies to align different versions of texts and annotations, in order to update the internal references and notify the users to verify the content consistency, are illustrated. Structural aspects that involve the granularity and overlapping of annotations are discussed, taking into account also that linguistic annotations automatically generated by morphological parsers can be the basis for extended comments in natural language. Finally, the article illustrates which features related to the annotation system are yet implemented in the G2A Web Application.}, KEYWORDS = {collaborative philology}, PAGES = {185--194}, URL = {http://www.greekintoarabic.eu/index.php?id=20}, VOLUME = {3}, PUBLISHER = {CNR, Istituto di Linguistica Computazionale (Pisa, Italia)}, ISSN = {2239-012X}, JOURNAL = {Studia graeco-arabica}, } @ARTICLE{BOZZI_2013_ARTICLE_B_288124, AUTHOR = {Bozzi, A.}, TITLE = {G2A: a Web application to study, annotate and scholarly edit ancient texts and their aligned translations. Part I. General model of the computational philology application}, YEAR = {2013}, ABSTRACT = {This paper presents the general model of a Web application for computational philology and describes the modules implemented by ILC-CNR in Pisa for the ERC project Ideas "Greek into Arabic. Philosophical Concepts and Linguistic Bridges" ADG 249431 (acronym: Greek into Arabic). The main principles on which the model is based are modularity, flexibility and development of the software according to open source criteria. These elements make it possible to include additional components in the modular structure, as well as components essential to the Greek into Arabic project (modularity), thus allowing the application to extend its functions to many other philological fields, from classical and medieval philology to genetic criticism and philology of ancient printed texts (flexibility). 
Dissemination of this application, especially in the research and academic fields, is guaranteed by the fact that its development is performed using internationally acknowledged systems of standard mark-up language and tools with no copyright restrictions (open source). In Part II a preliminary version of the user manual of G2A Web application is provided.}, KEYWORDS = {History of Philosophy, Computational philology}, PAGES = {159--171}, URL = {http://www.greekintoarabic.eu/index.php?id=101}, VOLUME = {3}, PUBLISHER = {CNR, Istituto di Linguistica Computazionale (Pisa, Italia)}, ISSN = {2239-012X}, JOURNAL = {Studia graeco-arabica}, } @ARTICLE{CALZOLARI_2013_ARTICLE_C_288640, AUTHOR = {Calzolari, N.}, TITLE = {Il Trattamento Automatico della Lingua per la sopravvivenza dell'Italiano nel mondo digitale}, YEAR = {2013}, ABSTRACT = {Il TAL nelle sue dimensioni rilevanti}, KEYWORDS = {Trattamento Automatico della Lingua (TAL)}, PAGES = {1--9}, URL = {http://www.camera.it/application/xmanager/projects/leg17/attachments/periodici/pdfs/000/000/007/ritagli_2013_03.pdf}, VOLUME = {2013}, PUBLISHER = {Camera dei Deputati (Roma)}, JOURNAL = {Ritagli}, } @ARTICLE{DEFELICE_2013_ARTICLE_D_285271, AUTHOR = {De Felice, I.}, TITLE = {Affordances: una chiave per il word sense disambiguation}, YEAR = {2013}, PAGES = {105--110}, URL = {https://publications.cnr.it/doc/285271}, VOLUME = {1}, ISSN = {2282-6009}, JOURNAL = {Nea Science-Giornale Italiano di neuroscienze, psicologia e riabilitazione}, } @ARTICLE{DELGROSSO_2013_ARTICLE_D_288049, AUTHOR = {Del Grosso, A. M.}, TITLE = {Indexing techniques and variant readings management}, YEAR = {2013}, ABSTRACT = {This paper illustrates indexing routines developed for the G2A Web Application, a philological system totally open source designed by the Team of the ILC-CNR of Pisa within the context of the ERC project Greek into Arabic. Philosophical Concepts and Linguistic Bridges (Ideas AdG 249431). 
Section 1 introduces the concept of 'index' in this peculiar field. The indexing process implemented by the ILC-CNR Team for the G2A Web Application is illustrated in Section 2. Section 3 discusses the component of textual criticism.}, KEYWORDS = {Computational and collaborative philology}, PAGES = {211--230}, URL = {http://www.greekintoarabic.eu/index.php?id=101\&reset=1}, VOLUME = {3}, PUBLISHER = {Pacini Editore (Pisa, Italia)}, ISSN = {2281-2687}, JOURNAL = {Studia graeco-arabica}, } @ARTICLE{DELLORLETTA_2013_ARTICLE_DVM_310619, AUTHOR = {Dell'Orletta, F. and Venturi, G. and Montemagni, S.}, TITLE = {Linguistically-driven selection of correct arcs for dependency parsing}, YEAR = {2013}, ABSTRACT = {LISCA is an unsupervised algorithm aimed at assigning a quality score to each arc generated by a dependency parser in order to produce a decreasing ranking of arcs from correct to incorrect ones. LISCA exploits statistics about a set of linguistically-motivated and dependency-based features extracted from a large corpus of automatically parsed sentences and uses them to assign a quality score to each arc of a parsed sentence belonging to the same domain of the automatically parsed corpus. LISCA has been successfully tested on two datasets belonging to two different domains and in all experiments it turned out to outperform different baselines, thus showing to be able to reliably detect correct arcs also representing domain-specific peculiarities.}, KEYWORDS = {Correct arcs, Dependency parsing}, PAGES = {125--136}, URL = {http://cys.cic.ipn.mx/ojs/index.php/CyS/article/view/1517}, VOLUME = {17}, ISSN = {1405-5546}, JOURNAL = {Computación y Sistemas}, } @ARTICLE{FANTONI_2013_ARTICLE_FADM_285671, AUTHOR = {Fantoni, G. and Apreda, R. and Dell'Orletta, F. 
and Monge, M.}, TITLE = {Automatic extraction of function-behaviour-state information from patents}, YEAR = {2013}, ABSTRACT = {Patents contain a large quantity of technical information not available elsewhere and therefore very interesting for both academia and industry. The purpose of the research is to try to detect and extract information about the functions, the physical behaviours and the states of the system directly from the text of a patent in an automatic way. The above three categories constitute a well-known set of relevant entities in the theory of engineering design, and their study allows powerful analysis of individual artefacts as well as that of groups of products or technologies. The focus is in providing a handy tool that could speed up and facilitate human analysis and allow tackling also large corpora of documents. A second goal is to develop a protocol based on free software and database resources, so that it could be replicable with limited effort by everyone without having to rely on commercial databases. Extracting technical and design information from a document whose aim is more legal than technical, and that is written using a specific jargon, is not a trivial task. The approach chosen to overcome the various issues is to support state-of-the-art Computational Linguistic tools with a large Knowledge Base. The latter has been constructed both manually and automatically and comprises not only keywords but also concepts, relationships and regular expressions. A case study about a very recent patent describing a mechanical device has been included to show the functioning and output of the entire system. © 2013 Elsevier Ltd. 
All rights reserved.}, KEYWORDS = {Function-Behaviour-Structure, Patent informatics, Product development, Semantic elaboration}, PAGES = {317--334}, URL = {http://www.sciencedirect.com/science/article/pii/S1474034613000487}, VOLUME = {27}, DOI = {10.1016/j.aei.2013.04.004}, PUBLISHER = {Elsevier Science (Oxford, Regno Unito)}, ISSN = {1474-0346}, JOURNAL = {Advanced engineering informatics}, } @ARTICLE{MARCHI_2013_ARTICLE_M_288182, AUTHOR = {Marchi, S.}, TITLE = {G2A: a Web application to study, annotate and scholarly edit ancient texts and their aligned translations. Part II. Towards a user manual}, YEAR = {2013}, ABSTRACT = {The present contribution describes the current operating procedures of the G2A application. The system is still under development at the Institute of Computational Linguistics (ILC/CNR, Area della ricerca di Pisa); therefore, all the functions described in this provisional user manual should be considered as an exemplification of the general model described by Andrea Bozzi in Part I.}, KEYWORDS = {Literary computing, XML, Web Application, Java, exist-db}, PAGES = {173--183}, URL = {https://learningroads.cfs.unipi.it/wp-content/uploads/2015/10/MARCHISGA3-2013.pdf}, VOLUME = {3}, PUBLISHER = {CNR, Istituto di Linguistica Computazionale (Pisa, Italia)}, ISSN = {2239-012X}, JOURNAL = {Studia graeco-arabica}, } @ARTICLE{MARZI_2013_ARTICLE_M_287131, AUTHOR = {Marzi, C.}, TITLE = {Innovation, language, and grey literature}, YEAR = {2013}, ABSTRACT = {Language and innovation are inseparable. Language conveys ideas which are essential in innovation, establishes the most immediate connections with our conceptualisation of the outside world, and provides the building blocks for communication. Every linguistic choice is necessarily meaningful, and it involves the parallel construction of form and meaning. From this perspective, language is a dynamic knowledge construction process. 
In this article, emphasis will be laid on investigating how words are used to describe innovation, and how innovation topics can influence word usage and collocational behaviour. Especially in grey materials, and in a context-based approach, the lexical representation of innovative knowledge is closely related to the representation of knowledge itself, and gives the opportunity to reduce the gap between knowledge representation and knowledge understanding. This will bring into focus the dynamic interplay between lexical creativity and innovative pragmatic contexts, and the necessity for a dynamic semantic shift from context-driven vagueness to domain-driven specialisation.}, KEYWORDS = {Lexical productivity, Language technologies, Grey Literature, Web corpora}, PAGES = {145--151}, URL = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84884695370\&partnerID=q2rCbXpz}, VOLUME = {9}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{MARZI_2013_ARTICLE_MF_283382, AUTHOR = {Marzi, C. and Ferro, M.}, TITLE = {Adaptive strategies in lexical acquisition}, YEAR = {2013}, ABSTRACT = {The emergence of morphological patterns from lexical storage in language acquisition is conditioned by language-specific factors as well as extra-linguistic cognitive capacities. With particular reference to the acquisition of plural markers in German, in a memory-based perspective highlighting interesting theoretical implications for usage-based models, the paper analyses acquisitional strategies by focussing on emergent relations between stored word forms and on dynamic expectation/competition of incoming input. In particular, we outline an adaptive multifactorial account of morphological processing that includes both frequency and formal factors. 
Our investigation is supported by a computational model of morphology acquisition/processing based on self-organisation memories, where word representations are dynamically recoded as time-series.}, KEYWORDS = {German plurals, Morphological generalisation, Self-organising memory, Word processing}, PAGES = {307--328}, URL = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84892521599\&partnerID=q2rCbXpz}, VOLUME = {XII}, DOI = {10.1418/75045}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{MONTEMAGNI_2013_ARTICLE_M_329781, AUTHOR = {Montemagni, S.}, TITLE = {Tecnologie linguistico-computazionali e monitoraggio della lingua italiana}, YEAR = {2013}, ABSTRACT = {In una riflessione su dove stia andando l'italiano del terzo millennio, è legittimo chiedersi se e in che misura le tecnologie linguistico-computazionali possano essere di aiuto nel monitoraggio della lingua italiana nelle sue varietà diamesiche, diafasiche e diastratiche, nonché sull'asse diacronico. L'obiettivo del presente contributo consiste nel fornire una risposta, sebbene preliminare, a questo interrogativo, primariamente sul versante metodologico. In particolare, si vuole mostrare che mediante il ricorso a tecnologie linguistico-computazionali è oggi possibile monitorare un ampio spettro di tratti, che spaziano tra i diversi livelli di descrizione linguistica (primariamente, lessico, morfo-sintassi e sintassi), in relazione a corpora di sempre più vaste dimensioni. Questo rappresenta un cambio fondamentale nello studio della variazione linguistica, in particolare della lingua italiana, fino a oggi basato su corpora di dimensioni relativamente ridotte e tipicamente condotto mediante un'analisi (semi-)manuale del testo. 
Come vedremo, l'uso di vasti corpora testuali combinato con il ricorso a tecnologie linguistico-computazionali per l'analisi e il monitoraggio linguistico rendono oggi possibili analisi sempre più accurate e affidabili, che coprono aspetti della struttura linguistica rimasti fino a ora inesplorati in quanto difficilmente attingibili mediante un'analisi manuale del testo.}, KEYWORDS = {Monitoraggio Linguistico, Trattamento Automatico del Linguaggio, Varietà d'Uso della Lingua}, PAGES = {145--172}, URL = {http://www.italianlp.it/wp-content/uploads/2014/04/montemagni_silta_submission_rif.pdf}, VOLUME = {XLII}, PUBLISHER = {Pacini Editore (Ospedaletto (PI), Italia)}, ISSN = {0390-6809}, JOURNAL = {Studi italiani di linguistica teorica ed applicata}, } @ARTICLE{MONTEMAGNI_2013_ARTICLE_MWDN_288064, AUTHOR = {Montemagni, S. and Wieling, M. and De Jonge, B. and Nerbonne, J.}, TITLE = {Synchronic patterns of Tuscan phonetic variation and diachronic change: Evidence from a dialectometric study}, YEAR = {2013}, ABSTRACT = {A careful investigation of synchronic patterns of linguistic variation with underlying linguistic features can lead to important insights into the comprehension of diachronic phonetic processes. In this article, we showed that the method of spectral partitioning of bipartite graphs applied to synchronic dialectal data can effectively and reliably be used to investigate diachronic processes, thus contributing to a deeper understanding of the relationship between synchronic variation and diachronic change. This was illustrated through a case study carried out on Tuscan dialects, focusing on so-called Tuscan 'gorgia', a lenition process consisting of the spirantization of stop consonants. In particular, from a quantitative analysis of the sound correspondences involving voiceless and voiced stops, we tracked the evolution of the spirantization phenomenon in several respects. 
First, we tracked spirantization geographically, across Tuscany from the influential center of Florence to the peripheral areas. Second, we tracked it phonologically, from voiceless to voiced stops, and within each voicing class from velars to dentals and then to bilabials. Finally, we tracked it demographically, with young speakers using the most innovative sound correspondences more than old speakers. The fact that these results are in line with the literature on the topic of Tuscan 'gorgia' demonstrates the potential of the method of spectral partitioning of bipartite graphs with respect to the reconstruction of diachronic processes starting from diatopically distributed synchronic dialectal data.}, KEYWORDS = {Tuscan dialectology, dialectometry, diachronic variation}, PAGES = {157--172}, URL = {https://publications.cnr.it/doc/288064}, VOLUME = {28}, DOI = {10.1093/llc/fqs057}, PUBLISHER = {Oxford University Press (Oxford, Regno Unito)}, ISSN = {0268-1145}, JOURNAL = {Literary and linguistic computing}, } @ARTICLE{NAHLI_2013_ARTICLE_N_288546, AUTHOR = {Nahli, O.}, TITLE = {Computational contributions for Arabic language processing Part I. The automatic morphologic analysis of Arabic texts}, YEAR = {2013}, ABSTRACT = {problems of ambiguity inherent to the Arabic language. Difficulties arose in the various stages of automatic processing of the Arabic version of Plotinus, the text which lies at the core of our project. Part I highlights the needs that led us to update the morphological engine AraMorph in order to optimize its morpho-syntactic analysis. Even if the engine has been optimized, a digital lexical source for better use of the system is still lacking. Part II presents a methodology exploiting the internal structure of the Arabic lexicographic encyclopaedia Lisan al-'arab, which allows automatic extraction of the roots and derived lemmas. 
The outcome of this work is a useful resource for morphological analysis of Arabic, either in its own right, or to enrich already existing resources}, KEYWORDS = {Morphological engine AraMorph, Morpho-syntactic analysis, Arabic language}, PAGES = {195--206}, URL = {http://www.greekintoarabic.eu/uploads/media/NAHLI_SGA_3-2013.pdf}, VOLUME = {3}, PUBLISHER = {Pacini Editore (Pisa, Italia)}, ISSN = {2281-2687}, JOURNAL = {Studia graeco-arabica}, } @ARTICLE{NAHLI_2013_ARTICLE_NG_282561, AUTHOR = {Nahli, O. and Giovannetti, E.}, TITLE = {Computational contributions for Arabic language processing-Part II. Lisan al-'arab as a source of lexical and morphological knowledge}, YEAR = {2013}, ABSTRACT = {The following sections illustrate a part of the study on the morphology of the Arabic language which is carried on within the framework of the ERC project Greek into Arabic. Philosophical Concepts and Linguistic Bridges ADG 249431. We used the Arabic lexicographic encyclopaedia Lisan al-'arab and, thanks to the regularity of its structure, we developed a system for the extraction of morphologically labelled word sequences, to be exploited for morphological analysis purposes.}, KEYWORDS = {Arabic morphology, Arabic morphological analysis, Arabic lexicography, Natural Language Processing}, PAGES = {207--210}, URL = {https://publications.cnr.it/doc/282561}, VOLUME = {3}, PUBLISHER = {CNR, Istituto di Linguistica Computazionale (Pisa, Italia)}, ISSN = {2239-012X}, JOURNAL = {Studia graeco-arabica}, } @ARTICLE{PARDELLI_2013_ARTICLE_PB_242950, AUTHOR = {Pardelli, G. and Biagioni, S.}, TITLE = {Quando la linguistica incontra l'informatica: una riflessione terminologica}, YEAR = {2013}, ABSTRACT = {This paper presents a case study to the use of words in the field of Natural Language Processing. This electronic processing of linguistic data leads to the diffusion of clear and concise words for describing a complex concept that would need a circumlocution to be described instead. 
The aim of this article is to provide thinking over of these new lexical forms over the time. We present a tabular representation summarizing terms extracted from the titles of papers presented at international conferences COLING, International Conference on Computational Linguistics in the period (1965-2010). The system used for the data processing is available at the Istituto di Linguistica Computazionale "A. Zampolli" - CNR, Pisa, Italy.}, KEYWORDS = {Computational Linguistics, Computer Science, Terminology, Information extraction, Content Analysis and Indexing}, PAGES = {67-78}, URL = {http://caspur-ciberpublishing.it}, VOLUME = {3}, DOI = {10.2423/i22394303v3n1p67}, PUBLISHER = {Caspur-Ciber Publishing (Roma, Italia)}, ISSN = {2239-4303}, JOURNAL = {SCIRES-IT (Roma)}, } @INCOLLECTION{BASILI_2013_INCOLLECTION_BLDMV_316376, AUTHOR = {Basili, R. and Lenci, A. and De Cao, D. and Moschitti, A. and Venturi, G.}, TITLE = {Evalita 2011: the Frame Labeling over Italian Texts Task}, YEAR = {2013}, ABSTRACT = {The Frame Labeling over Italian Texts (FLaIT) task held within the EvalIta 2011 challenge is here described. It focuses on the automatic annotation of free texts according to frame semantics. Systems were asked to label all semantic frames and their arguments, as evoked by predicate words occurring in plain text sentences. Proposed systems are based on a variety of learning techniques and achieve very good results, over 80% of accuracy, in most subtasks.}, KEYWORDS = {NLP System Evaluation, Shallow Semantic Parsing, Frame Semantics}, PAGES = {195-204}, URL = {https://publications.cnr.it/doc/316376}, VOLUME = {7689}, PUBLISHER = {Springer (Berlin Heidelberg, DEU)}, ISBN = {978-3-642-35827-2}, BOOKTITLE = {Evaluation of Natural Language and Speech Tools for Italian}, EDITOR = {Magnini, B. and Cutugno, F. and Falcone, M. 
and Pianta, E.}, } @INCOLLECTION{CALZOLARI_2013_INCOLLECTION_C_288554, AUTHOR = {Calzolari, N.}, TITLE = {Language Resources: From Infancy to Maturity. Lessons and Next Steps for the UNL Community}, YEAR = {2013}, ABSTRACT = {In the second chapter, Professor Nicoletta Calzolari Zamorani, Research Associate and former Director of CNR-ILC, Pisa, Italy, approaches the questions by giving insight on the general framework surrounding Language Resources (LRs) today. Rather than answering the specific UNL questions directly, she emphasizes the fact that in order to define a language structure such as UNL it is pre-eminent to gather results of analyses from different communities and to define a coherent strategy.}, KEYWORDS = {Language Resources (LRs)}, PAGES = {19-35}, URL = {https://publications.cnr.it/doc/288554}, PUBLISHER = {Cambridge scholars publishing (Newcastle upon Tyne, GBR)}, ISBN = {978-1-4438-5144-2}, BOOKTITLE = {Lexical Issues of UNL}, EDITOR = {Martins, R.}, } @INCOLLECTION{CALZOLARI_2013_INCOLLECTION_C_288651, AUTHOR = {Calzolari, N.}, TITLE = {The People's web Meets NLP. Collaboratively Constructed Language Resources}, YEAR = {2013}, ABSTRACT = {It's a pleasure to write the Foreword for the book on Collaboratively Constructed Language Resources. I believe that the trend of collaborative construction of Language Resources (LRs) represents both a "natural" evolution of computerised resource building (I'll try to give few historical hints) and a "critical" evolution for the future of the field of language resources}, KEYWORDS = {Language Resources (LRs)}, PAGES = {vii-xiii}, URL = {http://biblioproxy.cnr.it:2107/static/pdf/767/bfm%253A978-3-642-35085-6%252F1.pdf?auth66=1417169875_75f7e71c14e86981c69d52e3379c37e5\&ext=.pdf}, DOI = {10.1007/978-3-642-35085-6}, PUBLISHER = {Springer (Milano-Heidelberg-New York, ITA)}, ISBN = {978-3-642-35085-6}, BOOKTITLE = {The People's Web Meets NLP. Collaboratively Constructed Language Resources}, EDITOR = {Gurevych, I. 
and Kim, J.}, } @INCOLLECTION{CALZOLARI_2013_INCOLLECTION_CBLM_231482, AUTHOR = {Calzolari, N. and Bertagna, F. and Lenci, A. and Monachini, M.}, TITLE = {Boosting Lexical Resources for the Semantic Web: Generative Lexicon and Lexicon Interoperability}, YEAR = {2013}, ABSTRACT = {To make the vision of a European Information Infrastructure and of the Semantic Web a reality, two key issues are tackled: (i) content, which must be dealt with in a multilingual environment; (ii) standards, which are critical to achieve interoperability and integration. In the Semantic Web scenario, ontologies are the key components to manage knowledge, whereas, in Human Language Technology, semantic description is committed to computational lexicons, which have to squarely address the complexity of natural language. Answers to the above issues are found within two frameworks: first, in the framework of Generative Lexicon (GL) theory and GL-based lexicons that account for the complex, multidimensional and multifaceted nature of meaning in lexicon and ontology design; second, in the context of the ISLE enterprise which, with the MILE, represents an essential interface between advanced research in the field of multilingual lexical semantics and the practical task of developing resources for HLT.}, KEYWORDS = {Generative Lexicon, Computational Lexicons, Standards, Semantic Web, Human Language Technology}, PAGES = {415-431}, URL = {http://download-v2.springer.com/static/pdf/679/chp%253A10.1007%252F978-94-007-5189-7_18.pdf?token2=exp=1430751723~acl=%2Fstatic%2Fpdf%2F679%2Fchp%25253A10.1007%25252F978-94-007-5189-7_18.pdf*~hmac=2ebe0f4a7ba5903ef47dbc16674a886bcbb26b9d6e4e9c9f209e35787522e5f1}, VOLUME = {46}, DOI = {10.1007/978-94-007-5189-7_18}, PUBLISHER = {Springer (Dordrecht, NLD)}, ISBN = {9789400751880}, BOOKTITLE = {Text, Speech and Language Technology}, EDITOR = {Pustejovsky, J. and Bouillon, P. and Isahara, H. and Kanzaki, K. 
and Lee, C.}, } @INCOLLECTION{CALZOLARI_2013_INCOLLECTION_CMS_280537, AUTHOR = {Calzolari, N. and Monachini, M. and Soria, C.}, TITLE = {LMF-Historical Context and Perspectives}, YEAR = {2013}, ABSTRACT = {The importance of designing standards for language resources (LR) is firmly established, starting with the Expert Advisory Group for Language Engineering (EAGLES) and International Standards for Language Engineering (ISLE) initiatives. Both EAGLES and ISLE stress the importance of reaching a consensus on (linguistic and nonlinguistic) "content", in addition to agreement on formats and encoding issues, and also address the needs of content processing and Semantic Web technologies. The recommendations for standards and best practices issued within the projects became, through the INTERA and mainly the LIRICS project, the International Organization for Standardization (ISO) within the ISO TC37/SC4 committee, where Lexical Markup Framework (LMF) was developed. Standards are fundamental to exchange, preserve, maintain and integrate data and LRs, to achieve interoperability in general, and they are an essential basis of any LR infrastructure.}, KEYWORDS = {EAGLES, international standards for language engineering, interoperability, lexical markup framework (LMF)}, PAGES = {1-18}, URL = {http://dx.doi.org/10.1002/9781118712696.ch1}, DOI = {10.1002/9781118712696.ch1}, PUBLISHER = {John Wiley \& Sons, Inc (Hoboken, USA)}, ISBN = {978-1-118-71259-7}, BOOKTITLE = {LMF Lexical Markup Framework}, EDITOR = {Gil, F. and Patrick, P.}, } @INCOLLECTION{CUCURULLO_2013_INCOLLECTION_CS_353214, AUTHOR = {Cucurullo, S. and Sassi, M.}, TITLE = {Il Contributo Tecnologico dell'ILC al Progetto LinCi}, YEAR = {2013}, ABSTRACT = {Il progetto "la Lingua delle Città (LinCi)" ha l'obiettivo di mettere in luce alcuni fenomeni rilevanti - sia dal punto di vista lessicale che grammaticale - dell'italiano comune e informale, secondo l'opinione sull'uso dei parlanti intervistati. 
A tale scopo il gruppo dei linguisti che ha ideato il progetto ha elaborato un questionario di 200 domande riconducibili a vari campi semantici. La struttura del questionario consente inoltre al raccoglitore di ricavare informazioni di tipo grammaticale (per esempio sull'uso di certe forme pronominali o verbali), nonché giudizi di carattere metalinguistico sulla "dialettalità" o meno di certe forme, sulle differenze tra registro formale e informale, sulla frequenza d'uso.}, KEYWORDS = {lingua italiana, banche dati}, PAGES = {81-99}, URL = {https://publications.cnr.it/doc/353214}, PUBLISHER = {Accademia della Crusca (Firenze, ITA)}, ISBN = {978-88-89369-51-7}, BOOKTITLE = {La lingua delle città LinCi. La banca dati}, EDITOR = {Nesi, A. and Salani, T. P.}, } @INCOLLECTION{CUCURULLO_2013_INCOLLECTION_CS_353219, AUTHOR = {Cucurullo, S. and Sassi, M.}, TITLE = {ASPETTI TECNICO-METODOLOGICI DEL PROGETTO LinCi}, YEAR = {2013}, ABSTRACT = {Il progetto "la Lingua delle Città (LinCi)" ha l'obiettivo di mettere in luce i cambiamenti e l'evoluzione dei diversi dialetti italiani regionali, sia dal punto di vista grammaticale che lessicale. A tale scopo, è stato elaborato un questionario di 200 domande su vari campi semantici: determinazioni temporali; forme di saluto; corpo umano; mestieri; oggetti domestici; cibi, frutta e verdura; rapporti sociali, ecc. La struttura del questionario consente inoltre al raccoglitore di ricavare informazioni di tipo grammaticale (per esempio sull'uso di certe forme pronominali e verbali), nonché giudizi di carattere metalinguistico, sulla "dialettalità" o meno di certe forme, sulle differenze tra registro formale e informale, ecc. 
La collaborazione dell'Istituto di Linguistica Computazionale (ILC) al Progetto LinCi ha come obiettivo quello di fornire il supporto informatico all'unità di coordinamento per la creazione e gestione della banca-dati e la sua consultazione tramite il sito dedicato.}, KEYWORDS = {Sociolinguistica, banca-dati DBT}, PAGES = {47-50}, URL = {https://publications.cnr.it/doc/353219}, VOLUME = {11}, PUBLISHER = {Franco Cesati Editore (Firenze, ITA)}, ISBN = {9788876674563}, BOOKTITLE = {La lingua delle città Raccolta di studi}, EDITOR = {Nesi, A.}, } @INCOLLECTION{DELGROSSO_2013_INCOLLECTION_DM_288051, AUTHOR = {Del Grosso, A. M. and Marchi, S.}, TITLE = {Una Applicazione Web per la Filologia Computazionale. Un esperimento su alcuni scritti autografi di Ferdinand de Saussure}, YEAR = {2013}, ABSTRACT = {Nel progetto PRIN "Per un'edizione digitale dei manoscritti di Ferdinand de Saussure", il cui scopo era approntare una serie di strumenti utili all'edizione digitale degli autografi saussuriani, il gruppo di ricerca e sviluppo di filologia computazionale guidato da Andrea Bozzi e presente presso l'Istituto di Linguistica Computazionale "Antonio Zampolli" (ILC-CNR, Pisa) ha individuato i requisiti ed implementato le funzionalità rispondenti alle esigenze dichiarate degli studiosi di questi materiali.}, KEYWORDS = {filologia computazionale, piattaforma web}, PAGES = {131-157}, URL = {https://publications.cnr.it/doc/288051}, PUBLISHER = {Edizioni dell'Orso (Alessandria, ITA)}, ISBN = {978-88-6274-478-2}, BOOKTITLE = {Guida per un'edizione digitale dei manoscritti di Ferdinand de Saussure}, EDITOR = {Gambarara, D. and Marchese, M. P.}, } @INCOLLECTION{DELLORLETTA_2013_INCOLLECTION_DMMVAF_266373, AUTHOR = {Dell'Orletta, F. and Marchi, S. and Montemagni, S. and Venturi, G. and Agnoloni, T. 
and Francesconi, E.}, TITLE = {Domain Adaptation for Dependency Parsing at EVALITA 2011}, YEAR = {2013}, ABSTRACT = {The domain adaptation task was aimed at investigating techniques for adapting state-of-the-art dependency parsing systems to new domains. Both the language dealt with, i.e. Italian, and the target domain, namely the legal domain, represent two main novelties of the task organised at Evalita 2011 with respect to previous domain adaptation initiatives. In this paper, we define the task and describe how the datasets were created from different resources. In addition, we characterize the different approaches of the participating systems, report the test results, and provide a first analysis of these results.}, KEYWORDS = {Dependency Parsing, Domain Adaptation, Self-training, Active Learning, Legal-NLP}, PAGES = {58-69}, URL = {https://publications.cnr.it/doc/266373}, VOLUME = {7689}, PUBLISHER = {Springer (Berlin Heidelberg, DEU)}, ISBN = {978-3-642-35827-2}, BOOKTITLE = {Evaluation of Natural Language and Speech Tools for Italian}, EDITOR = {Magnini, B. and Cutugno, F. and Falcone, M. and Pianta, E.}, } @INCOLLECTION{HAYASHI_2013_INCOLLECTION_HMSSC_285427, AUTHOR = {Hayashi, Y. and Monachini, M. and Savas, B. and Soria, C. and Calzolari, N.}, TITLE = {LMF as a Foundation for Servicized Lexical Resources}, YEAR = {2013}, ABSTRACT = {This chapter argues that the lexical markup framework (LMF) can play a significant role in realizing servicized lexical resources on the Web. To accomplish this goal, it begins with a brief introduction of the notion of servicized resources, and then presents a technical architecture of, what is called, LMF-aware lexicon access services. It presents two implementation showcases to demonstrate the applicability of the LMF and to discuss its possible extensions. The first example deals with WordNet-type computational semantic lexicons, while the other takes up a machine-readable bilingual dictionary primarily compiled for human usage. 
To conclude the chapter, the final sections summarize the results while reviewing related work.}, KEYWORDS = {lexical markup framework (LMF), LMF-aware lexicon access services, servicized lexical resources}, PAGES = {201-213}, URL = {http://onlinelibrary.wiley.com/doi/10.1002/9781118712696.ch14/references}, DOI = {10.1002/9781118712696.ch14}, PUBLISHER = {Wiley-ISTE (Hoboken, USA)}, ISBN = {9781118712696}, BOOKTITLE = {LMF-Lexical Markup Framework}, EDITOR = {Francopoulo, G.}, } @INCOLLECTION{MONTANI_2013_INCOLLECTION_MARBCMSTSBP_278709, AUTHOR = {Montani, C. and Andronico, P. and Raviolo, C. and Bozzi, A. and Codenotti, B. and Meghini, C. and Sommani, M. and Tarabella, L. and Scopigno, R. and Baraglia, R. and Perego, R.}, TITLE = {Il CNR dopo la CEP}, YEAR = {2013}, ABSTRACT = {Una breve [e parziale] storia di alcune tematiche ICT di successo che si sono sviluppate negli Istituti CNR di Pisa a partire dagli anni '60 e che, a giudizio degli autori, rappresentano a buon diritto rami importanti di quell'albero rigoglioso che ha avuto le sue radici nella CEP.}, KEYWORDS = {divulgazione scientifica, storia dell'informatica}, URL = {https://publications.cnr.it/doc/278709}, PUBLISHER = {Pisa University Press (Pisa, ITA)}, BOOKTITLE = {La CEP prima della CEP: storia dell'informatica. Divulgazione scientifica e didattica sperimentale. 
Atti del Convegno, Pisa 11-12 novembre 2011}, } @INCOLLECTION{MONTEMAGNI_2013_INCOLLECTION_M_329778, AUTHOR = {Montemagni, S.}, TITLE = {Estrazione Terminologica Automatica e Indicizzazione: Scenari Applicativi, Problemi e Possibili Soluzioni}, YEAR = {2013}, ABSTRACT = {Il ricorso a metodi e tecniche di estrazione automatica di terminologia settoriale da corpora di dominio, ovvero da insiemi di documenti relativi a uno specifico settore della conoscenza, rappresenta una sempre più diffusa pratica di supporto al processo di indicizzazione di collezioni documentali, inteso come l'operazione volta all'individuazione delle voci indice che ne costituiscono il contenuto concettuale. L'obiettivo di questo contributo è una rivisitazione critica di esperienze condotte all'interno di diversi scenari applicativi in cui i risultati del processo di estrazione automatica di terminologia sono utilizzati per la costruzione di vocabolari controllati o di thesauri sulla base dei quali è condotto il processo di indicizzazione.}, KEYWORDS = {Trattamento Automatico del Linguaggio, Estrazione Terminologica, Indicizzazione}, PAGES = {241-284}, URL = {https://publications.cnr.it/doc/329778}, PUBLISHER = {Iter (Milano) (Milano, ITA)}, ISBN = {978-88-903419-3-9}, BOOKTITLE = {Documenti Digitali}, EDITOR = {Guarasci, R. and Folino, A.}, } @INCOLLECTION{RUIMY_2013_INCOLLECTION_RPGB_272216, AUTHOR = {Ruimy, N. and Piccini, S. and Giovannetti, E. and Bellandi, A.}, TITLE = {Lessicografia Computazionale e Terminologia Saussuriana}, YEAR = {2013}, PAGES = {161-179}, URL = {https://publications.cnr.it/doc/272216}, PUBLISHER = {Edizioni dell'Orso (Alessandria, ITA)}, ISBN = {978-88-6274-478-2}, BOOKTITLE = {Guida per un'edizione digitale dei manoscritti di Ferdinand de Saussure}, } @INCOLLECTION{SASSI_2013_INCOLLECTION_SG_319402, AUTHOR = {Sassi, M. 
and Grava, M.}, TITLE = {Una metamorfosi chiamata GIS: dai Database ai Geo-database}, YEAR = {2013}, ABSTRACT = {In questo contributo si descrivono le fasi di sviluppo (pionieristico) della ricerca Popolazione e agricoltura nel territorio toscano durante l'Ottocento, iniziata da Giuliana Biagioli con metodi tradizionali. Grazie al suo particolare intuito ha poi trovato nell'Università di Pisa il terreno fertile per poi lanciarsi nell'avventura informatica. Qui ricordiamo che la prima facoltà di Scienze dell'Informazione era stata fondata a Pisa pochi anni prima (si era nel 1969) e quindi negli anni successivi si poteva già contare con i primi specialisti del settore. La prima parte descriverà le prime tappe di lavoro con l'ausilio del calcolatore, che a quel tempo era denominato Mainframe e oltre ad occupare enormi spazi si "nutriva" con schede meccanografiche e nastri magnetici. Con lo sviluppo della tecnologia si è poi passati all'uso dei terminali periferici, che permettevano l'inserimento dati diretto, in comunicazione telematica con il cervellone, per poi arrivare al trasferimento dei dati e dei risultati delle elaborazioni su Personal Computer. In questa relazione suddivideremo le due parti: prima e dopo Internet.}, KEYWORDS = {Storia, Banca-dati, Catasto leopoldino, Storia dell'Informatica}, PAGES = {439-458}, URL = {https://publications.cnr.it/doc/319402}, PUBLISHER = {Edizioni ETS (Pisa, ITA)}, ISBN = {9788846736765}, BOOKTITLE = {Il mondo a metà-Studi storici sul territorio e l'ambiente-In onore di Giuliana Biagioli}, EDITOR = {Pazzagli, R.}, } @INCOLLECTION{VENTURI_2013_INCOLLECTION_V_285645, AUTHOR = {Venturi, G.}, TITLE = {Semantic annotation of Italian legal texts: a FrameNet-based approach}, YEAR = {2013}, ABSTRACT = {The FrameNet approach to text semantic annotation can be a reliable model to make explicit the linguistic information and the semantic content of legal texts. 
This hypothesis is discussed and empirically demonstrated through an experiment of annotation of a corpus of Italian legal texts. This study is aimed at showing how FrameNet is particularly appropriate in order to provide new perspectives for legal language studies and for legal knowledge representation tasks. Moreover, by relying on the output of an automatic dependency parser, the FrameNet-based annotation methodology presented here is meant to be successfully used in automatic semantic processing tasks of legal texts.}, KEYWORDS = {Legal Language, Semantic Annotation, Legal Ontologies, Natural Language Processing}, PAGES = {51-84}, URL = {https://publications.cnr.it/doc/285645}, VOLUME = {58}, DOI = {10.1075/bct.58}, PUBLISHER = {John Benjamins Publishing Company (Amsterdam/Philadelphia, USA)}, ISBN = {9789027202772}, BOOKTITLE = {Advances in Frame Semantics}, EDITOR = {Fried, M. and Nikiforidou, K.}, } @INCOLLECTION{VOSSEN_2013_INCOLLECTION_VSM_285402, AUTHOR = {Vossen, P. and Soria, C. and Monachini, M.}, TITLE = {Wordnet-LMF: A Standard Representation for Multilingual Wordnets}, YEAR = {2013}, ABSTRACT = {Wordnet-lexical markup framework (LMF) is an instantiation of LMF for representing Wordnet-like semantic dictionaries. Wordnet is a widely accepted resource and thus provides a good case for testing the viability of a representation in LMF and the acceptance by a wide range of users. Wordnet-LMF was developed in the framework of the EU project KYOTO for the specific purpose of endowing a set of wordnets with a standardized interoperability format allowing the interchange of semantic information. This chapter explains the choices that were made to model the wordnet information in LMF. 
It provides a preliminary assessment of LMF, by large-scale application to real lexical resources, endowing wordnet with a format representation that allows easier integration among resources sharing the same structure and, more importantly, across resources with different theoretical and implementation approaches.}, KEYWORDS = {KYOTO project, multilingual wordnets, Wordnet-lexical markup framework}, PAGES = {51-66}, URL = {http://dx.doi.org/10.1002/9781118712696.ch4}, DOI = {10.1002/9781118712696.ch4}, PUBLISHER = {Wiley-ISTE (Hoboken, USA)}, ISBN = {9781118712696}, BOOKTITLE = {LMF-Lexical Markup Framework}, EDITOR = {Francopoulo, G.}, } @EDITORIAL{SAUR_2013_EDITORIAL_SCHLMP_288143, AUTHOR = {Saurí, R. and Calzolari, N. and Huang, C. R. and Lenci, A. and Monachini, M. and Pustejovsky, J.}, TITLE = {Proceedings of the 6th International Conference on Generative Approaches to the Lexicon Generative Lexicon and Distributional Semantics}, YEAR = {2013}, ABSTRACT = {The papers in this volume represent some of the most recent and exciting work being carried out both within the framework of Generative Lexicon and related approaches to the lexicon and lexical resources. With the recent emphasis in natural language processing on the development of machine learning algorithms, it has become even more important for computational linguists to work on the development of linguistically informed lexical resources, for use in the annotation of corpora and creation of gold standard data for training, as well as the collation of larger theoretical datasets for investigating linguistic phenomena in greater detail and sophistication. 
These works contribute to this trend as well as to the further development of the mechanisms within GL for describing and explaining semantic and lexical phenomena in language}, KEYWORDS = {Generative Lexicon, Language Resources}, PAGES = {i-126}, URL = {https://aclweb.org/anthology/W/W13/W13-5400.pdf}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-937284-98-5}, } @INPROCEEDINGS{BOSCO_2013_INPROCEEDINGS_BMS_329780, AUTHOR = {Bosco, C. and Montemagni, S. and Simi, M.}, TITLE = {Converting Italian Treebanks: Towards an Italian Stanford Dependency Treebank}, YEAR = {2013}, ABSTRACT = {The paper addresses the challenge of converting MIDT, an existing dependency-based Italian treebank resulting from the harmonization and merging of smaller resources, into the Stanford Dependencies annotation formalism, with the final aim of constructing a standard-compliant resource for the Italian language. Achieved results include a methodology for converting treebank annotations belonging to the same dependency-based family, the Italian Stanford Dependency Treebank (ISDT), and an Italian localization of the Stanford Dependency scheme.}, KEYWORDS = {Italian Treebank, Harmonization and Merging of Resources, Stanford Dependencies}, PAGES = {61-69}, URL = {http://aclweb.org/anthology/W13-2308}, ISBN = {978-1-937284-58-9}, CONFERENCE_NAME = {7th Linguistic Annotation Workshop and Interoperability with Discourse}, CONFERENCE_PLACE = {Sofia, Bulgaria}, CONFERENCE_DATE = {8-9 August 2013}, BOOKTITLE = {Proceedings of the 7th Linguistic Annotation Workshop and Interoperability with Discourse}, } @INPROCEEDINGS{CHIARELLA_2013_INPROCEEDINGS_CCCMMS_282744, AUTHOR = {Chiarella, D. and Cutugno, P. and Cinini, A. and Marconi, L. and Morgavi, G. 
and Sbrulli, S.}, TITLE = {Análisis de documentos en red sobre la renuncia del Papa Ratzinger y la elección del Papa Francisco}, YEAR = {2013}, URL = {https://publications.cnr.it/doc/282744}, ISBN = {9789597152194}, CONFERENCE_NAME = {VIII Conferencia Científica Internacional Lingüística}, CONFERENCE_PLACE = {Habana}, CONFERENCE_DATE = {27-29 Novembre 2013}, } @INPROCEEDINGS{CHIARELLA_2013_INPROCEEDINGS_CCMM_264938, AUTHOR = {Chiarella, D. and Cutugno, P. and Marconi, L. and Morgavi, G.}, TITLE = {Mnemosynet: "una red para seguir los caminos de la memoria colectiva"}, YEAR = {2013}, PAGES = {390-394}, URL = {https://publications.cnr.it/doc/264938}, PUBLISHER = {Centro de Lingüística Aplicada, Ministero de Ciencia, Tecnología y Medio Ambiente (Santiago de Cuba, CUB)}, ISBN = {9789597174226}, CONFERENCE_NAME = {XII Simposio Internacional de Comunicación Social: Actualizaciones en Comunicación Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {21-25 gennaio 2013}, BOOKTITLE = {Actualizaciones en Comunicación Social-Vol. I}, EDITOR = {Ruiz Miyares, L. and Álvarez Silva, M. R. and Muñoz Alvarado, A.}, } @INPROCEEDINGS{CIMINO_2013_INPROCEEDINGS_CDVM_285772, AUTHOR = {Cimino, A. and Dell'Orletta, F. and Venturi, G. and Montemagni, S.}, TITLE = {Linguistic Profiling based on General-purpose Features and Native Language Identification}, YEAR = {2013}, ABSTRACT = {In this paper, we describe our approach to native language identification and discuss the results we submitted as participants to the First NLI Shared Task. 
By resorting to a wide set of general-purpose features qualifying the lexical and grammatical structure of a text, rather than to ad hoc features specifically selected for the NLI task, we achieved encouraging results, which show that the proposed approach is general-purpose and portable across different tasks, domains and languages.}, KEYWORDS = {Native Language Identification, Linguistic Profiling}, PAGES = {207-215}, URL = {http://www.aclweb.org/anthology/W13-1727}, ISBN = {978-1-937284-47-3}, CONFERENCE_NAME = {8th workshop on "Innovative Use of NLP for Building Educational Applications"}, CONFERENCE_PLACE = {Atlanta (Georgia)}, CONFERENCE_DATE = {13 giugno 2013}, } @INPROCEEDINGS{DELGROSSO_2013_INPROCEEDINGS_DB_276328, AUTHOR = {Del Grosso, A. M. and Boschetti, F.}, TITLE = {Collaborative multimedia platform for computational philology}, YEAR = {2013}, ABSTRACT = {This paper aims at illustrating a collaborative and modular web platform in the domain of digital and computational philology. The proposed work deals with parallel multilingual and multimedia resources. Two case studies are discussed in order to show the flexibility of the designed platform. The reusability of the components in different projects is achieved by abstract modeling and through the application of effective design patterns. The platform deals with textual resources and associated multimedia content, which can be retrieved by the metadata and shown in parallel (e.g., the page image of a manuscripts and the related transcription). 
The library of components will distribute under GPL 3.0 license and available at https://github.com/CoPhi.}, KEYWORDS = {Computational philology, Digital philology, Enterprise systems, Multilingualism, Parallel multimedia}, PAGES = {46-51}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84905815787\&origin=inward}, ISBN = {9781627484770}, CONFERENCE_NAME = {MMEDIA 2013}, CONFERENCE_PLACE = {Venice}, CONFERENCE_DATE = {21-26 April 2013}, BOOKTITLE = {Proceedings of a meeting held at NexComm 2013, The Fifth International Conferences on Advances in Multimedia (MMEDIA 2013)}, EDITOR = {Davis, P.}, } @INPROCEEDINGS{DELLORLETTA_2013_INPROCEEDINGS_DMV_278421, AUTHOR = {Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Linguistic Profiling of Texts Across Textual Genre and Readability Level. An exploratory Study on Italian Fictional Prose}, YEAR = {2013}, PAGES = {189-197}, URL = {https://publications.cnr.it/doc/278421}, CONFERENCE_NAME = {Recent Advances in Natural Language Processing (RANLP 2013)}, CONFERENCE_PLACE = {Hissar, Bulgaria}, CONFERENCE_DATE = {7-13 settembre}, BOOKTITLE = {Proceedings of Recent Advances in Natural Language Processing (RANLP 2013)}, } @INPROCEEDINGS{DELLORLETTA_2013_INPROCEEDINGS_DVM_285773, AUTHOR = {Dell'Orletta, F. and Venturi, G. and Montemagni, S.}, TITLE = {Unsupervised Linguistically-Driven Reliable Dependency Parses Detection and Self-Training for Adaptation to the Biomedical Domain}, YEAR = {2013}, ABSTRACT = {In this paper, a new self-training method for domain adaptation is illustrated, where the selection of reliable parses is carried out by an unsupervised linguistically-driven algorithm, ULISSE. 
The method has been tested on biomedical texts with results showing a significant improvement with respect to considered baselines, which demonstrates its ability to capture both reliability of parses and domain-specificity of linguistic constructions.}, KEYWORDS = {Self-training, Domain Adaptation, Biomedical Texts}, PAGES = {45-53}, URL = {http://www.aclweb.org/anthology/W13-1906}, ISBN = {978-1-937284-55-8}, CONFERENCE_NAME = {12th workshop on "Biomedical Natural Language Processing" (BioNLP)}, CONFERENCE_PLACE = {Sofia (Bulgaria)}, CONFERENCE_DATE = {8-9 agosto 2013}, } @INPROCEEDINGS{FERRARI_2013_INPROCEEDINGS_FSD_277748, AUTHOR = {Ferrari, A. and Spagnolo, G. O. and Dell'Orletta, F.}, TITLE = {Mining commonalities and variabilities from natural language documents}, YEAR = {2013}, ABSTRACT = {A company who wishes to enter an established market with a new, competitive product is required to analyse the product solutions of the competitors. Identifying and comparing the features provided by the other vendors might greatly help during the market analysis. However, mining common and variant features from the publicly available documents of the competitors is a time consuming and error-prone task. In this paper, we suggest to employ a natural language processing approach based on contrastive analysis to identify commonalities and variabilities from the brochures of a group of vendors. We present a first step towards a practical application of the approach, in the context of the market of Communications-Based Train Control (CBTC) systems.}, KEYWORDS = {Software Product Lines, Variability Mining, CBTC, D. 
2 SOFTWARE ENGINEERING, 68N30}, PAGES = {116-120}, URL = {http://dl.acm.org/citation.cfm?id=2491634}, ISBN = {978-1-4503-1968-3}, CONFERENCE_NAME = {SPLC 2013-17th International Software Product Line Conference}, CONFERENCE_PLACE = {Tokyo, Japan}, CONFERENCE_DATE = {26-30 August 2013}, EDITOR = {Kishi, T.}, } @INPROCEEDINGS{FRONTINI_2013_INPROCEEDINGS_FDM_287280, AUTHOR = {Frontini, F. and Del Gratta, R. and Monachini, M.}, TITLE = {Linking the Geonames ontology to WordNet}, YEAR = {2013}, ABSTRACT = {This paper illustrates the transformation of the GeoNames ontology concepts, with their English labels and glosses, into a GeoDomain WordNet-like resource in English, its translation into Italian, and its linking to the existing generic WordNets of both languages.}, KEYWORDS = {GeoNames, WordNet, lemon}, PAGES = {263-267}, URL = {http://hnk.ffzg.hr/bibl/ltc2013/book/papers/OWN-2.pdf}, PUBLISHER = {Fundacja Uniwersytetu im A. Mickiewicza (Poznan, POL)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {6th Language \& Technology Conference: Human Language Technologies as a Challenge for Computer Science and Linguistics}, CONFERENCE_PLACE = {Poznan, Poland}, CONFERENCE_DATE = {December 7-9, 2013}, BOOKTITLE = {Human Language Technologies as a Challenge for Computer Science and Linguistics. Proceedings, 6th Language \& Technology Conference, December 7-9, 2013, Poznań, Poland}, EDITOR = {Vetulani, Z. and Uszkoreit, H.}, } @INPROCEEDINGS{MARCHETTI_2013_INPROCEEDINGS_MTALDFM_287331, AUTHOR = {Marchetti, A. and Tesconi, M. and Abbate, S. and Lo Duca, A. and D'Errico, A. and Frontini, F. and Monachini, M.}, TITLE = {Tour-pedia: a web application for the analysis and visualization of opinions for tourism domain}, YEAR = {2013}, ABSTRACT = {We present Tour-pedia an interactive web application that extracts opinions from reviews of accommodations from different sources available on-line. Polarity markers display on a map the different opinions. 
This tool is intended to help business operators to manage reputation on-line.}, KEYWORDS = {Visualization tools, opinion mining, NLP on social media, tourism reviews}, PAGES = {594-595}, URL = {http://www.iit.cnr.it/sites/default/files/ltc2013_opener_demo.pdf}, PUBLISHER = {Fundacja Uniwersytetu im A. Mickiewicza (Poznan, POL)}, ISBN = {978-83-932640-4-9}, CONFERENCE_NAME = {6th Language \& Technology Conference: Human Language Technologies as a Challenge for Computer Science and Linguistics}, CONFERENCE_PLACE = {Poznan, Poland}, CONFERENCE_DATE = {December 7-9, 2013}, EDITOR = {Vetulani, Z. and Uszkoreit, H.}, } @INPROCEEDINGS{MARCONI_2013_INPROCEEDINGS_MCCMS_282745, AUTHOR = {Marconi, L. and Cutugno, P. and Chiarella, D. and Morgavi, G. and Sassi, M.}, TITLE = {Análisis de Blogs y Temas en "Narrarsi in rete: linguaggi a confronto"}, YEAR = {2013}, URL = {https://publications.cnr.it/doc/282745}, ISBN = {9789597152194}, CONFERENCE_NAME = {VIII Conferencia Científica Internacional Lingüística}, CONFERENCE_PLACE = {Habana}, CONFERENCE_DATE = {27-29 Novembre 2013}, } @INPROCEEDINGS{MARINELLI_2013_INPROCEEDINGS_MC_281780, AUTHOR = {Marinelli, R. and Cignoni, L.}, TITLE = {How to Integrate CLIL Maritime Courses in English with Idioms Derived from Specialized Lexical Semantic Databases}, YEAR = {2013}, ABSTRACT = {This paper is a proposal for better learning of idiomatic seafaring expressions to be exploited in Content and Language Learning (CLIL) maritime English courses. These courses are specifically designed for individuals with little or no prior knowledge of maritime language, wishing to undertake professional training in Naval Academies or to embark on a career as officers, engineers or crew members, or to work in import-export companies, tour operators and shipping agencies. 
The starting point of our research was a set of idiomatic expressions included and structured in a lexical database of maritime terminology (Mariterm), organized in semantic relations and containing around 3500 Italian lemmas, clustered into approximately 2,500 synsets (sets of one or more synonyms), e.g.: affondare, andare a fondo (to sink). Each term is linked to other terms of the specialized lexicon by means of lexical semantic relations, and the definition of each word in both languages and the translation into English is provided. A number of phraseological expressions and sayings, for instance seguire la corrente (to go with the tide) in Italian, "to hold course" in English, as well as information of a historical type, have been included in the database. As the terminology contained in Mariterm is constantly connected to the English language, we thought it was worth highlighting the English idiomatic expression with its translation into Italian, despite the fact that the Italian expressions sometimes belong to other semantic fields. The single or multiword expressions can be equivalent and idiomatic in both languages (essere sulla stessa barca/"to be in the same boat"); or can be idiomatic in one of the two languages but not in the other (essere un porto di mare/"to be like Piccadilly Circus"); or, viceversa, "sail close to the wind"/camminare sul filo del rasoio, thus highlighting similarities and differences related to concept representation in the two languages. In some cases, the terminological database managing tool allows for visualization of etymological or historical information related to the idiomatic expression sought for. Idioms are widely used in everyday language, and their complex constructions make them extremely difficult to understand, let alone to use, for non-native speakers. 
Some phrases referable to the seafaring field are clear, eg.: "to go with the flow" (continue in the same way as others), while others require background knowledge of their historical origin in order to be understood and better memorized, e.g.: "feeling blue" (to describe a feeling of sadness). Starting from the terminological database and using different and appropriate exercises, each phraseological item is provided with a definition, the Italian equivalent when it exists, and information about its origins. We present each idiom highlighting some variations, and giving when possible example sentences in context from well known and validated sources of various types (web sites, specialized maritime journals, handbooks, grey literature, newsletters and other publications), which can help the learner understand when and how a particular idiom is used and to illustrate important grammar points of the English language, making the learning process more interesting and appealing.}, KEYWORDS = {CLIL, phraseology, maritime terminology, lexical semantic databases, grammar}, PAGES = {6666-6673}, URL = {https://publications.cnr.it/doc/281780}, VOLUME = {1}, PUBLISHER = {International Association of Technology, Education and Development (IATED) (Valencia, ESP)}, ISBN = {978-84-616-2661-8}, CONFERENCE_NAME = {INTED2013. 7th International Technology, Education and Development Conference}, CONFERENCE_PLACE = {Valencia (Spain)}, CONFERENCE_DATE = {4th-6th of March, 2013}, BOOKTITLE = {INTED2013. 7th International Technology, Education and Development Conference. Valencia (Spain), 4th-6th of March, 2013. Proceedings}, EDITOR = {Chova, L. G. and Martínez, A. L. and Torres, I. C.}, } @INPROCEEDINGS{MARINELLI_2013_INPROCEEDINGS_MC_281786, AUTHOR = {Marinelli, R. 
and Cignoni, L.}, TITLE = {A Latin-Italian Database linked to the English language}, YEAR = {2013}, ABSTRACT = {This paper describes an ongoing research conducted at the Institute for Computational Linguistics (ILC) of the National Research Council (CNR) in Pisa, concerning the creation of a lexical semantic database for the Latin language, with its Italian and English translation. As we all know, many words of the Italian language derive from Latin, in the same way as many Latin words and expressions are used in everyday Italian language. The aim of this work is to exploit a well-known and tested conceptual model to construct a resource that can be easily consulted by the students of high school and university who wish to achieve proficiency in Latin language learning and deeper awareness of the use of Italian. The database is of a relational type, following the EuroWordNet (EWN)/ItalWordNet (IWN) model, according to the WordNet (WN) philosophy perspective. The items included in the database are clustered into approximately 250 synsets, which are sets of one or more synonyms, for example carina, navis (ship). Each Latin synset is linked to other words (or synsets) by means of semantic 'internal relations' of the hierarchical-vertical type (or 'is-a' relations) and of the horizontal type (role, means, purpose, sub-event, instance, etc. relations), on the basis of the model. One of the most important characteristics of the database is the possibility of connecting the Latin words to the equivalent English synonyms (or near synonyms), included in the Princeton database WordNet, by means of 'equivalence relations'. 'Plug-in relations connect each word of the Latin lexicon to the equivalent synset (synonym) of the Italian WordNet (IWN), as a kind of hookup point from which it is possible to see the Italian synonym with all its semantic relations. 
The study of all these kinds of relations can support the students in the learning of the Latin language, linked to Italian as well as to English. The database currently contains 200 nouns and 50 verbs, drawn from the most recent version of the Campanini-Carboni's Latin and Italian Dictionary (2011), and from William's Vocabulary for Speaking Latin (1829). A set of adjectives and adverbs will be codified in the near future, in order to represent all the grammatical categories and a small group of proper names will also be included to enrich the resource. Particular attention is paid to the Latin words that are used in everyday spoken and written Italian, for instance virus, medium, video, etc. The management tool of the database allows to visualize each Latin word inserted in the database, the Italian translation with its definition, the link with the English language, and slots showing additional information on request about its etymology, history, derivates, declension or conjugation. The Latin concept is represented as a node in the net of connections in its semantic field.}, KEYWORDS = {Latin, lexical databases, technology-enhanced learning}, PAGES = {588-595}, URL = {https://publications.cnr.it/doc/281786}, VOLUME = {1}, PUBLISHER = {International Association of Technology, Education and Development (IATED). Barcelona (Spain) (Barcellona, ESP)}, ISBN = {978-84-616-3822-2}, CONFERENCE_NAME = {EDULEARN13. 5th International Conference on Education and New Learning Technologies. International Association of Technology, Education and Development (IATED). Barcelona (Spain)}, CONFERENCE_PLACE = {Barcellona (Spain)}, CONFERENCE_DATE = {1st-3rd of July 2013}, BOOKTITLE = {EDULEARN13. 5th International Conference on Education and New Learning Technologies. International Association of Technology, Education and Development (IATED). Barcelona (Spain) 1st-3rd of July, 2013. Proceedings}, EDITOR = {Chova, L. G. and Martínez, A. L. and Torres, I. 
C.}, } @INPROCEEDINGS{MARZI_2013_INPROCEEDINGS_M_287133, AUTHOR = {Marzi, C.}, TITLE = {Innovation, language, and the web}, YEAR = {2013}, ABSTRACT = {Language and innovation are inseparable. Language conveys ideas which are essential in innovation, establishes the most immediate connections with our conceptualisation of the outside world, and provides the building blocks for communication. Every linguistic choice is necessarily meaningful, and it involves the parallel construction of form and meaning. From this perspective, language is a dynamic knowledge construction process. In this article, emphasis will be laid on investigating how words are used to describe innovation, and how innovation topics can influence word usage and collocational behaviour. The lexical representation of innovative knowledge in a context-based approach is closely related to the representation of knowledge itself, and gives the opportunity to reduce the gap between knowledge representation and knowledge understanding. This will bring into focus the dynamic interplay between lexical creativity and innovative pragmatic contexts, and the necessity for a dynamic semantic shift from context-driven vagueness to domain-driven specialisation.}, KEYWORDS = {Grey literature, Language technologies, Lexical productivity, Web corpora}, PAGES = {153-159}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84924135180\&origin=inward}, VOLUME = {14}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISSN = {1386-2316}, ISBN = {9789077484203}, CONFERENCE_NAME = {Fourteenth International Conference on Grey Literature}, CONFERENCE_PLACE = {CNR, Rome Italy}, CONFERENCE_DATE = {29-30 November 2012}, BOOKTITLE = {Tracking innovation through grey literature}, EDITOR = {Farace, D. J. and Frantzen, J. and Greynet}, } @INPROCEEDINGS{MONEGLIA_2013_INPROCEEDINGS_MPGMRDKF_287346, AUTHOR = {Moneglia, M. and Panunzi, A. and Gagliardi, G. and Monachini, M. and Russo, I. and De Felice, I. and Khan, F. 
and Frontini, F.}, TITLE = {IMAGACT E-learning Platform for Basic Action Types}, YEAR = {2013}, ABSTRACT = {Action verbs express important information in a sentence and they are the most frequent elements in speech, but they are also one of the most difficult part of the lexicon to learn for L2 language learners, because languages segment these concepts in very different ways. The two sentences "Mary folds her shirt" and "Mary folds her arms" refer to two completely different types of action, as becomes evident when they are translated into another language (e.g., in Italian they would be translated as "Maria piega la camicia" and "Maria incrocia le braccia" respectively). IMAGACT e-learning platform aims to make these differences evident by creating a cross-linguistic ontology of action types, whose nodes consist of 3D scenes, each of which relates to one action type. In order to identify these types, contexts of use have been extracted from English and Italian spontaneous speech corpora for around 600 high frequency action verbs (for each language). All instances that refer to similar events (e.g., fold the shirt/ the blanket) are grouped under one single action type: each one of these types is then represented by a linguistic best example and a short video that represents simple actions (e.g. a man taking a glass from a table). The action types extracted for Italian and English are compared and merged into one cross-linguistic ontology of action. IMAGACT has provided an internet based annotation infrastructure to derive this information from corpora. The project is now completed for the Italian and English lexicon, data extraction for Chinese and Spanish is ongoing. Reference to prototypical imagery is crucial in order to bootstrap the learning process. 
By selecting the set of 3D scenes referred to by a verb in one language and viewing the type of activity represented therein learners can directly understand the range of applicability of each verb. Thanks to an easy interface, a user can access the English/Italian/Chinese lexicon by lemma or directly by 3D scenes. For example, searching for the verb "to turn", s/he will be presented with a number of scenes, showing the various action types associated to that verb. Clicking on a scene s/he or she will know how this type of action is referred to in other the languages}, KEYWORDS = {Ontology}, PAGES = {85-89}, URL = {https://publications.cnr.it/doc/287346}, PUBLISHER = {libreriauniversitaria.it (Limena, ITA)}, ISBN = {978-88-6292-423-8}, CONFERENCE_NAME = {International Conference "ICT for Language Learning", 6th edition}, CONFERENCE_PLACE = {Florence, Italy}, CONFERENCE_DATE = {14-15 november 2013}, BOOKTITLE = {Conference Proceedings. ICT for Language Learning}, EDITOR = {Pixel}, } @INPROCEEDINGS{MONTANI_2013_INPROCEEDINGS_MARBCMSTSBP_277794, AUTHOR = {Montani, C. and Andronico, P. and Raviolo, C. and Bozzi, A. and Codenotti, B. and Meghini, C. and Sommani, M. and Tarabella, L. and Scopigno, R. and Baraglia, R. and Perego, R.}, TITLE = {Il CNR dopo la CEP}, YEAR = {2013}, ABSTRACT = {A short history of some of the ICT issues developed in the Institutes of CNR in Pisa since the 60s and that, in the opinion of the authors, had its roots in the CEP (Pisa Electronic Computer).}, KEYWORDS = {Storia dell'Informatica, K. 2 HISTORY OF COMPUTING}, PAGES = {41-66}, URL = {https://publications.cnr.it/doc/277794}, ISBN = {978-88-6741-303-4}, CONFERENCE_NAME = {La CEP prima della CEP: storia dell'informatica. Atti}, CONFERENCE_PLACE = {Pisa, Italy}, CONFERENCE_DATE = {11-12 novembre 2011}, EDITOR = {Cignoni, G. A. and Gadducci, F.}, } @INPROCEEDINGS{PARDELLI_2013_INPROCEEDINGS_PGS_254484, AUTHOR = {Pardelli, G. and Goggi, S. 
and Sassi, M.}, TITLE = {Open Grey for Language Technology: a ride on the network}, YEAR = {2013}, ABSTRACT = {The aim of this paper is to introduce the Open Access movement for Natural Language Processing (NLP) by means of a wide range of open access Grey Literature documentation available on the web. In 2008 Robert Dale, in the last issue of volume 35 of Computational Linguistics said: "There are a number of definitions of the term 'open access' in circulation, but almost all share the key principle that scientific literature should be freely available for all to read, download, copy, distribute, and use (with appropriate attribution) without restriction". At first glance it might seem that the Open Access movement has gradually become more influential in the field of language technology by building repositories accessible through the network. Today's digital archives are niches of intellectual production spread by means of a wide range of documents (such as journal articles and proceedings) which, paradoxically, the search engines do not always reach. The use of inappropriate terms in the formulation of queries and the fragmentation of repositories in this area of investigation does not allow to retrieve information on a large scale. The full paper, after a first introductory section, will be organized in two sections: 1) the first dedicated to the methodology for searching and tracing open access resources and to the criteria for analyzing and selecting the online documentation; 2) the second devoted to a description of the state-of-the-art of Open Access Grey Literature material in a statistical and thematic scenario. As things stand, standardization of computational systems interconnected by links and tools of various nature allowing Internet users to easily retrieve the information that the web naturally makes available would then be essential. 
Topics: Sustainability, Public Accessible Resources, Product and Service enhancements, Open Access, Curation and Preservation}, KEYWORDS = {Open Access Movement, Natural Language Processing}, PAGES = {161-165}, URL = {https://publications.cnr.it/doc/254484}, PUBLISHER = {TEXTRELEASE, GL PROGRAM \& CONFERENCE BUREAU (Amsterdam, NLD)}, ISBN = {978-90-77484-20-3}, CONFERENCE_NAME = {GL14 Fourteenth International Conference on Grey Literature. Tracking Innovation Through Grey Literature}, CONFERENCE_PLACE = {Roma, Italy (CNR)}, CONFERENCE_DATE = {29-30 November 2012}, EDITOR = {Farace, D. J. and Frantzen, J. and Greynet}, } @INPROCEEDINGS{RUSSO_2013_INPROCEEDINGS_RC_288005, AUTHOR = {Russo, I. and Caselli, T.}, TITLE = {Changeable Polarity of Verbs through Emotions' Attribution in Crowdsourcing Experiments}, YEAR = {2013}, ABSTRACT = {Sentiment analysis and emotion detection are tasks with common features but rarely related because they tend to categorize the objects of their studies according to different categories, i.e. positive, negative and neutral values in SA, and emotion labels such as "joy", "anger" etc. in emotion detection. In this paper we try to bridge this gap, reporting on three crowdsourcing experiments to collect speakers' intuitions on emotion(s) associated with events denoted by verbs and propose to set contextual polarity values on the basis of the selected emotions. 
In this way we suggest a methodology to handle connotational meanings of verbs that can help to refine automatic sentiment analysis on social media, where shared contents are often short reports on pleasant or unpleasant events and activities.}, KEYWORDS = {emotion attribution, connotations of verbs, empathy}, PAGES = {131-139}, URL = {http://ceur-ws.org/Vol-1096/paper9.pdf}, CONFERENCE_NAME = {First International Workshop on Emotion and Sentiment in Social and Expressive Media: approaches and perspectives from AI (ESSEM 2013) A workshop of the XIII International Conference of the Italian Association for Artificial Intelligence (AI*IA 2013)}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {3 dicembre}, BOOKTITLE = {Proceedings of the First International Workshop on Emotion and Sentiment in Social and Expressive Media: approaches and perspectives from AI (ESSEM 2013) A workshop of the XIII International Conference of the Italian Association for Artificial Intelligence (AI*IA 2013)}, EDITOR = {Battaglino, C. and Bosco, C. and Cambria, E. and Damiano, R. and Patti, V. and Rosso, P.}, } @INPROCEEDINGS{RUSSO_2013_INPROCEEDINGS_RDFKM_285373, AUTHOR = {Russo, I. and De Felice, I. and Frontini, F. and Khan, F. and Monachini, M.}, TITLE = {(Fore)seeing actions in objects. Acquiring distinctive affordances from language}, YEAR = {2013}, ABSTRACT = {In this paper we investigate if conceptual information concerning objects' affordances as possibilities for actions anchored to an object can be at least partially acquired through language. Considering verb-noun pairs as the linguistic realizations of relations between actions performed by an agent and objects we collect this information from the ImagAct dataset, a linguistic resource obtained from manual annotation of basic action verbs, and from a web corpus(itTenTen). 
The notion of affordance verb as the most distinctive verb in ImagAct enables a comparison with distributional data that reveal how lemmas ranking based on a semantic association measure that mirror that of affordances as the most distinctive actions an object can be involved in.}, PAGES = {151-161}, URL = {https://docs.google.com/viewer?a=v\&pid=sites\&srcid=ZGVmYXVsdGRvbWFpbnxubHBjczIwMTN8Z3g6MTI0ZGMzYWYwYmMxNjY1Mg}, CONFERENCE_NAME = {NLPCS 2013-10th International Workshop on Natural Language Processing and Cognitive Science}, CONFERENCE_PLACE = {Marseille}, CONFERENCE_DATE = {15-17/10/2013}, BOOKTITLE = {Proceedings of NLPCS 2013-10th International Workshop on Natural Language Processing and Cognitive Science}, EDITOR = {Sharp, B. and Zock, M.}, } @INPROCEEDINGS{RUSSO_2013_INPROCEEDINGS_RFDKM_287456, AUTHOR = {Russo, I. and Frontini, F. and De Felice, I. and Khan, F. and Monachini, M.}, TITLE = {Disambiguation of Basic Action Types through Nouns' Telic Qualia}, YEAR = {2013}, ABSTRACT = {Knowledge about semantic associations between words is effective to disambiguate word senses. The aim of this paper is to investigate the role and the relevance of telic information from SIMPLE in the disambiguation of basic action types of Italian HOLD verbs ( prendere, 'to take', raccogliere, 'to pick up', pigliare 'to grab' etc.). 
We propose an experiment to compare the results obtained with telic information from SIMPLE with basic co-occurrence information extracted from corpora (most salient verbs modifying nouns) classified in terms of general semantic classes to avoid data sparseness.}, PAGES = {70-75}, URL = {http://www.aclweb.org/anthology/W13-5410}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-937284-98-5}, CONFERENCE_NAME = {6th International Conference on Generative Approaches to the Lexicon Generative Lexicon and Distributional Semantics}, CONFERENCE_PLACE = {Pisa, Italy}, CONFERENCE_DATE = {24-25/09/2013}, BOOKTITLE = {Proceedings of the 6th International Conference on Generative Approaches to the Lexicon. Generative Lexicon and Distributional Semantics}, EDITOR = {Saurí, R. and Calzolari, N. and Huang, C. and Lenci, A. and Monachini, M. and Pustejovsky, J.}, } @INPROCEEDINGS{SASSI_2013_INPROCEEDINGS_SBP_254230, AUTHOR = {Sassi, M. and Biagioni, S. and Pardelli, G.}, TITLE = {A linguistic and gender approach to 1841 Tuscany population Census}, YEAR = {2013}, ABSTRACT = {The Census of 1841 in Tuscany was first official data registry which tried to describe Tuscan population as a whole on granducal basis. With the use of special ad hoc created forms all demographic and socioeconomic characteristics of families and single persons in "Granducato di Toscana" were described. These data of Census, now kept by the State Archive of Florence , supply a precious source for studies of all different aspects of the population and include following information: name, surname, age, gender, marital status, employment, religion, schoolarity, "social status". In the registrers for each community and parish a full account is given of homes, resident families, and composition of families including family servants. Each of those entities had a proper incremental code number. 
[Registers were generated and updated by priests, who at that time were only surely scholarised officers widespread on territory, that is why they are divided by parish, which is an administrative unit typical of canonic right, instead of quarters or "rioni" or "contrade" which instead had been long practiced in civil right. ] During early 80:s the research group of prof. Biagioli of Department of Modern History of Pisa University, charged the computational linguistic Institute of CNR with digitalization and the electronic processing of these data as well as of data from "Catasto" [public registry of buildings and land ownership] to enable statistical, demographical, historical, sociological and economic analysis . In this work the authors have used the only partially usable subset of data left of that work, concerning four communities in the province of Pisa i.e actual Bièntina, Càscina, Pontedera and San Giuliano Terme (at that time named "Baths of San Giuliano") and is more concerned with terminological and lexical issues a gender related analysis of work and craftmanships. Each of the four communities has its own peculiar profile. Work is developed in 5 points: a) Informatics retrieval of linguistic information from Tuscany of 1800 focused by the arts and craftmanships more in use in families of that time, b) gender division of works and craftmanships, c) observation of lexical disparity in the four communities and terminological curiosities of that historical period, d) actually no longer existing craftmanships, e) diacronic analysis of communities, where possible. In this scenery the authors will introduce the methodology they employed for data analysis. Tables and graphs will be used to better focus different moments and results of work. 
The authors give the English translation of the terms extracted from the Corpus (see Appendix Glossary).}, KEYWORDS = {1841 Tuscany Population Census, Terminology}, PAGES = {200-205}, URL = {https://publications.cnr.it/doc/254230}, PUBLISHER = {Centro de Lingüística Aplicada, Ministerio de Ciencia, Tecnología y Medio Ambiente (Santiago de Cuba, CUB)}, ISBN = {978-959-7174-22-6}, CONFERENCE_NAME = {XIII Simposio Internacional de Comunicación Social-Actualizaciones en Comunicación Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {21-25 Jan 2013}, EDITOR = {Ruiz Miyares, L. and Álvarez Silva, M. R. and Muñoz Alvarado, A.}, } @INPROCEEDINGS{SORIA_2013_INPROCEEDINGS_SM_225750, AUTHOR = {Soria, C. and Mariani, J.}, TITLE = {Searching LTs for minority languages}, YEAR = {2013}, ABSTRACT = {Les Technologies de la Langue (TL) sont un instrument nécessaire pour toutes les langues, en particulier celles qui aspirent à conquérir un espace dans les dispositifs numériques. Les langues qui ne sont pas équipées de technologies sont sérieusement menacées d'extinction numérique dans le long terme. Le projet META-NET a évalué l'état actuel des TL pour 30 langues européennes. Mais qu'en est-il des langues régionales et minoritaires ? Très peu d'informations sont disponibles pour elles. Dans cet article, nous présentons d'abord les données disponibles dans la LRE Map. Nous plaidons ensuite en faveur d'une campagne donnant une image complète des TL existant pour les langues minoritaires et régionales d'Europe. Cela aidera les décideurs, les chercheurs et les développeurs à planifier une feuille de route pour doter toutes les langues des instruments nécessaires pour fonctionner comme des langues correctement équipées dans l'ère numérique. 
Un recensement des TL disponibles et nécessaires est ainsi proposé.}, KEYWORDS = {Lingue regionali e minoritarie, risorse linguistiche, tecnologie linguistiche}, PAGES = {235-247}, URL = {http://www.taln2013.org/actes/www/TALARE-2013/actes/talare-2013-long-005.pdf}, CONFERENCE_NAME = {TALARE 2013: Traitement Automatique des Langues Régionales de France et d'Europe}, CONFERENCE_PLACE = {Les Sables d'Olonne}, CONFERENCE_DATE = {17-21 giugno 2013}, BOOKTITLE = {Actes de TALARE 2013: Traitement Automatique des Langues Régionales de France et d'Europe}, EDITOR = {Morin, E. and Estève, Y.}, } @INPROCEEDINGS{SORIA_2013_INPROCEEDINGS_SMZ_285446, AUTHOR = {Soria, C. and Mariani, J. and Zoli, C.}, TITLE = {Dwarfs sitting on the giants' shoulders-how LTs for regional and minority languages can benefit from piggybacking major languages}, YEAR = {2013}, ABSTRACT = {LTs are a necessary instrument for all languages, especially for those aiming at conquering a space over digital devices. Languages that are not equipped with LT seriously face digital extinction in the long run. Many challenges are to be faced to equip minority languages with LTs (from basic to advanced): the almost complete lack of knowledge about available resources and technologies, the substantial delay in development of basic technologies, the lack of cooperation among minority languages communities, the chronic shortage of funding (in particular for minority languages not officially recognized, yet often the most vital ones over the Internet) and the limited economic value placed over LTs for minority languages by the digital market rules. 
In this paper we suggest how these challenges can be overcome, and how coordinated and standardized cooperation among all interested stakeholders can lead to better knowledge and awareness of the breadth and depth of available technologies.}, PAGES = {73-79}, URL = {https://publications.cnr.it/doc/285446}, ISBN = {978-0-9560210-5-2}, CONFERENCE_NAME = {XVII FEL Conference}, CONFERENCE_PLACE = {Ottawa, Canada}, CONFERENCE_DATE = {01/10/2013-04/10/2013}, BOOKTITLE = {Proceedings of the XVII FEL Conference}, EDITOR = {Norris, M. J. and Anonby, E. and Junker, M. and Ostler, N. and Patrick, D.}, } @INPROCEEDINGS{VENTURI_2013_INPROCEEDINGS_V_340389, AUTHOR = {Venturi, G.}, TITLE = {Investigating legal language peculiarities across different types of Italian legal texts: an NLP-based approach}, YEAR = {2013}, ABSTRACT = {In this paper, the author carried out the linguistic profiling of a corpus of different types of Italian legal texts exemplifying different sub-varieties of Italian legal language by relying on a wide range of different linguistic features (lexical, morpho-syntactic and syntactic) automatically extracted from the output of a multi-level automatic linguistic analysis of texts. The devised comparative approach allowed investigating the linguistic variation i) between the considered corpus of legal texts and a corpus of newspaper articles representative of Italian ordinary language and ii) among the considered types of legal texts (legislative acts, administrative acts, the Italian Constitution and legal cases). 
Achieved results can provide the starting point to identify areas of lexical, morpho-syntactic and/or syntactic complexity within a legal text in order to assess its readability as well to perform a number of different computational forensic linguistics tasks.}, KEYWORDS = {Legal language analysis, linguistic profiling, legal genres}, PAGES = {1-19}, URL = {http://ler.letras.up.pt/uploads/ficheiros/13624.pdf}, ISBN = {978-989-8648-14-3}, CONFERENCE_NAME = {3rd European Conference of the International Association of Forensic Linguists}, CONFERENCE_PLACE = {Porto}, CONFERENCE_DATE = {15-18 ottobre 2012}, } @INPROCEEDINGS{ZOLI_2013_INPROCEEDINGS_ZSR_285441, AUTHOR = {Zoli, C. and Soria, C. and Randaccio, S.}, TITLE = {The status, corpus planning and speakers' attitudes on Romagnol (ISO 639-3: rgn)}, YEAR = {2013}, ABSTRACT = {Even if recognized by Ethnologue with a clear ISO code, Romagnol is still underestimated and often considered an Italian dialect. One of the variants of Gallo-Italic languages, it has a strong linguistic identity and is well determined as Abstandsprache with regards to Veneto and Marchigiano; less clear the boundaries towards west, where there is a dialectal continuum with Emilian (egl). With respect to Emilian, Romagnol is developing as Ausbausprache, being strong and clear the perception the community has of a Romagnol history, territory, ethnicity, separated from that of Emilia. However, it lacks of a relevant factor for its status: a written standard. Institute "Friedrich Schürr" is one of the most active associations in the field of language support, but it has to do a crucial step: from a amateurish and folkloric activism to a real effort for a shared recognition of the language. The creation of a standard spelling is the path to follow for public presence, but there is still big tension between the old and the new vision. 
The fragmentation of oral varieties of Romagnol has never been a communication barrier, but nowadays it is perceived as a problem in terms of a written standardization. The Italian equation that "the language is written as it is spoken" causes the misleading belief that every difference in pronunciation must be registered in writing. It is actually the opposite: after six years of age we do not read letter by letter but we register the entire word "photo-graphing" it. Therefore, it is not obviously necessary that the phonetics matches the spelling. But minority speakers often get confused between speaking and writing, and fear that the standardization of the language may harm their local dialect. A standardized spelling only makes sense for a written language. If there were, for example, a talk show in Romagna, the titles and explanatory signs would be in standard Romagnol, but the presenter and the guests would talk in their own dialects (as it happens in German Switzerland or in Norway). At the same time, the speakers who fear standardization, also reject the use of tools such as electronic instruments for spellchecking (according to the belief that everyone writes in his or her own way) and do not accept the creation of neologisms because they are alien to the traditional language these speakers learned as children. These attitudes contribute to relegate minority languages such as Romagnol to the status of dialects and prevent them to evolve and flourish. In our presentation we will briefly sketch the dialectal situation of Romagnol, the main standardization problems and the issues arisen in the last years among activists. 
We will try to demonstrate that standardization is not only necessary but also fundamental if we want to give minority languages such as Romagnol the same status and dignity of national languages.}, KEYWORDS = {orthography, standardization, minority language, regional language}, PAGES = {124-125}, URL = {http://icml14.uni-graz.at/etc/upload/ICML_XIV_programme.pdf}, CONFERENCE_NAME = {14th International Conference on Minority Languages (ICML XIV)}, CONFERENCE_PLACE = {Graz, Austria}, CONFERENCE_DATE = {11/09/2013-17/09/2014}, BOOKTITLE = {International Conference on Minority Languages XIV (ICML XIV)}, }

@INPROCEEDINGS{BOSCHETTI_2013_INPROCEEDINGS_B_276322,
  AUTHOR = {Boschetti, F.},
  TITLE = {An Integrated System for Generating and Correcting Polytonic Greek OCR: The Proof-reading Process},
  YEAR = {2013},
  URL = {http://www.digitalclassicist.org/wip/wip2013.html},
  CONFERENCE_NAME = {Digital Classicist Seminars},
  CONFERENCE_PLACE = {London},
  CONFERENCE_DATE = {19/07/2013},
}

@INPROCEEDINGS{BOSCHETTI_2013_INPROCEEDINGS_B_276333,
  AUTHOR = {Boschetti, F.},
  TITLE = {Acquisizione e Creazione di Risorse Plurilingui per la Filologia Classica in Ambienti Collaborativi-Tre Casi d'Uso},
  YEAR = {2013},
  URL = {https://publications.cnr.it/doc/276333},
  CONFERENCE_NAME = {II Convegno AIUCD 2013},
  CONFERENCE_PLACE = {Padova},
  CONFERENCE_DATE = {11 Dicembre 2013},
}

@INPROCEEDINGS{BOSCHETTI_2013_INPROCEEDINGS_B_276335,
  AUTHOR = {Boschetti, F.},
  TITLE = {The Proof-reading Process},
  YEAR = {2013},
  URL = {https://publications.cnr.it/doc/276335},
  CONFERENCE_NAME = {Open Philology Seminar},
  CONFERENCE_PLACE = {Leipzig},
  CONFERENCE_DATE = {8 August 2013},
}

@INPROCEEDINGS{BOSCHETTI_2013_INPROCEEDINGS_BBD_276332,
  AUTHOR = {Boschetti, F. and Bozzi, A. and Del Grosso, A. M.},
  TITLE = {Library of components for the Computational Philological Domain dealing with TEI markup guidelines: CoPhiLib},
  YEAR = {2013},
  KEYWORDS = {collaborative philology},
  PAGES = {160-162},
  URL = {http://digilab2.let.uniroma1.it/teiconf2013/program/posters/abstracts-posters#C162},
  CONFERENCE_NAME = {The Linked TEI: Text Encoding in the Web},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {3-4 October 2013},
  BOOKTITLE = {The Linked TEI: Text Encoding in the Web Book of Abstracts, Abstracts of the TEI Conference and Members Meeting 2013},
  EDITOR = {Ciotti, F. and Ciula, A.},
}

@INPROCEEDINGS{DELGROSSO_2013_INPROCEEDINGS_DMPM_288072,
  AUTHOR = {Del Grosso, A. M. and Murano, F. and Pesini, L. and Marchi, S.},
  TITLE = {A Web tool for philological research. An experiment on some Saussurean writings},
  YEAR = {2013},
  ABSTRACT = {The work describes a philological-computational tool developed by the Istituto di Linguistica Computazionale, CNR, Pisa to create a digital edition of Ferdinand de Saussure's unpublished manuscripts. Since the use of a digital edition and of the most modern computer technology allow a more in-depth research, the ILC is developing a set of digital tools to facilitate the research and to take advantage of both the documents and the related information by the scientific community.},
  KEYWORDS = {digital philology, digital humanities, software engineering},
  URL = {https://publications.cnr.it/doc/288072},
  CONFERENCE_NAME = {AIUCD2013},
  CONFERENCE_PLACE = {Padua, Italy},
  CONFERENCE_DATE = {11-12 December 2013},
}

@INPROCEEDINGS{GOGGI_2013_INPROCEEDINGS_GPGB_277513, AUTHOR = {Goggi, S. and Pardelli, G. and Giannini, S.
and Biagioni, S.}, TITLE = {Grey literature in European Commission projects}, YEAR = {2013}, ABSTRACT = {The latest recommendations issued by the European Commission go towards the revision of their policy on dissemination and preservation of scientific information in order to promote the access to the results of the community funded research by especially implementing the open access policy within 'Horizon 2020', the EU Framework Programme for Research and Innovation (2014-2020). The aim of the survey is to identify, measure and evaluate the usability and availability of grey literature provided by the European Commission projects web sites in order to verify whether this type of literature is compliant with EU recommendations.}, KEYWORDS = {Grey literature, EU Projects, A. 1 INTRODUCTORY AND SURVEY}, PAGES = {154-159}, URL = {https://publications.cnr.it/doc/277513}, ISBN = {978-90-77484-21-0}, CONFERENCE_NAME = {Fifteenth International Conference on Grey Literature. The Grey Audit: a field assessment in grey literature}, CONFERENCE_PLACE = {Bratislava, Slovak Republic}, CONFERENCE_DATE = {2-3 Dicembre 2013}, BOOKTITLE = {GL Program Books}, }

@INPROCEEDINGS{LAM_2013_INPROCEEDINGS_LD_390563,
  AUTHOR = {Lamé, M. and Del Grosso, A. M.},
  TITLE = {WE ARE ALL DISABLED!},
  YEAR = {2013},
  ABSTRACT = {This is not a technical proposition. This is a very humble web user's testimony from so-called enabled and disabled people: any human being is disabled when it comes to accessing digital information, as it requires a device - that is a computer - to be reasonably understandable, perceivable, operable and robust, unless one wants to print the binary code of a movie, such as La Jetée by Chris Marker as the artist David Guez recently did for his art exhibition at Centre Pompidou.},
  KEYWORDS = {Digital Publishing, w3c, web},
  URL = {https://www.w3.org/2012/12/global-publisher/statements-of-interest/21-WeAreAllDisabled.pdf},
  CONFERENCE_NAME = {A W3C Workshop on Digital Publishing},
  CONFERENCE_PLACE = {Centre Pompidou, Parigi},
  CONFERENCE_DATE = {16-17/09/2013},
}

@INPROCEEDINGS{LAM_2013_INPROCEEDINGS_LK_322774,
  AUTHOR = {Lamé, M. and Kossmann, P.},
  TITLE = {From Paper Browser to Digital Edition of Inscriptions},
  YEAR = {2013},
  URL = {http://eer.hypotheses.org/posters},
  CONFERENCE_NAME = {Text-Encoding Initiative Roma},
  CONFERENCE_PLACE = {Università La Sapienza, Rome},
  CONFERENCE_DATE = {3-4 ottobre 2013},
}

@INPROCEEDINGS{LAM_2013_INPROCEEDINGS_LK_322783,
  AUTHOR = {Lamé, M. and Kossmann, P.},
  TITLE = {Pour une délivraison des inscriptions},
  YEAR = {2013},
  ABSTRACT = {Questo contributo, preparato con la Professoressa, archeologa ed epigrafista Perrine Kossmann, in carica dell'insegnamento di Storia della Grecia Antica all'Università di Borgogna, è il prolungamento più teorico dell'intervento metodologico fatto alla Text Encoding Initiative alla conferenza romana alcuni giorni prima. Si trattava di esporre i contrasti di modelizzazione tra costruire un sistema informatico basato sui modelli tradizionali e corpora cartacei (paper browser) e sistemi informatici basati sul messaggio trasmesso dal medium epigrafico, che implica una comprensione globale del fenomeno epigrafico in quanto fenomeno testuale storico. Gli esempi esposti furono i testi delle iscrizioni della Grecia Antica e i testi delle Res Gestae Divi Augusti.},
  URL = {https://publications.cnr.it/doc/322783},
  CONFERENCE_NAME = {Les Humanités délivrées},
  CONFERENCE_PLACE = {Université de Lausanne},
  CONFERENCE_DATE = {2-3 ottobre 2013},
}

@INPROCEEDINGS{MARINELLI_2013_INPROCEEDINGS_M_281778,
  AUTHOR = {Marinelli, R.},
  TITLE = {Espressioni idiomatiche in un database di terminologia: codifica, relazioni, traduzione},
  YEAR = {2013},
  ABSTRACT = {In questa presentazione si parla di un database di terminologia marittima che dopo le fasi iniziali della sua costruzione è stato ampliato e arricchito con immagini che permettono di visualizzare il termine preso in considerazione e successivamente con l'inserimento di espressioni idiomatiche e modi di dire che appartengono al dominio marittimo. Viene inoltre illustrata anche la problematica della traduzione in inglese, sia dei termini, sia delle espressioni idiomatiche stesse.},
  KEYWORDS = {Database semantico lessicali, terminologia, espressioni idiomatiche},
  URL = {https://publications.cnr.it/doc/281778},
  CONFERENCE_NAME = {Meeting annuale del Gruppo Web Semantico},
  CONFERENCE_PLACE = {Firenze},
  CONFERENCE_DATE = {2013},
}

@INPROCEEDINGS{MARZI_2013_INPROCEEDINGS_MD_288409, AUTHOR = {Marzi, C. and Daelemans, W.}, TITLE = {On memory and computation: a reappraisal of German noun plural inflection}, YEAR = {2013}, ABSTRACT = {The talk "On memory and computation: a reappraisal of German noun plural inflection" presented one of the main objectives on which the bilateral Italian-Belgian action is focussing, namely linguistic and extra-linguistic factors involved in mono- and bi-lingual word recognition.
She suggests a multi-factorial view of morphology adaptive processing, in presenting a usage-based perspective, by investigating a few properties of the German noun plural system and focusing on the dynamic relation between regularity, productivity and competition of inflection patterns through computer simulations (Temporal Self-Organising Maps - TSOMs) of type/token-frequency effects. In detail, she highlighted a few formal properties of the -s plural class, and concluded that although relatively infrequent, -s plurals seem to pattern in fairly regular sub-classes which suffer from no competition by members of other inflectional classes.}, URL = {http://www.networds-esf.eu/index.php?page=3rd-networds-workshop}, CONFERENCE_NAME = {Third NetWordS Workshop on "Variation and Adaptation in Lexical Processing and Acquisition"}, CONFERENCE_PLACE = {Dubrovnik, Croatia}, CONFERENCE_DATE = {19-20/09/2013}, }

@INPROCEEDINGS{MARZI_2013_INPROCEEDINGS_MFP_287555,
  AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.},
  TITLE = {Lexical parsability and morphological structure},
  YEAR = {2013},
  ABSTRACT = {A classical tenet in the psycholinguistic literature on the mental lexicon is that a parsed affix presents high activation levels (and thus contributes to activation spreading to other words with the same affix), and that such levels are tightly correlated with the affix productivity. In a number of influential papers, it has been suggested that parsability criteria interact with frequency to define morphological productivity in the lexicon. For example, the frequency of a derivative (e.g. government) relative to its base (govern) is shown to be a good predictor for parsability/productivity. The higher the frequency ratio, the more likely the morphological structure to be perceived, and the associated affix to be used productively.
    The present contribution intends to offer a computational explanatory basis for this correlational evidence, and assess its applicability to the acquisition of complex inflectional paradigms. In those languages, like Italian and German, whose inflection is stem-based rather than word-based, there is often no single paradigmatic form which can act as a base by being properly contained in all other inflected variants. Yet, it seems intuitive to suggest that verbs that are inflected for one paradigm cell only (e.g. neighbouring), are learned earlier and more easily but exhibit lower levels of perceived inflectional structure than verbs with richer paradigms. This appears to be in good accord with experimental evidence of time latencies in lexical decision, which are shown to correlate negatively with token frequency, paradigm size and paradigm entropy. Our simulations, based on Temporal Self-Organizing Maps (TSOMs) allow us to establish an interesting connection between inflectional parsability, frequency-based paradigm structure, and acquisitional constraints on the interaction between the human processor and working memory. Self-organising topological models of the mental lexicon can mimic the spatial and temporal organization of memory structures supporting the processing of symbolic sequences [8-10], and can provide an interesting framework for testing integrative accounts of lexical processing/acquisition as the complex result of general-purpose operations on word stimuli (e.g. working memory, long-term storage, sensory-motor mapping, rehearsal, unit integration, unit analysis, executive control, time-series processing), in line with recent acquisitions on the neuro-functional architecture of the perisylvian language network in the left hemisphere of human brain.
    Simulations of the incremental acquisition of "mini-paradigms" (small islands of morphological contrast encompassing up to three different forms for the same verb) support the hypothesis that perception of structure (parsability) and morphological productivity strongly correlate in the inflectional lexica of German and Italian. In particular, by monitoring longitudinal progress in storage and generalisation of differently distributed inflectional paradigms in the two languages, we show that: i) high-frequency forms are stored and accessed significantly earlier than low-frequency forms; ii) deeply entrenched but paradigmatically isolated forms tend to block usage of other forms in the same paradigm; iii) low-frequency evenly distributed (highly entropic) intra-paradigmatic forms are acquired later but are easily extended. Our investigation credits the proposed computational framework with psycholinguistic plausibility, and grounds parsability-based models of morphological productivity on a specific, explicit proposal of lexical architecture. This provides an explanatory basis for both psycholinguistic and linguistic accounts of morphological structure, and offers an intermediate framework for scientific inquiry bridging the gap between linguistic units and functional units in neurosciences. Finally, it makes the interesting suggestion that principles of morpheme-based organisation of the mental lexicon are compatible with a learning strategy requiring memorisation of full forms.},
  KEYWORDS = {morphological structure, word paradigms, frequency, human processor},
  PAGES = {33-34},
  URL = {http://mmm9.ffzg.unizg.hr/wp-content/uploads/2012/10/MMM_PROGRAM4.pdf},
  CONFERENCE_NAME = {9th Mediterranean Morphology Meeting on "Morphology and Semantics" (9th MMM)},
  CONFERENCE_PLACE = {Dubrovnik, Croatia},
  CONFERENCE_DATE = {15-18/09/2013},
  BOOKTITLE = {Morphology and Semantics-Books of Abstracts},
}

@INPROCEEDINGS{RUTA_2013_INPROCEEDINGS_RSTBCCBTFNMP_284784, AUTHOR = {Ruta, L.
and Siracusano, R. and Tortorella, G. and Boncoddo, M. and Colombi, C. and Crifaci, G. and Billeci, L. and Tartarisco, G. and Ferro, M. and Narzisi, A. and Muratori, F. and Pioggia, G.}, TITLE = {The PRIMA-PIETRA Project: A Web-Based Platform for Early Autism Risk Assessment}, YEAR = {2013}, ABSTRACT = {It is well recognized that the best outcomes in autism spectrum disorders (ASD) are achieved through early diagnosis and early intervention. ASD symptoms may occur as early as 12-18 months and different instruments have been developed for early autism risk assessment under the age of 2 years. The Modified Checklist for Autism in Children (M-CHAT) is a developmental surveillance-screening instrument administered during 18- to 36-month well-child visits that was demonstrated to improve early identification of autism. Novel technologies can substantially contribute to improve early diagnosis in ASD, providing early screening risk assessment platforms, unobtrusive measurements of behaviors and physiological responses, as well as brain structure and connectivity, or other measurable stimulus-event experimental paradigms. The Prima Pietra Project based at the Pervasive Healthcare Center of the Institute of Clinical Physiology of the National Research Council of Italy (Consiglio Nazionale delle Ricerche, C.N.R.) and the AOU Polyclinic "G. Martino" in Messina developed and provided an early autism risk assessment web-based platform for pediatricians and physicians available on the internet.}, KEYWORDS = {early autism risk assessment}, URL = {https://imfar.confex.com/imfar/2013/webprogram/Paper14488.html}, CONFERENCE_NAME = {International Meeting for Autism Research 2013}, CONFERENCE_PLACE = {San Sebastian, Spain}, CONFERENCE_DATE = {2 May 2013}, BOOKTITLE = {International Meeting for Autism Research}, } @INPROCEEDINGS{SAGRI_2013_INPROCEEDINGS_SV_310522, AUTHOR = {Sagri, M. T. 
and Venturi, G.}, TITLE = {Exploring the use of neuroscience in the Italian courtrooms: the linguistic and lexico-semantic analysis of a corpus of Italian case law texts}, YEAR = {2013}, URL = {https://publications.cnr.it/doc/310522}, CONFERENCE_NAME = {International Seminar of Neuroethics}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {27/11/2013}, }

@INPROCEEDINGS{SORIA_2013_INPROCEEDINGS_S_285397,
  AUTHOR = {Soria, C.},
  TITLE = {Preserving Digital Language Diversity},
  YEAR = {2013},
  URL = {https://publications.cnr.it/doc/285397},
  CONFERENCE_NAME = {All-Russia methodological seminar on protection of minority languages of peoples of Siberia and the Russian Far East},
  CONFERENCE_PLACE = {Biysk, Altai Krai, Russian Federation},
  CONFERENCE_DATE = {05/11/2013-08/11/2013},
}

@INPROCEEDINGS{SORIA_2013_INPROCEEDINGS_S_285434,
  AUTHOR = {Soria, C.},
  TITLE = {You can speak it now: assessing the effect of official recognition on vitality of minority languages},
  YEAR = {2013},
  ABSTRACT = {In 1999, an Italian controversial law granted official recognition to twelve regional and minority languages, but denied it to others such as Piedmontese, Venetan, Sicilian, Emilian, Romagnol, that nevertheless are rated as endangered by UNESCO and Ethnologue. This particular situation offers an ideal laboratory to assess the impact of language policies on protected languages and at the same time the effect of lack of official protection and recognition on languages denied of institutional support. In this research we have coupled official census data with the results of an extensive survey carried out among speakers of all endangered languages of Italy, recognised or not, to re-assess their vitality in terms of speakers' number, domains of use, intergenerational transmission and speakers' attitudes. After illustrating the methodology adopted for the survey, we will show how the law was largely ineffective in producing quantitatively significant changes, while had a sharp effect on speakers' attitudes. We will argue that official recognition generally improved self-esteem and generated a pride in the language that is severely lacking among speakers of other languages, while lack of top-down recognition can be a powerful element in reinforcing negative feelings and overestimation of the difficulties of language revitalization.},
  KEYWORDS = {minority language, regional language, endangered language, institutional support, speakers' attitudes},
  URL = {https://publications.cnr.it/doc/285434},
  CONFERENCE_NAME = {Conference Language Endangerment: Language Policy and Planning},
  CONFERENCE_PLACE = {Cambridge, UK},
  CONFERENCE_DATE = {26/07/2013},
}

@INPROCEEDINGS{SORIA_2013_INPROCEEDINGS_S_317649,
  AUTHOR = {Soria, C.},
  TITLE = {Salviamo l'italiano dall'estinzione digitale},
  YEAR = {2013},
  URL = {https://publications.cnr.it/doc/317649},
  CONFERENCE_NAME = {Salone del Libro di Torino, Convegno "D'Annunzio innovatore"},
  CONFERENCE_PLACE = {Torino},
  CONFERENCE_DATE = {17/05/2013},
}

@INPROCEEDINGS{SORIA_2013_INPROCEEDINGS_SZR_285444, AUTHOR = {Soria, C. and Zoli, C. and Randaccio, S.}, TITLE = {Why the Internet should speak minority languages-and how. The role of Language Technologies for minority and contested languages}, YEAR = {2013}, ABSTRACT = {Many minority languages that are thriving to get a place in the digital space and are profiting of the new opportunities offered by the Internet and digital devices will seriously face digital extinction if they will not be supported by Language Technologies (Calzolari et al. 2012).
LTs (spelling and grammar checkers, electronic dictionaries, localized interfaces, as well as search engines, language translators or information extraction tools) are a necessary instrument to secure usability of minority languages over the web (Soria et al. 2012, Krauwer 2003), thus ensuring those languages equal digital opportunities and raising their profile in the eyes of the younger, digitally-oriented generation. However, there are many challenges to be faced to equip minority languages with LTs (from basic to advanced): a substantial delay in development of basic technologies, a lack of cooperation among minority languages communities, a chronic shortage of funding (in particular for minority languages not officially recognized, yet often the most vital ones over the Internet) and the limited economic value placed over LTs for minority languages by the digital market rules. In this talk, on the basis of concrete examples and a survey about the digital use of minority languages of Italy, we will show how these challenges can be overcome and suggest a roadmap towards sustainable development of LTs for minority languages.}, URL = {https://publications.cnr.it/doc/285444}, CONFERENCE_NAME = {Conference Contested Languages in the Old World}, CONFERENCE_PLACE = {Bangor, UK}, CONFERENCE_DATE = {09/09/2013-10/09/2013}, } @TECHREPORT{BOSCHETTI_2013_TECHREPORT_B_276326, AUTHOR = {Boschetti, F.}, TITLE = {Acquisition of texts and development of linguistic tools for Greek, Latin and Italian corpora}, YEAR = {2013}, ABSTRACT = {This report illustrates the activities of Federico Boschetti, researcher at the ILC-CNR of Pisa, Italy, during the visit at the Perseus Project (Dec. 2012 Jun. 2013), aimed at the development of methods and tools for the localization in Italian of digital resources for the study of classics. 
After a short introduction that contextualizes the visit, the lines of investigation and development are described: a) the collaborative multilingual proofreader, b) the Ancient Greek WordNet linked to the Italian WordNets and c) the alignment of original texts to the Italian translations. The conclusion discusses the relations among the products of these activities.}, URL = {https://publications.cnr.it/doc/276326}, } @TECHREPORT{DELGROSSO_2013_TECHREPORT_DGP_354193, AUTHOR = {Del Grosso, A. M. and Giovannetti, E. and Piccini, S.}, TITLE = {Definizione del modello di filologia computazionale}, YEAR = {2013}, ABSTRACT = {Il documento illustra il lavoro di analisi condotto durante la prima fase del progetto Clavius on the Web. Nelle sezioni del documento si descrivono le modalità secondo le quali il modello generale della piattaforma di gestione dei testi verrà implementato partendo dalle esigenze degli studiosi dei manoscritti originali di Clavius digitalizzati e messi a raffronto con i file di trascrizione.}, KEYWORDS = {Clavius, Clavius on the web, Literary Computing, Computational Philology, Lexica, Latin, Semantic Web}, URL = {http://claviusontheweb.it/}, } @TECHREPORT{MARZI_2013_TECHREPORT_MG_287852, AUTHOR = {Marzi, C. and Giraudo, H.}, TITLE = {Perspectives on Synergy}, YEAR = {2013}, ABSTRACT = {The 2nd NetWordS Workshop, held on the 3rd and 4th of December 2012 in the Toulouse Research Area (Maison de la Recherche) of the French National Research Council (CNRS), brought together 27 participants (scholars, Post-Docs, PhD students) from various European countries. Ten speakers, experts of various scientific domain and with different theoretical inclinations, discussed cross-disciplinary Perspectives on Synergy, reflecting the interdisciplinarity and synergy fostered by NetWordS, the European Research Networking Programme on Word Structure. 
With these objectives in mind, the workshop gathered PhD students and junior research fellows who carried out interdisciplinary research under the NetWordS granting scheme for 2012 and more senior scholars who are currently involved in European or national initiatives geared towards scientific goals of interest to the NetWordS programme. A Round Table followed to concretely discuss project proposals of common interest to be submitted in the years 2013-2014, and to encourage the sharing of interdisciplinary cooperation efforts among NetWordS partners and other research teams}, KEYWORDS = {Mental lexicon, Interdisciplinary approach}, URL = {http://www.networds-esf.eu/index.php?page=2nd-networds-workshop}, } @TECHREPORT{MARZI_2013_TECHREPORT_MP_287848, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {NetWordS: the European Network on Word Structure (2011-2015) ESF RNP Mid-Term Report (2011-2013)}, YEAR = {2013}, ABSTRACT = {By networking experts of various research fields (including but not limited to Theoretical Linguistics, Cognition, Brain Sciences and Computing) and of different theoretical inclinations, NetWordS has set itself the fundamental goal of advancing the current awareness of theoretical, typological, psycholinguistic, computational and neurophysiological evidence on the structure and processing of words, with a view to promoting novel methods of research and assessment for grammar architecture and language physiology. The programme is pursued through knowledge sharing, dissemination and transfer, organised over a four year period, from May 2011 to April 2015. Thanks to its highly interdisciplinary profile, the programme promotes training of young scientists through short visits, exchange grants and Summer Schools. 
It encourages the novel integration of existing methodologies, sets common research priorities, and fosters virtual cross-disciplinary laboratories, partnerships and research infrastructures.}, KEYWORDS = {Mental lexicon, Interdisciplinary approach, word representation, word processing}, URL = {https://publications.cnr.it/doc/287848}, } @TECHREPORT{MARZI_2013_TECHREPORT_MR_287843, AUTHOR = {Marzi, C. and Raffaelli, I.}, TITLE = {Variation and Adaptation in Lexical Processing and Acquisition}, YEAR = {2013}, ABSTRACT = {Recent emphasis on language knowledge as an emergent dynamic system has drawn considerable attention to the role of time in the way speakers acquire and use their own language. There are at least three levels on which time matters. At the processing level, the interaction between processing and memory constraints, and in particular between short-term and long-term memory issues, is understood to shape the way we recode and organise time-bound sequences of linguistic signals. On an ontogenetic scale, the age of acquisition of language input data, and the duration of exposure (in the case of multilingual contexts) are known to interact with issues of cognitive maturation and brain plasticity, yielding different outcomes as a function of different time intervals. In this connection, also the distribution of input data in a particular linguistic environment (both in terms of word type and token frequency) is bound to have an impact on rate and speed of acquisition and on overall knowledge organisation. 
Finally, all previously mentioned time-effects conspire to make the language system change through usage and acquisition in passing from one generation to the ensuing one.}, KEYWORDS = {Mental lexicon, Lexical processing and acquisition, Interdisciplinary approach}, URL = {http://www.networds-esf.eu/index.php?page=3rd-networds-workshop}, } @TECHREPORT{PARDELLI_2013_TECHREPORT_P_242269, AUTHOR = {Pardelli, G.}, TITLE = {Un modello bibliografico saussuriano}, YEAR = {2013}, ABSTRACT = {Il modello bibliografico saussuriano, presentato il 21 settembre 2012 al Seminario Internazionale organizzato dall'Università di Firenze presso la Facoltà di Lettere a conclusione del progetto, ha le caratteristiche di un repertorio in quanto volto a una fonte primaria di conoscenza legata a uno specifico autore e a una sola tipologia documentaria. Ciascuna scheda è strutturata per contenere i formati digitali e per divenire strumento di estrazione informativa dai vari ambienti indicizzati. Nella sostanza trattasi di un database bibliografico al passo con l'era delle risorse documentarie cartacee che migrano progressivamente nei formati richiesti dalla rete telematica: una trasposizione verso il digitale resa possibile anche dai numerosi progetti di ricerca nazionali e comunitari di cui un esempio prezioso è il Progetto PRIN coordinato dal Professor Daniele Gambarara. Il Web si configura così come strumento di reference per recuperi veloci di materiale bibliografico. 
La descrizione dell'involucro tecnologico di questo particolare segmento della bibliografia saussuriana è il tema argomentale del presente scritto, mirato a una raccolta di informazioni da convogliare e frammentare in un centinaio di record aventi tutti eguale struttura e organizzati puntualmente in campi e sotto campi, al fine di soddisfare bisogni conoscitivi attraverso una maschera di interrogazione disponibile in Internet grazie ad un vocabolario controllato di termini in linguaggio naturale .}, KEYWORDS = {Saussure Ferdinand, Bibliografia}, PAGES = {1-9}, URL = {https://publications.cnr.it/doc/242269}, } @MISC{BOSCHETTI_2013_MISC_B_276330, AUTHOR = {Boschetti, F.}, TITLE = {Acquisition and Creation of Multilingual Resources for Classical Philology in Collaborative Environments: Three Use Cases}, YEAR = {2013}, URL = {https://publications.cnr.it/doc/276330}, } @ARTICLE{BARCA_2012_ARTICLE_BP_217409, AUTHOR = {Barca, L. and Pezzulo, G.}, TITLE = {Unfolding visual lexical decision in time}, YEAR = {2012}, ABSTRACT = {Visual lexical decision is a classical paradigm in psycholinguistics, and numerous studies have assessed the so-called "lexicality effect" (i.e., better performance with lexical than non-lexical stimuli). Far less is known about the dynamics of choice, because many studies measured overall reaction times, which are not informative about underlying processes. To unfold visual lexical decision in (over) time, we measured participants' hand movements toward one of two item alternatives by recording the streaming x,y coordinates of the computer mouse. Participants categorized four kinds of stimuli as "lexical" or "non-lexical:" high and low frequency words, pseudowords, and letter strings. Spatial attraction toward the opposite category was present for low frequency words and pseudowords. 
Increasing the ambiguity of the stimuli led to greater movement complexity and trajectory attraction to competitors, whereas no such effect was present for high frequency words and letter strings. Results fit well with dynamic models of perceptual decision-making, which describe the process as a competition between alternatives guided by the continuous accumulation of evidence. More broadly, our results point to a key role of statistical decision theory in studying linguistic processing in terms of dynamic and non-modular mechanisms.}, KEYWORDS = {Psycholinguistics, Time Measurement}, URL = {http://www.plosone.org/article/info:doi/10.1371/journal.pone.0035932}, VOLUME = {7}, DOI = {10.1371/journal.pone.0035932}, PUBLISHER = {Public Library of Science (San Francisco, CA, Stati Uniti d'America)}, ISSN = {1932-6203}, JOURNAL = {PloS one}, } @ARTICLE{BARCA_2012_ARTICLE_BP_218408, AUTHOR = {Barca, L. and Pezzulo, G.}, TITLE = {Written language processing in Hearing and Deaf}, YEAR = {2012}, ABSTRACT = {Visual lexical decision is a classical paradigm in psycholinguistics, and numerous studies have assessed the so-called ''lexicality effect'' (i.e., better performance with lexical than non-lexical stimuli). Far less is known about the dynamics of choice, because many studies measured overall reaction times, which are not informative about underlying processes. To unfold visual lexical decision in (over) time, we measured participants' hand movements toward one of two item alternatives by recording the streaming x,y coordinates of the computer mouse. Participants categorized four kinds of stimuli as 'lexical' or 'non-lexical': high and low frequency words, pseudowords, and letter strings. Spatial attraction toward the opposite category was present for low frequency words and pseudowords. 
Increasing the ambiguity of the stimuli led to greater movement complexity and trajectory attraction to competitors, whereas no such effect was present for high frequency words and letter strings. Results fit well with dynamic models of perceptual decision-making, which describe the process as a competition between alternatives guided by the continuous accumulation of evidence. More broadly, our results point to a key role of statistical decision theory in studying linguistic processing in terms of dynamic and non-modular mechanisms.}, KEYWORDS = {Visual lexical decision, Interactive Activation account, dynamic models of decision-making, kinematics}, PAGES = {e35932}, URL = {http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0035932}, VOLUME = {7}, PUBLISHER = {Public Library of Science (San Francisco, CA, Stati Uniti d'America)}, ISSN = {1932-6203}, JOURNAL = {PloS one}, }

@ARTICLE{BARCA_2012_ARTICLE_BP_221505,
  AUTHOR = {Barca, L. and Pezzulo, G.},
  TITLE = {Is visual lexical decision a dynamic and competitive process? no, if we look at reaction times. yes, if we study how it unfolds in time},
  YEAR = {2012},
  ABSTRACT = {Visual lexical decision is a classical paradigm in psycholinguistics, and numerous studies have assessed a so-called "lexicality effect" (i.e., better performance with lexical over non-lexical stimuli). Far less is known relative to the dynamics of choice, as many studies measure overall reaction times which are not informative of the underlying processes. To unfold visual lexical decision in time, we measured participants' hand movements toward one of two items alternatives by recording the streaming x,y coordinates of the computer mouse. Participants categorized as 'lexical' or 'non-lexical' four kinds of stimuli: high and low frequency words, pseudowords, and letter strings. Spatial attraction toward the opposite category was present for low frequency words and pseudowords. Increasing stimuli ambiguity led to enhanced movements' complexity and trajectories' attraction to competitors, as no such effect was present for high frequency words and letter strings. Results fit well with dynamic models of perceptual decision-making describing the process as a competition between alternatives guided by the continuous accumulation of evidence, as well as with a recent neural model of visual word recognition that highlights the role of top-down influences and predictions on perceptual processes. More broadly, our results point to a key role of statistical decision theory to study linguistic processing in terms of dynamic and non-modular mechanisms. Finally, we discuss two aspects that make our set-up challenging for current dynamical models of decision-making: 1) not all information (e.g. orthographic, phonological and semantic) is available at the same time, therefore the accumulation process is nonstationary; 2) the choice is not completed at the action onset, but can be revised at any time during the movement.},
  KEYWORDS = {Computational neuroscience, psycholinguistic, statistical decision theory},
  URL = {http://www.frontiersin.org/10.3389/conf.fnins.2012.86.00001/event_abstract},
  DOI = {10.3389/conf.fnins.2012.86.00001},
  PUBLISHER = {Frontiers Research Foundation (Lausanne, Svizzera)},
  ISSN = {1662-453X},
  JOURNAL = {Frontiers in neuroscience (Online)},
}

@ARTICLE{CHERSI_2012_ARTICLE_CP_217431, AUTHOR = {Chersi, F. and Pezzulo, G.}, TITLE = {Using hippocampal-striatal loops for spatial navigation and goal-directed decision-making}, YEAR = {2012}, ABSTRACT = {The hippocampus plays a central role in spatial representation, declarative and episodic memory. In this area, so-called place cells possess high spatial selectivity, firing preferentially when the individual is within a small area of the environment.
Interestingly, it has been found in rats that these cells can be active also when the animal is outside the location or context of their corresponding place field producing so-called ``forward sweeps''. These typically occur at decision points during task execution and seem to be utilized, among other things, for the evaluation of potential alternative paths. Anticipatory firing is also found in the ventral striatum, a brain area that is strongly interconnected with the hippocampus and is known to encode value and reward. In this paper, we describe a biologically based computational model of the hippocampal-ventral striatum circuit that implements a goal-directed mechanism of choice, with the hippocampus primarily involved in the mental simulation of possible navigation paths and the ventral striatum involved in the evaluation of the associated reward expectancies. The model is validated in a navigation task in which a rat is placed in a complex maze with multiple rewarding sites. We show that the rat mentally activates place cells to simulate paths, estimate their value, and make decisions, implementing two essential processes of model-based reinforcement learning algorithms of choice: look-ahead prediction and the evaluation of predicted states.}, KEYWORDS = {Spatial navigation, Mental simulation, Hippocampal-striatal circuit, Neural network, Computational model}, PAGES = {125-129}, URL = {http://link.springer.com/content/pdf/10.1007%2Fs10339-012-0475-7}, VOLUME = {13}, DOI = {10.1007/s10339-012-0475-7}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {1612-4782}, JOURNAL = {Cognitive processing (Print)}, } @ARTICLE{HAYASHI_2012_ARTICLE_HSMSC_218777, AUTHOR = {Hayashi, Y. and Savas, B. and Monachini, M. and Soria, C. 
and Calzolari, N.}, TITLE = {LMF-aware Web services for accessing semantic lexicons}, YEAR = {2012}, ABSTRACT = {This paper demonstrates that Wordnet-LMF, a version of ISO LMF, allows us to effectively design and implement Web services for accessing WordNet-type semantic lexicons that conform to the REST Web service architecture. The implemented prototype service currently provides access to native wordnets as well as to a bilingual concept dictionary. This paper thus describes slight revisions that were made to the Wordnet-LMF specifications to model and accommodate a non-wordnet-native bilingual concept dictionary.}, KEYWORDS = {Lexical markup framework Semantic lexicons Wordnets Language services RESTful Web service design}, PAGES = {253-264}, URL = {http://link.springer.com/content/pdf/10.1007%2Fs10579-012-9181-4.pdf}, VOLUME = {46}, DOI = {10.1007/s10579-012-9181-4}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @ARTICLE{IDE_2012_ARTICLE_IC_288725, AUTHOR = {Ide, N. and Calzolari, N.}, TITLE = {Editors' Note}, YEAR = {2012}, PAGES = {153-154}, URL = {https://doi.org/10.1007/s10579-012-9190-3}, VOLUME = {46}, DOI = {10.1007/s10579-012-9190-3}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @ARTICLE{LAM_2012_ARTICLE_LVB_321606, AUTHOR = {Lamé, M. and Valchera, V. and Boschetti, F.}, TITLE = {Epigrafia digitale: paradigmi di rappresentazione per il trattamento digitale delle epigrafi}, YEAR = {2012}, ABSTRACT = {The article is divided into two parts: the first presents some theoretical and methodological aspects of traditional epigraphy, which can be the theoretical basis for digitization of entries. In particular, the attention is focused on the autopsy of the epigraph. 
The second part concerns the description of a systematic model for the digital representation of an epigraphic text, based on nine levels of independent analyses, according to the principles of the stand-off mark-up.}, KEYWORDS = {Digital Epigraphy, Digital Philology}, PAGES = {386-392}, URL = {https://publications.cnr.it/doc/321606}, VOLUME = {74}, PUBLISHER = {Fratelli Lega Editori (Faenza, Italia)}, ISSN = {0013-9572}, JOURNAL = {Epigraphica}, } @ARTICLE{LAM_2012_ARTICLE_LVB_217376, AUTHOR = {Lamé, M. and Valchera, V. and Boschetti, F.}, TITLE = {Epigrafia digitale. Paradigmi di rappresentazione per il trattamento digitale delle epigrafi}, YEAR = {2012}, ABSTRACT = {The article is divided into two parts: the first presents some theoretical and methodological aspects of traditional epigraphy, which can be the theoretical basis for digitization of entries. In particular, the attention is focused on the autopsy of the epigraph. The second part concerns the description of a systematic model for the digital representation of an epigraphic text, based on nine levels of independent analyses, according to the principles of the stand-off mark-up.}, KEYWORDS = {Digital Epigraphy Digital Philology}, PAGES = {331-338}, URL = {https://publications.cnr.it/doc/217376}, VOLUME = {74}, PUBLISHER = {Fratelli Lega Editori (Faenza, Italia)}, ISSN = {0013-9572}, JOURNAL = {Epigraphica}, } @ARTICLE{MARINELLI_2012_ARTICLE_MBMCCS_217369, AUTHOR = {Marinelli, R. and Bindi, R. and Marchi, S. and Castellani, E. and Carli, G. and Santarcangelo, E. L.}, TITLE = {Hypnotizability-related differences in written language}, YEAR = {2012}, ABSTRACT = {The study was aimed at analyzing the written production of subjects with high (Highs) and low (Lows) hypnotizability scores. The participants were asked to write short texts following highly imaginative titles in standard conditions. The texts were processed through computerized and manual methods. 
The results showed that the Highs' texts are more sophisticated owing to a higher number of abstract nouns, more intense and imaginative owing to a larger number of similes, metaphors and onomatopoeias, less detailed owing to a higher nouns-to-adjectives ratio. The differences in the use of abstract nouns and highly imageable expressions are discussed in relation to the pre-eminent left hemisphere activity of highs during wakefulness and to a possibly different involvement of the precuneus which is involved in hypnotic phenomena.}, KEYWORDS = {Written language, text analysis, hypnosis, hypnotizability}, PAGES = {54-66}, URL = {https://publications.cnr.it/doc/217369}, VOLUME = {1}, DOI = {10.1080/00207144.2011.622196}, PUBLISHER = {Taylor \& Francis (Philadelphia, Stati Uniti d'America)}, ISSN = {0020-7144}, JOURNAL = {International journal of clinical and experimental hypnosis}, } @ARTICLE{MARZI_2012_ARTICLE_M_217342, AUTHOR = {Marzi, C.}, TITLE = {First 'NetWordS' Workshop on Understanding the Architecture of the Mental Lexicon: Integration of Existing Approaches}, YEAR = {2012}, ABSTRACT = {The ambitious goal of the workshop, organised within the framework of "NetWordS", the European Science Foundation Research Networking Programme on the Structure of Words in the languages of Europe, was to lay the foundations for an interdisciplinary European research agenda on the Mental Lexicon for the coming 10 years, with particular emphasis on three main challenges: Lexicon and Rules in the grammar; Word knowledge and word use; 
Words and meanings}, KEYWORDS = {Mental lexicon}, PAGES = {52-52}, URL = {http://ercim-news.ercim.eu/en89/events/first-networds-workshop}, VOLUME = {89}, PUBLISHER = {ERCIM (Le Chesnay)}, ISSN = {0926-4981}, JOURNAL = {ERCIM news}, } @ARTICLE{MARZI_2012_ARTICLE_M_217353, AUTHOR = {Marzi, C.}, TITLE = {Knowledge communities in grey}, YEAR = {2012}, ABSTRACT = {The dynamic nature of modern human social interactions, and the increasing capability of wireless and mobile devices for creating and sharing contents, open up the opportunity for a wide dissemination of information through complex knowledge sharing systems. As the shared knowledge components build cognitive ties, there is no real sharing of knowledge without a common understanding of it. In this article, particular emphasis is laid on technologies in Natural Language understanding and knowledge management for providing structured, intelligent access to the continuously evolving content, generated on-line in a pervasive collaborative environment. In detail, robust automated techniques for term extraction and knowledge acquisition are used to tap the information density and the global coherence of text excerpts sampled from both general-purpose and subject-specific social networks. We show empirically that the two sources may exhibit considerable differences in terms of content accessibility and informativeness.}, KEYWORDS = {Grey Literature, Web Communities, Knowledge sharing, Concept Maps}, PAGES = {27-33}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84883276602\&origin=inward}, VOLUME = {8}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{MARZI_2012_ARTICLE_MFP_217399, AUTHOR = {Marzi, C. and Ferro, M. 
and Pirrelli, V.}, TITLE = {Word alignment and paradigm induction}, YEAR = {2012}, ABSTRACT = {The variety of morphological processes attested in inflectional system of average complexity calls for adaptive strategies of word alignment. Prefixation, suffixation, stem alternation and combinations thereof pose severe problems to unsupervised algorithms of morphology induction. The paper analyses morphological generalisation as a by-product of flexible memory self-organisation strategies for word recoding. Our model endorses the hypothesis that lexical forms are memorised as full units. At the same time, lexical units are paradigmatically organised. We show that the overall amount of redundant morphological structure emerging from paradigm-based self-organisation has a clear impact on generalisation. This supports the view that issues of word representation and issues of word processing are mutually implied in lexical acquisition.}, KEYWORDS = {Morphological Generalisation Morphological Paradigms Self-Organising Memory Word coding and Processing}, PAGES = {251-274}, URL = {http://www.rivisteweb.it/doi/10.1418/38789}, VOLUME = {XI}, DOI = {10.1418/38789}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{MARZI_2012_ARTICLE_MP_217391, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {Understanding the Architecture of the Mental Lexicon}, YEAR = {2012}, ABSTRACT = {The present collection stems from the 1st NetWordS Workshop "Understanding the architecture of the mental lexicon: Integration of existing approaches", held in the Pisa Research Area of the Italian National Research Council, in November 2011. 
"NetWordS: the European network on Word Structure in the languages of Europe" is the Research Networking Programme of the European Science Foundation launched in May 2011 with the ambitious goal of paving the way to the European interdisciplinary research agenda on the Mental Lexicon, with particular emphasis on the following three main challenges: - lexicon and rules in the grammar, - word knowledge and word use, - words and meanings.}, KEYWORDS = {Mental Lexicon, interdisciplinary approach}, PAGES = {101-105}, URL = {https://publications.cnr.it/doc/217391}, VOLUME = {XI}, DOI = {10.1418/38780}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{PEZZULO_2012_ARTICLE_P_218767, AUTHOR = {Pezzulo, G.}, TITLE = {The interaction engine: a common pragmatic competence across linguistic and non-linguistic interactions}, YEAR = {2012}, ABSTRACT = {Recent research in cognitive psychology, neuroscience, and robotics has widely explored the tight relations between language and action systems in primates. However, the link between the pragmatics of linguistic and nonlinguistic interactions has received less attention up to now. In this paper, we argue that cognitive agents exploit the same cognitive processes and neural substrate---a general pragmatic competence---across linguistic and nonlinguistic interactive contexts. Elaborating on Levinson's idea of an "interaction engine" that permits to convey and recognize communicative intentions in both linguistic and nonlinguistic interactions, we offer a computationally guided analysis of pragmatic competence, suggesting that the core abilities required for successful linguistic interactions could derive from more primitive architectures for action control, nonlinguistic interactions, and joint actions. 
Furthermore, we make the case for a novel, embodied approach to human-robot interaction and communication, in which the ability to carry on face-to-face communication develops in coordination with the pragmatic competence required for joint action.}, KEYWORDS = {Computational modeling, Context, Engines, Joints, Planning}, PAGES = {105-123}, URL = {http://ieeexplore.ieee.org/xpl/login.jsp?reload=true\&tp=\&arnumber=6006515\&url=http%3A}, VOLUME = {4}, DOI = {10.1109/TAMD.2011.2166261}, PUBLISHER = {IEEE (Piscataway, NJ, Stati Uniti d'America)}, ISSN = {1943-0604}, JOURNAL = {IEEE transactions on autonomous mental development (Print)}, } @ARTICLE{PEZZULO_2012_ARTICLE_PBCFMS_218730, AUTHOR = {Pezzulo, G. and Barsalou, L. W. and Cangelosi, A. and Fischer, M. H. and McRae, K. and Spivey, M.}, TITLE = {Computational Grounded Cognition: A New Alliance between Grounded Cognition and Computational Modeling}, YEAR = {2012}, ABSTRACT = {Grounded theories assume that there is no central module for cognition. According to this view, all cognitive phenomena, including those considered the province of amodal cognition such as reasoning, numeric and language processing, are ultimately grounded in (and emerge from) a variety of bodily, affective, perceptual and motor processes. The development and expression of cognition is constrained by the embodiment of cognitive agents and various contextual factors (physical and social) in which they are immersed. The grounded framework has received numerous empirical confirmations. Still, there are very few explicit computational models that implement grounding in sensory, motor and affective processes as intrinsic to cognition, and demonstrate that grounded theories can mechanistically implement higher cognitive abilities. We propose a new alliance between grounded cognition and computational modeling towards a novel multidisciplinary enterprise: Computational Grounded Cognition. 
We clarify the defining features of this novel approach and emphasize the importance of using the methodology of Cognitive Robotics, which permits simultaneous consideration of multiple aspects of grounding, embodiment, and situatedness, showing how they constrain the development and expression of cognition.}, KEYWORDS = {Grounding, embodiment, situatedness, Cognitive Robotics, situated simulation}, PAGES = {612-613}, URL = {http://www.frontiersin.org/Psychology/10.3389/fpsyg.2012.00612/abstract}, VOLUME = {3}, DOI = {10.3389/fpsyg.2012.00612}, PUBLISHER = {Frontiers media (Lausanne, Svizzera)}, JOURNAL = {Frontiers in Psychology}, } @ARTICLE{PEZZULO_2012_ARTICLE_PO_217381, AUTHOR = {Pezzulo, G. and Ognibene, D.}, TITLE = {Proactive action preparation: Seeing action preparation as a continuous and proactive process}, YEAR = {2012}, ABSTRACT = {In this paper, we aim to elucidate the processes that occur during action preparation from both a conceptual and a computational point of view. We first introduce the traditional, serial model of goal-directed action and discuss from a computational viewpoint its subprocesses occurring during the two phases of covert action preparation and overt motor control. Then, we discuss recent evidence indicating that these sub-processes are highly intertwined at representational and neural levels, which undermines the validity of the serial model and points instead to a parallel model of action specification and selection. Within the parallel view, we analyze the case of delayed choice, arguing that action preparation can be proactive, and preparatory processes can take place even before decisions are made. Specifically, we discuss how prior knowledge and prospective abilities can be used to maximize utility even before deciding what to do. 
To support our view, we present a computational implementation of (an approximated version of) proactive action preparation, showing its advantages in a simulated tennis-like scenario.}, KEYWORDS = {action preparation, action execution, proactivity, prediction, internal model}, PAGES = {386-424}, URL = {https://publications.cnr.it/doc/217381}, VOLUME = {16}, PUBLISHER = {Human Kinetics (Champaign, IL, Stati Uniti d'America)}, ISSN = {1087-1640}, JOURNAL = {Motor control}, } @ARTICLE{RIGOLI_2012_ARTICLE_RPP_218745, AUTHOR = {Rigoli, F. and Pavone, E. F. and Pezzulo, G.}, TITLE = {Aversive pavlovian responses affect human instrumental motor performance}, YEAR = {2012}, ABSTRACT = {In neuroscience and psychology, an influential perspective distinguishes between two kinds of behavioral control: instrumental (habitual and goal-directed) and Pavlovian. Understanding the instrumental-Pavlovian interaction is fundamental for the comprehension of decision-making. Animal studies (as those using the negative auto-maintenance paradigm), have demonstrated that Pavlovian mechanisms can have maladaptive effects on instrumental performance. However, evidence for a similar effect in humans is scarce. In addition, the mechanisms modulating the impact of Pavlovian responses on instrumental performance are largely unknown, both in human and non-human animals. The present paper describes a behavioral experiment investigating the effects of Pavlovian conditioned responses on performance in humans, focusing on the aversive domain. Results showed that Pavlovian responses influenced human performance, and, similar to animal studies, could have maladaptive effects. In particular, Pavlovian responses either impaired or increased performance depending on modulator variables such as threat distance, task controllability, punishment history, amount of training, and explicit punishment expectancy. 
Overall, these findings help elucidate the computational mechanisms underlying the instrumental-Pavlovian interaction, which might be at the base of apparently irrational phenomena in economics, social behavior, and psychopathology.}, KEYWORDS = {Controllability, goal-directed, habitual, Pavlovian, reinforcement learning}, PAGES = {134}, URL = {http://www.frontiersin.org/Decision_Neuroscience/10.3389/fnins.2012.00134/abstract}, VOLUME = {6}, DOI = {10.3389/fnins.2012.00134}, PUBLISHER = {Frontiers Research Foundation (Lausanne, Svizzera)}, ISSN = {1662-453X}, JOURNAL = {Frontiers in neuroscience (Online)}, } @ARTICLE{RUSSO_2012_ARTICLE_RCR_218805, AUTHOR = {Russo, I. and Caselli, T. and Rubino, F.}, TITLE = {Recognizing deverbal events in context}, YEAR = {2012}, ABSTRACT = {Event detection is a key task in order to access information through content. This paper focuses on events realized by deverbal nouns in Italian. Deverbal nouns obtained through transpositional suffixes (such as -zione; -mento, -tura and -aggio) are commonly known as nouns of action, i.e. nouns which denote the process/action described by the corresponding verbs. However, this class of nouns is also known for a specific polysemous alternation: they may denote the result of the process/action of the corresponding verb. This paper describes a statistically based analysis that helps to develop a classifier for automatic identification of deverbal nouns denoting events in context by exploiting rules obtained from syntagmatic and collocational cues identified by linguists.}, KEYWORDS = {Deverbal nouns, Event detection}, PAGES = {91-103}, URL = {http://www.gelbukh.com/ijcla/2011-1-2/IJCLA-2011.pdf#page=91}, VOLUME = {2}, PUBLISHER = {Bahri Publications (New Delhi, India)}, ISSN = {0976-0962}, JOURNAL = {International journal of computational linguistics and applications}, } @ARTICLE{TARTARISCO_2012_ARTICLE_TBCRAFGP_196454, AUTHOR = {Tartarisco, G. and Baldus, G. and Corda, D. and Raso, R. 
and Arnao, A. and Ferro, M. and Gaggioli, A. and Pioggia, G.}, TITLE = {Personal Health System architecture for stress monitoring and support to clinical decisions}, YEAR = {2012}, ABSTRACT = {Developments in computational techniques including clinical decision support systems, information processing, wireless communication and data mining hold new premises in Personal Health Systems. Pervasive Healthcare system architecture finds today an effective application and represents in perspective a real technological breakthrough promoting a paradigm shift from diagnosis and treatment of patients based on symptoms to diagnosis and treatment based on risk assessment. Such architectures must be able to collect and manage a large quantity of data supporting the physicians in their decision process through a continuous pervasive remote monitoring model aimed to enhance the understanding of the dynamic disease evolution and personal risk. In this work an automatic simple, compact, wireless, personalized and cost efficient pervasive architecture for the evaluation of the stress state of individual subjects suitable for prolonged stress monitoring during normal activity is described. A novel integrated processing approach based on an autoregressive model, artificial neural networks and fuzzy logic modeling allows stress conditions to be automatically identified with a mobile setting analysing features of the electrocardiographic signals and human motion. The performances of the reported architecture were assessed in terms of classification of stress conditions. © 2011 Elsevier B.V. 
All rights reserved.}, KEYWORDS = {Autonomic sympathovagal balance Autoregressive model Clinical decision support system Pervasive healthcare architecture Stress detection}, PAGES = {1296-1305}, URL = {http://www.sciencedirect.com/science/article/pii/S0140366411003720}, VOLUME = {35}, DOI = {10.1016/j.comcom.2011.11.015}, PUBLISHER = {IPC Science and Technology Press (Guildford, Regno Unito)}, ISSN = {0140-3664}, JOURNAL = {Computer communications}, } @ARTICLE{TERRANOVA_2012_ARTICLE_TFCRBSP_209663, AUTHOR = {Terranova, G. and Ferro, M. and Carpeggiani, C. and Recchia, V. and Braga, L. and Semelka, R. C. and Picano, E.}, TITLE = {Low Quality and Lack of Clarity of Current Informed Consent Forms in Cardiology: How to Improve Them}, YEAR = {2012}, ABSTRACT = {Guidelines on informed consent for clinical practice exhort physicians to use standard plain language to enhance patient comprehension and facilitate shared decision making. The aim of this study was to assess and improve quality and readability of current informed consent forms used in cardiology. We evaluated the currently used informed consent forms, previously written in Italian and English, of 7 common imaging examinations, according to the recommendations of scientific societies. For each text, we also developed a revised informed consent form according to reference standards, including Federal Plain Language guidelines. Regarding readability scores, we analyzed each text (standard and revised) with Flesch-Kincaid (F-K) grade level (higher numbers indicating harder-to-read text) and the Italian language-tailored Gulpease level (from 0 [difficult] to 100 [easy]). Overall quality and readability was poor for both the original English and Italian versions, and readability was improved with the revised form, with higher readability evidenced by changes in both F-K grade level (standard 10.2 ± 2.37% vs. revised 6.5 ± 0.41%; p < 0.001) for English and Gulpease (standard 45.7 ± 2% vs. 
revised 84.09 ± 2.98%; p < 0.0001) for Italian. In conclusion, current informed consent forms are complex, incomplete, and unreadable for the average patient. Substantial quality improvement and higher readability scores can be achieved with revised forms that explicitly discuss risks and are prepared following standard recommendations of plain writing.}, KEYWORDS = {bioethics imaging risk communication informed consent patient rights}, PAGES = {649-655}, URL = {http://www.sciencedirect.com/science/article/pii/S1936878X1200321X}, VOLUME = {5}, DOI = {10.1016/j.jcmg.2012.03.007}, PUBLISHER = {ELSEVIER SCIENCE INC (NEW YORK, USA, Stati Uniti d'America)}, ISSN = {1936-878X}, JOURNAL = {JACC-CARDIOVASCULAR IMAGING}, } @ARTICLE{TORAL_2012_ARTICLE_TFMM_218786, AUTHOR = {Toral, A. and Ferrández, S. and Monachini, M. and Munoz, R.}, TITLE = {Web 2.0, Language Resources and standards to automatically build a multilingual Named Entity Lexicon}, YEAR = {2012}, ABSTRACT = {This paper proposes to advance in the current state-of-the-art of automatic Language Resource (LR) building by taking into consideration three elements: (1) the knowledge available in existing LRs, (2) the vast amount of information available from the collaborative paradigm that has emerged from the Web 2.0 and (3) the use of standards to improve interoperability. We present a case study in which a set of LRs for different languages (WordNet for English and Spanish and Parole-Simple-Clips for Italian) are extended with Named Entities (NE) by exploiting Wikipedia and the aforementioned LRs. The practical result is a multilingual NE lexicon connected to these LRs and to two ontologies: SUMO and SIMPLE. Furthermore, the paper addresses an important problem which affects the Computational Linguistics area in the present, interoperability, by making use of the ISO LMF standard to encode this lexicon. 
The different steps of the procedure (mapping, disambiguation, extraction, NE identification and postprocessing) are comprehensively explained and evaluated. The resulting resource contains 974,567, 137,583 and 125,806 NEs for English, Spanish and Italian respectively. Finally, in order to check the usefulness of the constructed resource, we apply it into a state-of-the-art Question Answering system and evaluate its impact; the NE lexicon improves the system's accuracy by 28.1%. Compared to previous approaches to build NE repositories, the current proposal represents a step forward in terms of automation, language independence, amount of NEs acquired and richness of the information represented.}, KEYWORDS = {Language Resources, Named Entities, Web 2.0, Standards}, PAGES = {383-419}, URL = {http://link.springer.com/content/pdf/10.1007%2Fs10579-011-9148-x.pdf}, VOLUME = {46}, DOI = {10.1007/s10579-011-9148-x}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @BOOK{CALZOLARI_2012_BOOK_CMSS_225736, AUTHOR = {Calzolari, N. and Magnini, B. and Speranza, M. and Soria, C.}, TITLE = {The Italian language in the digital age-La lingua italiana nell'era digitale}, YEAR = {2012}, ABSTRACT = {This white paper is part of a series that promotes knowledge about language technology and its potential. It addresses educators, journalists, politicians, language communities and others. The availability and use of language technology in Europe varies between languages. Consequently, the actions that are required to further support research and development of language technologies also differ for each language. The required actions depend on many factors, such as the complexity of a given language and the size of its community. META-NET, a Network of Excellence funded by the European Commission, has conducted an analysis of current language resources and technologies. 
This analysis focused on the 23 official European languages as well as other important national and regional languages in Europe. The results of this analysis suggest that there are many significant research gaps for each language. A more detailed expert analysis and assessment of the current situation will help maximise the impact of additional research and minimize any risks. META-NET consists of 54 research centres from 33 countries that are working with stakeholders from commercial businesses, government agencies, industry, research organisations, software companies, technology providers and European universities. Together, they are creating a common technology vision while developing a strategic research agenda that shows how language technology applications can address any research gaps by 2020.}, KEYWORDS = {digital extinction, tecnologie del linguaggio, lingua italiana}, PAGES = {69}, URL = {http://www.springer.com/computer/ai/book/978-3-642-30775-1}, DOI = {10.1007/978-3-642-30776-8}, SERIES = {White Paper Series}, PUBLISHER = {Springer (Berlin, DEU)}, ISBN = {978-3-642-30775-1}, EDITOR = {Rehm, G. and Uszkoreit, H.}, } @INCOLLECTION{CAVIONI_2012_INCOLLECTION_CCMRZ_264766, AUTHOR = {Cavioni, V. and Cutugno, P. and Marconi, L. and Renati, R. and Zanetti, M. A.}, TITLE = {Essere e apparire: le identità digitali costruite in rete}, YEAR = {2012}, ABSTRACT = {Identità, apprendimento e comunità sono parole chiavi attraverso cui è possibile capire la portata innovativa che i nuovi ambienti di comunicazione on line stanno apportando alla vita sociale, culturale e privata di coloro che entrano in rete. In quest'ottica, il volume analizza i processi di interazione on line, di identità digitali, di comunità virtuali, con particolare riferimento ad ambienti e strumenti tecnologici di diverso tipo: forum, blog, Facebook. L'accesso ad Internet permette agli individui l'ingresso in uno spazio, non solo digitale, ma anche psicologico. 
La possibilità di comunicare senza vincoli di tempo e di spazio attraverso molteplici modalità comunicative permette alle persone di sentirsi realmente parte di una comunità all'interno della quale ogni "mente" è collegata alle altre. L'identità online non è soltanto definita dalle caratteristiche auto-attribuite da un utente a sé stesso, ma essa risulta determinata dai feedback di risposta dagli altri utenti, i quali danno ulteriori significati ai contenuti online in risposta a comportamenti agiti nel cyberspazio. Nella rete, quindi, è possibile rinegoziare la propria struttura identitaria, attraverso continui processi di differenziazione/identificazione, riflessioni e narrazioni di Sé nella comunità virtuale. Il linguaggio, come sistema di simboli volontariamente prodotti e attraverso il quale si esprime e realizza la comunicazione all'interno di una determinata comunità o gruppo sociale, costituisce indubbiamente un elemento essenziale dei processi di costruzione dell'identità. È interessante esplorare nello specifico le modalità di scrittura, in particolare i testi online prodotti dalle giovani generazioni al fine di identificarne i tratti salienti integrando gli aspetti di analisi linguistica con quelli maggiormente di tipo psicologico. L'obiettivo è stato quello di esplorare le modalità di scrittura dei giovani, identificandone similarità e specificità con i modi tradizionali di comunicare. Nello specifico è stata condotta un'analisi relativa agli aspetti linguistici e psicologici propri della scrittura in rete prodotta nei blog.}, KEYWORDS = {conoscenza, linguistica, media education, didattica, formazione}, PAGES = {105-123}, URL = {https://publications.cnr.it/doc/264766}, PUBLISHER = {Franco Angeli Edizioni (Roma-Milano, ITA)}, ISBN = {9788856845891}, BOOKTITLE = {Identità, apprendimento e comunità virtuali. Strumenti e attività on line}, EDITOR = {Albanese, O. and Ligorio, M. B. and Zanetti, M. 
A.}, } @INCOLLECTION{DINDO_2012_INCOLLECTION_DLNPCT_218892, AUTHOR = {Dindo, H. and La Tona, G. and Nivel, H. and Pezzulo, G. and Chella, A. and Thorisson, K. R.}, TITLE = {Simulation and anticipation as tools for coordinating with the future}, YEAR = {2012}, ABSTRACT = {A key goal in designing an artificial intelligence capable of performing complex tasks is a mechanism that allows it to efficiently choose appropriate and relevant actions in a variety of situations and contexts. Nowhere is this more obvious than in the case of building a general intelligence, where the contextual choice and application of actions must be done in the presence of large numbers of alternatives, both subtly and obviously distinct from each other. We present a framework for action selection based on the concurrent activity of multiple forward and inverse models. A key characteristic of the proposed system is the use of simulation to choose an action: the system continuously simulates the external states of the world (proximal and distal) by internally emulating the activity of its sensors, adopting the same decision process as if it were actually operating in the world, and basing subsequent choice of action on the outcome of such simulations. The work is part of our larger effort to create new observation-based machine learning techniques. We describe our approach, an early implementation, and an evaluation in a classical AI problem-solving domain: the Sokoban puzzle.}, KEYWORDS = {Machine learning techniques}, PAGES = {117-125}, URL = {http://link.springer.com/content/pdf/10.1007%2F978-3-642-34274-5_24}, VOLUME = {196}, PUBLISHER = {Springer-Verlag (Berlin Heidelberg, DEU)}, BOOKTITLE = {Biologically Inspired Cognitive Architectures}, EDITOR = {Chella, A.}, } @INCOLLECTION{IOANNDIS_2012_INCOLLECTION_ITDFVTP_218873, AUTHOR = {Ioannidis, D. and Tzovaras, D. and Dalle Mura, G. and Ferro, M. and Valenza, G. and Tognetti, A. 
and Pioggia, G.},
  TITLE = {Gait and Anthropometric Profile Biometrics: A Step Forward},
  YEAR = {2012},
  ABSTRACT = {While a sharp debate is emerging about whether conventional biometric technology offers society any significant advantages over other forms of identification, and whether it constitutes a threat to privacy, technology is rapidly progressing. Politicians and the public are still discussing fingerprinting and iris scan, while scientists and engineers are already testing futuristic solutions. Second generation biometrics - which include multimodal biometrics, behavioural biometrics, dynamic face recognition, EEG and ECG biometrics, remote iris recognition, and other, still more astonishing, applications - is a reality which promises to overturn any current ethical standard about human identification. Robots which recognise their masters, CCTV which detects intentions, voice responders which analyse emotions: these are only a few applications in progress to be developed.},
  KEYWORDS = {biometrics sensing seat},
  PAGES = {105-127},
  URL = {http://www.springer.com/social+sciences/applied+ethics/book/978-94-007-3891-1},
  VOLUME = {11},
  DOI = {10.1007/978-94-007-3892-8_5},
  PUBLISHER = {Springer Verlag (Norwell MA, USA)},
  ISBN = {978-94-007-3891-1},
  BOOKTITLE = {Second Generation Biometrics: The Ethical, Legal and Social Context},
  EDITOR = {Mordini, E. and Tzovaras, D.},
}

@INCOLLECTION{MARINELLI_2012_INCOLLECTION_MBMSCCC_136479,
  AUTHOR = {Marinelli, R. and Bindi, R. and Marchi, S. and Santarcangelo, E. L. and Cavallaro, F. and Castellani, E. and Carli, G.},
  TITLE = {Suscettibilità ipnotica e linguaggio},
  YEAR = {2012},
  KEYWORDS = {ipnosi, linguaggio, nlp},
  URL = {https://publications.cnr.it/doc/136479},
  PUBLISHER = {Bulzoni (Roma, ITA)},
}

@INCOLLECTION{MORGAVI_2012_INCOLLECTION_MMMC_186569,
  AUTHOR = {Morgavi, G. and Marconi, L. and Morando, M.
and Cutugno, P.},
  TITLE = {From human creative cognitive processes to adaptable artificial system design},
  YEAR = {2012},
  ABSTRACT = {In epigenetic robotics, a new research field, interdisciplinary theory and empirical evidences are used to inform adaptive robotic models, and, vice-versa, these models can be used as tools to make experimental predictions in developmental psychology. A truly autonomous robot should be capable of evolving and `growing up' through experience. Following different psychological points of view, growing up implies a knowledge creative process called: change of functional meaning; increased complexity; enlargement of the internal knowledge map; abstraction and insight. To understand this creative process, we organized an experiment with pre-school children diving with the abstraction process. The cognitive development of children of this age do not include the ability of abstraction, but they are able to explain the process they are thinking. Forty-two metaphoric sentences have been proposed to eight working-groups, of nine to ten children each, asking for their abstracted meanings. After a preliminary "brainstorming" phase, where the free creative associations were prevalent, we addressed children's attention towards the individuation of the metaphoric meaning. The process has been recorded and then we analyzed and classified the answers. Collective speech have been analyzed to compensate the individual differences. The children disclosure was mostly driven by their value system, their motivations and their emotions. They tried many different strategies to reach the abstract meaning, starting from their concrete knowledge and experiences. Each children followed a set of thinking paths that resulted in some very interesting suggestions for the architecture of an adaptive and evolving robot: i.e.
the importance of multi-sensor perception, motivation and emotional drives are underlined and, the growing up insights shows similarities to emergent self-organized behaviours.},
  KEYWORDS = {epigenetic robotics, growing up, abstraction},
  PAGES = {133-145},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84892171708\&origin=inward},
  DOI = {10.4324/9780203325988},
  PUBLISHER = {Psychology Press (London, GBR)},
  ISBN = {9780203325988},
  BOOKTITLE = {Attention, Representation, and Human Performance: Integration of Cognition, Emotion, and Motivation},
  EDITOR = {Masmoudi, S. and Dai, D. Y. and Naceur, A.},
}

@INCOLLECTION{PEZZULO_2012_INCOLLECTION_P_218924,
  AUTHOR = {Pezzulo, G.},
  TITLE = {Re-founding cognitivism based on the cybernetic idea of goal-directed action},
  YEAR = {2012},
  KEYWORDS = {goals, prediction, cognition},
  PAGES = {13-23},
  URL = {https://publications.cnr.it/doc/218924},
  PUBLISHER = {College Publications Ltd (London, GBR)},
  BOOKTITLE = {The Goals of Cognition. Essays in honour of Cristiano Castelfranchi},
  EDITOR = {Paglieri, F. and Tummolini, L. and Falcone, R. and Miceli, M.},
}

@INCOLLECTION{PIRRELLI_2012_INCOLLECTION_PFC_136472,
  AUTHOR = {Pirrelli, V. and Ferro, M. and Calderone, B.},
  TITLE = {Learning Paradigms in Time and Space: Computational Evidence from Romance Languages},
  YEAR = {2012},
  ABSTRACT = {In the linguistic literature, paradigms have enjoyed a hybrid status, half-way between entrenched patterns of lexical organization and processing structures enforcing global constraints on the output of traditional inflection rules.
We describe here an original computational model of the mental lexicon where paradigmatic structures emerge through learning as the by-product of the endogenous dynamics of lexical memorization as competitive self-organization, based on the complementary principles of formal contrast (in space) and association biuniqueness (in time).},
  KEYWORDS = {Computational model, Lexical memorization, Mental lexicon, Processing structures, Self-organizing maps},
  PAGES = {135-157},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84921732430\&origin=inward},
  DOI = {10.1093/acprof:oso/9780199589982.003.0008},
  PUBLISHER = {Oxford University Press (Oxford, GBR)},
  ISBN = {978-0-19-958998-2},
  BOOKTITLE = {Morphological Autonomy: Perspectives for Romance Inflectional Morphology},
  EDITOR = {Maiden, M. and Smith, J. C. and Goldbach, M. and Hinzelin, M.},
}

@EDITORIAL{DIRETTODASEGRE_2012_EDITORIAL_DAMS_221663,
  AUTHOR = {Segre, C. and Martignoni, C. and Morini, L. and Sassi, M.},
  TITLE = {Rimario diacronico dell'Orlando Furioso},
  YEAR = {2012},
  NOTE = {Diretto da C. Segre; a cura di C. Martignoni, L. Morini e M. Sassi},
  ABSTRACT = {Il rimario dell'Orlando Furioso qui edito è diacronico perché rappresenta sistematicamente i numerosi cambi di rimante fra le tre redazioni dell'Orlando Furioso, tutte e tre curate personalmente dall'autore (A, 1516; B, 1521; C, 1532). Se perciò un verso ha subito cambi di rimante esso è presente sotto i rimanti successivamente impiegati nelle tre redazioni. Il confronto è immediato in questo rimario diacronico, sia che si parta da un verso di A per arrivare alla forma assunta in B e poi in C, sia che si parta da C e si risalga a B e ad A. Gli sviluppi del contesto analizzato si possono riportare anche all'insieme del macrocontesto grazie all'acclusa Tavola comparativa delle tre edizioni originali ...},
  KEYWORDS = {Orlando Furioso.
Rimario Diacronico},
  PAGES = {1-1702},
  URL = {https://publications.cnr.it/doc/221663},
  PUBLISHER = {Iuss Press (Pavia, ITA)},
  ISBN = {9788861980686},
}

@EDITORIAL{FRANCESCONI_2012_EDITORIAL_FMPW_330113,
  AUTHOR = {Francesconi, E. and Montemagni, S. and Peters, W. and Wyner, A.},
  TITLE = {Proceedings of the Fourth Workshop on Semantic Processing of Legal Texts (SPLeT 2012)},
  YEAR = {2012},
  KEYWORDS = {Trattamento Automatico del Linguaggio, Linguaggio Giuridico, Estrazione di Conoscenza},
  PAGES = {71},
  URL = {http://www.lrec-conf.org/proceedings/lrec2012/workshops/27.LREC%202012%20Workshop%20Proceedings%20SPLeT.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-7-7},
}

@EDITORIAL{MARZI_2012_EDITORIAL_MP_287395,
  AUTHOR = {Marzi, C. and Pirrelli, V.},
  TITLE = {Understanding the Architecture of the Mental Lexicon},
  YEAR = {2012},
  ABSTRACT = {The present collection stems from the 1st NetWordS Workshop "Understanding the architecture of the mental lexicon: Integration of existing approaches", held in the Pisa Research Area of the Italian National Research Council, in November 2011. "NetWordS: the European network on Word Structure in the languages of Europe" is the Research Networking Programme of the European Science Foundation launched in May 2011 with the ambitious goal of paving the way to the European interdisciplinary research agenda on the Mental Lexicon, with particular emphasis on the following three main challenges: - lexicon and rules in the grammar, - word knowledge and word use, - words and meanings.},
  KEYWORDS = {Mental Lexicon, interdisciplinary approach},
  PAGES = {101-274},
  URL = {https://publications.cnr.it/doc/287395},
  VOLUME = {XI},
  PUBLISHER = {Il Mulino (Bologna, ITA)},
  ISBN = {978-88-15-23601-2},
}

@INPROCEEDINGS{BIANCHI_2012_INPROCEEDINGS_BTG_282573,
  AUTHOR = {Bianchi, E. and Tavosanis, M.
and Giovannetti, E.},
  TITLE = {Creation of a bottom-up corpus-based ontology for Italian Linguistics},
  YEAR = {2012},
  ABSTRACT = {This paper describes the steps of construction of a shallow lexical ontology of Italian Linguistics in Italian, set to be used by a meta-search engine for query refinement. The ontology was constructed with the software Protege 4.0.2 and encoded in OWL format; its construction has been carried out following the steps described in the well-known Ontology Learning From Text (OLFT) layer cake. The starting point was the automatic term extraction from a corpus of web documents concerning the domain of interest (304,000 words); as regards corpus construction, we describe the main criteria of the web documents selection and its critical points, concerning the definition of user profile and of degrees of specialisation. We then describe the process of term validation and construction of a glossary of terms of Italian Linguistics; afterwards, we outline the identification of synonymic chains and the main criteria of ontology design: top classes of ontology are Concept (containing taxonomy of concepts) and Term (containing terms of the glossary as instances), while concepts are linked through part-whole and involved-role relation, both borrowed from Wordnet. Finally, we show some examples of the application of the ontology for query refinement.},
  KEYWORDS = {Ontologies, Italian Linguistics, Query refinement},
  PAGES = {2641-2647},
  URL = {https://publications.cnr.it/doc/282573},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  CONFERENCE_NAME = {LREC 2012-Eighth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Istanbul},
  CONFERENCE_DATE = {23-25 maggio 2012},
  BOOKTITLE = {Language Resources and Evaluation},
}

@INPROCEEDINGS{BONIN_2012_INPROCEEDINGS_BDMV_310580,
  AUTHOR = {Bonin, F. and Dell'Orletta, F. and Montemagni, S.
and Venturi, G.},
  TITLE = {Lessico settoriale e lessico comune nell'estrazione di terminologia specialistica da corpora di dominio},
  YEAR = {2012},
  PAGES = {207-220},
  URL = {https://publications.cnr.it/doc/310580},
  PUBLISHER = {Bulzoni Editore (Roma, ITA)},
  ISBN = {978-88-7870-655-2},
  CONFERENCE_NAME = {XLIV congresso internazionale di studi della società di linguistica italiana},
  CONFERENCE_PLACE = {Viterbo},
  CONFERENCE_DATE = {27-29 settembre 2010},
  BOOKTITLE = {Lessico e Lessicologia. Atti del XLIV congresso internazionale di studi della società di linguistica italiana},
}

@INPROCEEDINGS{BOSCO_2012_INPROCEEDINGS_BMS_330109,
  AUTHOR = {Bosco, C. and Montemagni, S. and Simi, M.},
  TITLE = {Harmonization and Merging of two Italian Dependency Treebanks},
  YEAR = {2012},
  ABSTRACT = {The paper describes the methodology which is currently being defined for the construction of a "Merged Italian Dependency Treebank" (MIDT) starting from already existing resources. In particular, it reports the results of a case study carried out on two available dependency treebanks, i.e. TUT and ISST-TANL. The issues raised during the comparison of the annotation schemes underlying the two treebanks are discussed and investigated with a particular emphasis on the definition of a set of linguistic categories to be used as a "bridge" between the specific schemes.
As an encoding format, the CoNLL de facto standard is used.},
  KEYWORDS = {Syntactic Annotation, Merging of Resources, Dependency Parsing},
  PAGES = {23-30},
  URL = {http://www.lrec-conf.org/proceedings/lrec2012/workshops/06.LREC%202012%20Merging%20Proceedings.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-7-7},
  CONFERENCE_NAME = {LREC 2012 Workshop on Language Resource Merging},
  CONFERENCE_PLACE = {Istanbul},
  CONFERENCE_DATE = {22 May 2012},
  BOOKTITLE = {Proceedings of the LREC 2012 Workshop on Language Resource Merging},
  EDITOR = {Bel, N. and Gavrilidou, M. and Monachini, M. and Quochi, V. and Rimell, L.},
}

@INPROCEEDINGS{BOZZI_2012_INPROCEEDINGS_B_221536,
  AUTHOR = {Bozzi, A.},
  TITLE = {La filologia del testo assistita dal calcolatore},
  YEAR = {2012},
  ABSTRACT = {Il termine filologia del testo abbraccia molte attività che uno studioso compie nel momento in cui decida di affrontare lo studio di un'opera antica, moderna o contemporanea. Tali incombenze possono riguardare sia lo sviluppo del testo e la ricerca di tutti gli elementi interni ed esterni che ne mettono in luce la storia, sia la valutazione e la collocazione di quanto il testo veicola in relazione con le conoscenze che si hanno sul suo autore, se noto, sull'ambiente in cui il testo nasce, sugli eventuali rapporti con altre fonti precedenti o contemporanee...},
  KEYWORDS = {Filologia computazionale},
  PAGES = {219-229},
  URL = {https://publications.cnr.it/doc/221536},
  VOLUME = {10},
  CONFERENCE_NAME = {Il diritto romano e le scienze antichistiche nell'era digitale},
  CONFERENCE_PLACE = {Firenze},
  CONFERENCE_DATE = {12-13 settembre 2011},
}

@INPROCEEDINGS{CALDERONE_2012_INPROCEEDINGS_CP_288012,
  AUTHOR = {Calderone, B. and Pirrelli, V.},
  TITLE = {Apprendimento morfologico, relazioni base-derivato e topologie paradigmatiche.
Evidenze psico-computazionali a confronto},
  YEAR = {2012},
  ABSTRACT = {Il presente lavoro è volto a esplorare alcune dinamiche acquisizionali relative ai processi di maturazione della competenza morfologica in apprendenti bambini. In quest'ottica, sono riportate due differenti simulazioni computazionali dei processi di apprendimento della morfologia flessiva in Italiano e in Inglese. La prima simulazione, propria di un quadro connessionista classico, dà conto in modo inadeguato delle differenti scale temporali nell'apprendimento di alcune forme flesse verbali in inglese e italiano. La letteratura sull'argomento (Pizzuto \& Caselli 1992, Noccetti 2003) documenta in modo convergente una maggiore rapidità nell'apprendimento delle forme del presente indicativo da parte dei bambini italiani rispetto al ritmo di acquisizione delle forme verbali corrispondenti (la forma di base e la terza persona singolare in -s) da parte di bambini di madre lingua inglese. La seconda simulazione, basata su un modello di memorie associative "a cascata" addestrate tramite protocollo non-supervisionato, rende conto in maniera non banale del paradosso acquisizionale, confermato su base inter-linguistica da un recente studio di Dressler e colleghi (Bittner et al., 2003), secondo cui sistemi flessivi più complessi e completi sono appresi con maggiore facilità di sistemi flessivi più semplici ed estesamente sincretici.},
  KEYWORDS = {Lessico Mentale, apprendimento morfologico, paradigmi flessionali},
  PAGES = {17},
  URL = {https://publications.cnr.it/doc/288012},
  PUBLISHER = {Bulzoni Editore (Roma, ITA)},
  CONFERENCE_NAME = {XLII Convegno della Società di Linguistica Italiana},
  CONFERENCE_PLACE = {Scuola Normale Superiore, Pisa},
  CONFERENCE_DATE = {25-27 settembre 2008},
  BOOKTITLE = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)},
  EDITOR = {Bambini, V. and Ricci, I. and Bertinetto, P.
M.},
}

@INPROCEEDINGS{CALZOLARI_2012_INPROCEEDINGS_CDFMRRS_220194,
  AUTHOR = {Calzolari, N. and Del Gratta, R. and Francopoulo, G. and Mariani, J. and Rubino, F. and Russo, I. and Soria, C.},
  TITLE = {The LRE Map. Harmonising Community Descriptions of Resources},
  YEAR = {2012},
  ABSTRACT = {Accurate and reliable documentation of Language Resources is an undisputable need: documentation is the gateway to discovery of Language Resources, a necessary step towards promoting the data economy. Language resources that are not documented virtually do not exist: for this reason every initiative able to collect and harmonise metadata about resources represents a valuable opportunity for the NLP community. In this paper we describe the LRE Map, reporting statistics on resources associated with LREC2012 papers and providing comparisons with LREC2010 data. The LRE Map, jointly launched by FLaReNet and ELRA in conjunction with the LREC 2010 conference, is an instrument for enhancing availability of information about resources, either new or already existing ones, reinforcing and facilitating the use of standards in the community. The LRE Map web interface provides the possibility of searching according to a fixed set of metadata and to view the details of extracted resources. The LRE Map is continuing to collect bottom-up input about resources from authors of other conferences through standard submission process.
This will help broadening the notion of "language resources" and attract to the field neighboring disciplines that so far have been only marginally involved by the standard notion of language resources.},
  KEYWORDS = {Language resources, metadata, documentation},
  PAGES = {1084-1089},
  URL = {http://www.lrec-conf.org/proceedings/lrec2012/index.html},
  ISBN = {978-2-9517408-7-7},
  CONFERENCE_NAME = {The Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  CONFERENCE_PLACE = {Istanbul, Turkey},
  CONFERENCE_DATE = {23-25 May 2012},
  BOOKTITLE = {Proceedings of LREC'12-The Eighth International Conference on Language Resources and Evaluation},
  EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.},
}

@INPROCEEDINGS{CASELLI_2012_INPROCEEDINGS_CFQRR_287038,
  AUTHOR = {Caselli, T. and Frontini, F. and Quochi, V. and Rubino, F. and Russo, I.},
  TITLE = {Flexible Acquisition of Subcategorization Frames in Italian},
  YEAR = {2012},
  ABSTRACT = {Lexica of predicate-argument structures constitute a useful tool for several tasks in NLP. This paper describes a web-service system for automatic acquisition of verb subcategorization frames (SCFs) from parsed data in Italian. The system acquires SCFs in an unsupervised manner. We created two gold standards for the evaluation of the system, the first by mixing together information from two lexica (one manually created and the second automatically acquired) and manual exploration of corpus data and the other annotating data extracted from a specialized corpus (environmental domain). Data filtering is accomplished by means of the maximum likelihood estimate (MLE). The evaluation phase has allowed us to identify the best empirical MLE threshold for the creation of a lexicon (P=0.653, R=0.557, F1=0.601).
In addition to this, we assigned to the extracted entries of the lexicon a confidence score based on the relative frequency and evaluated the extractor on domain specific data. The confidence score will allow the final user to easily select the entries of the lexicon in terms of their reliability: one of the most interesting feature of this work is the possibility the final users have to customize the results of the SCF extractor, obtaining different SCF lexica in terms of size and accuracy.},
  KEYWORDS = {lexicon, automatic acquisition, subcategorisation frames},
  PAGES = {2842-2848},
  URL = {http://www.lrec-conf.org/proceedings/lrec2012/summaries/390.html},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-7-7},
  CONFERENCE_NAME = {Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  CONFERENCE_PLACE = {Istanbul, Turkey},
  CONFERENCE_DATE = {23-25 Maggio 2012},
  BOOKTITLE = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.},
}

@INPROCEEDINGS{CASELLI_2012_INPROCEEDINGS_CRR_220288,
  AUTHOR = {Caselli, T. and Russo, I. and Rubino, F.},
  TITLE = {Assigning Connotation Values to Events},
  YEAR = {2012},
  ABSTRACT = {Sentiment Analysis (SA) and Opinion Mining (OM) have become a popular task in recent years in NLP with the development of language resources, corpora and annotation schemes. The possibility to discriminate between objective and subjective expressions contributes to the identification of a document's semantic orientation and to the detection of the opinions and sentiments expressed by the authors or attributed to other participants in the document.
Subjectivity word sense disambiguation helps in this task, automatically determining which word senses in a corpus are being used subjectively and which are being used objectively. This paper reports on a methodology to assign in a semi-automatic way connotative values to eventive nouns usually labelled as neutral through syntagmatic patterns that express cause-effect relations between emotion cause events and emotion words. We have applied our method to nouns and we have been able reduce the number of OBJ polarity values associated to event noun.},
  KEYWORDS = {cause emotion events, event connotative values, sentiment polarity},
  PAGES = {3082-3089},
  URL = {http://www.jotform.com/uploads/fabioaffeilc/30222975566357/225367778092788016/1111_Pa},
  CONFERENCE_NAME = {Eighth International Conference on Language Resources and Evaluation Proceedings (LREC'12)},
  CONFERENCE_PLACE = {Istanbul, Turkey},
  CONFERENCE_DATE = {21-27 maggio 2012},
}

@INPROCEEDINGS{CIGNONI_2012_INPROCEEDINGS_CM_219500,
  AUTHOR = {Cignoni, L. and Marinelli, R.},
  TITLE = {Learning the Ropes: A Software Tool for {CLIL} Courses in Maritime Schools and Institutions},
  YEAR = {2012},
  ABSTRACT = {This paper outlines a proposal for maritime English language teaching in public and private Nautical Schools and other maritime educational institutions and establishments in Italy, using a content and language integrated learning (CLIL) approach. The courses are addressed in particular to those students who would like to take up a marine career as officers, engineers or other crew members of the Merchant Navy, and thus require an adequate knowledge of seafaring terminology, but can also be interesting for those wishing to explore the origins and development of maritime language.
In order to provide a more challenging environment and better opportunity for the learning of seafaring terms and expressions in English, students are supported by Mariterm, a lexical database, organized in semantic relations, available at the Institute for Computational Linguistics (ILC) of the National Research Council (CNR) in Pisa.},
  KEYWORDS = {maritime terminology},
  PAGES = {5279-5288},
  URL = {https://publications.cnr.it/doc/219500},
  CONFERENCE_NAME = {4th International Conference on Education and New Learning Technologies},
  CONFERENCE_PLACE = {Barcellona},
  CONFERENCE_DATE = {2-4 luglio 2012},
}

@INPROCEEDINGS{DELGRATTA_2012_INPROCEEDINGS_DFMQRAL_223098,
  AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Quochi, V. and Rubino, F. and Abrate, M. and Lo Duca, A.},
  TITLE = {L-LEME: an Automatic Lexical Merger based on the LMF Standard},
  YEAR = {2012},
  ABSTRACT = {The present paper describes LMF LExical MErger (L-LEME), an architecture to combine two lexicons in order to obtain new resource(s). L-LEME relies on standards, thus exploiting the benefits of the ISO Lexical Markup Framework (LMF) to ensure interoperability. L-LEME is meant to be dynamic and heavily adaptable: it allows the users to configure it to meet their specific needs. The L-LEME architecture is composed of two main modules: the Mapper, which takes in input two lexicons A and B and a set of user-defined rules and instructions to guide the mapping process (Directives D) and gives in output all matching entries. The algorithm also calculates a cosine similarity score. The Builder takes in input the previous results, a set of Directives D1 and produces a new LMF lexicon C. The Directives allow the user to define its own building rules and different merging scenarios. L-LEME is applied to a specific concrete task within the PANACEA project, namely the merging of two Italian SubCategorization Frame (SCF) lexicons.
The experiment is interesting in that A and B have different philosophies behind, being A built by human introspection and B automatically extracted. Ultimately, L-LEME has interesting repercussions in many language technology applications.},
  KEYWORDS = {LMF, Lexicon mapping, similarity score},
  PAGES = {31-40},
  URL = {https://publications.cnr.it/doc/223098},
  ISBN = {978-2-9517408-7-7},
  CONFERENCE_NAME = {The Eighth International Conference on Language Resources and Evaluation (LREC) 2012},
  CONFERENCE_PLACE = {Istanbul, Turkey},
  CONFERENCE_DATE = {2012},
  BOOKTITLE = {Proceedings of the LREC 2012 Workshop on Language Resource Merging},
  EDITOR = {Bel, N. and Gavrilidou, M. and Monachini, M. and Quochi, V. and Rimell, L.},
}

@INPROCEEDINGS{DELGRATTA_2012_INPROCEEDINGS_DFRRC_220182,
  AUTHOR = {Del Gratta, R. and Frontini, F. and Rubino, F. and Russo, I. and Calzolari, N.},
  TITLE = {The Language Library: supporting community effort for collective resource production},
  YEAR = {2012},
  ABSTRACT = {Relations among phenomena at different linguistic levels are at the essence of language properties but today we focus mostly on one specific linguistic layer at a time, without (having the possibility of) paying attention to the relations among the different layers. At the same time our efforts are too much scattered without much possibility of exploiting other people's achievements. To address the complexities hidden in multilayer interrelations even small amounts of processed data can be useful, improving the performance of complex systems. Exploiting the current trend towards sharing we want to initiate a collective movement that works towards creating synergies and harmonisation among different annotation efforts that are now dispersed.
In this paper we present the general architecture of the Language Library, an initiative which is conceived as a facility for gathering and making available through simple functionalities the linguistic knowledge the field is able to produce, putting in place new ways of collaboration within the LRT community. In order to reach this goal, a first population round of the Language Library has started around a core of parallel/comparable texts that have been annotated by several contributors submitting a paper for LREC2012. The Language Library has also an ancillary aim related to language documentation and archiving and it is conceived as a theory-neutral space which allows for several language processing philosophies to coexist.},
  KEYWORDS = {annotation, metadata, scientific crowdsourcing},
  PAGES = {43-49},
  URL = {https://publications.cnr.it/doc/220182},
  CONFERENCE_NAME = {The Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  CONFERENCE_PLACE = {Istanbul, Turkey},
  CONFERENCE_DATE = {23-25 May 2012},
}

@INPROCEEDINGS{DELLORLETTA_2012_INPROCEEDINGS_DMMPV_219489,
  AUTHOR = {Dell'Orletta, F. and Marchi, S. and Montemagni, S. and Plank, B. and Venturi, G.},
  TITLE = {The SPLeT-2012 Shared Task on Dependency Parsing of Legal Texts},
  YEAR = {2012},
  ABSTRACT = {The 4th Workshop on "Semantic Processing of Legal Texts" (SPLeT-2012) presents the first multilingual shared task on Dependency Parsing of Legal Texts. In this paper, we define the general task and its internal organization into sub-tasks, describe the datasets and the domain-specific linguistic peculiarities characterizing them.
We finally report the results achieved by the participating systems, describe the underlying approaches and provide a first analysis of the final test results.},
  KEYWORDS = {Dependency Parsing, Domain Adaptation, Legal Text Processing},
  URL = {http://www.lrec-conf.org/proceedings/lrec2012/workshops/27.LREC%202012%20Workshop%20Proceedings%20SPLeT.pdf},
  CONFERENCE_NAME = {Fourth Workshop on Semantic Processing of Legal Texts (SPLeT 2012)-First Shared Task on Dependency Parsing of Legal Texts (SPLeT 2012)},
  CONFERENCE_PLACE = {Istanbul},
  CONFERENCE_DATE = {27 Maggio 2012},
}

@INPROCEEDINGS{DELLORLETTA_2012_INPROCEEDINGS_DMMVAF_219483,
  AUTHOR = {Dell'Orletta, F. and Marchi, S. and Montemagni, S. and Venturi, G. and Agnoloni, T. and Francesconi, E.},
  TITLE = {Domain Adaptation for Dependency Parsing at Evalita 2011},
  YEAR = {2012},
  ABSTRACT = {The domain adaptation task was aimed at investigating techniques for adapting state-of-the-art dependency parsing systems to new domains. Both the language dealt with, i.e. Italian, and the target domain, namely the legal domain, represent two main novelties of the task organised at Evalita 2011. In this paper, we define the task and describe how the datasets were created from different resources. In addition, we characterize the different approaches of the participating systems, report the test results, and provide a first analysis of these results.},
  KEYWORDS = {Dependency Parsing, Domain Adaptation, Legal Text Processing},
  PAGES = {1-7},
  URL = {http://www.evalita.it/sites/evalita.fbk.eu/files/working_notes2011/Domain_Adaptation/},
  CONFERENCE_NAME = {Evaluation of NLP and Speech Tools for Italian (EVALITA 2011): Domain Adaptation track},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {24-25 Gennaio 2012},
}

@INPROCEEDINGS{DELLORLETTA_2012_INPROCEEDINGS_DMV_278420,
  AUTHOR = {Dell'Orletta, F. and Montemagni, S.
and Venturi, G.},
  TITLE = {Genre-oriented Readability Assessment: a Case Study},
  YEAR = {2012},
  PAGES = {91-98},
  URL = {https://publications.cnr.it/doc/278420},
  ISBN = {978-1-62748-389-6},
  CONFERENCE_NAME = {Workshop on "Speech and Language Processing Tools in Education" (SLP-TED)},
  CONFERENCE_PLACE = {Mumbai, India},
  CONFERENCE_DATE = {15 December, 2012},
  BOOKTITLE = {Proceedings of Workshop on "Speech and Language Processing Tools in Education" (SLP-TED)},
}

@INPROCEEDINGS{FRONTINI_2012_INPROCEEDINGS_FABBMPPS_278677,
  AUTHOR = {Frontini, F. and Aliprandi, C. and Bacciu, C. and Bartolini, R. and Marchetti, A. and Parenti, E. and Piccinonno, F. and Soru, T.},
  TITLE = {GLOSS, an infrastructure for the semantic annotation and mining of documents in the public security domain},
  YEAR = {2012},
  ABSTRACT = {Efficient access to information is crucial in the work of organizations that require decision taking in emergency situations. This paper gives an outline of GLOSS, an integrated system for the analysis and retrieval of data in the environmental and public security domain. We shall briefly present the GLOSS infrastructure and its use, and how semantic information of various kinds is integrated, annotated and made available to the final users.},
  KEYWORDS = {semantic annotation, text mining, geographic data},
  PAGES = {21-25},
  URL = {https://publications.cnr.it/doc/278677},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-7-7},
  CONFERENCE_NAME = {Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  CONFERENCE_PLACE = {Istanbul},
  CONFERENCE_DATE = {21-27/05/2012},
}

@INPROCEEDINGS{FRONTINI_2012_INPROCEEDINGS_FQR_220785,
  AUTHOR = {Frontini, F. and Quochi, V.
and Rubino, F.},
  TITLE = {Automatic Creation of Quality Multi-Word Lexica from Noisy Text Data},
  YEAR = {2012},
  ABSTRACT = {This paper describes the design of a tool for the automatic creation of multi-word lexica that is deployed as a web service and runs on automatically web-crawled data within the framework of the PANACEA platform. The main purpose of our task is to provide a (computationally "light") tool that creates a full high quality lexical resource of multi-word items. Within the platform, this tool is typically inserted in a work flow whose first step is automatic web-crawling. Therefore, the input data of our lexical extractor is intrinsically noisy. The paper evaluates the capacity of the tool to deal with noisy data, and in particular with texts containing a significant amount of duplicated paragraphs. The accuracy of the extraction of multi-word expressions from the original crawled corpus is compared to the accuracy of the extraction from a later "de-duplicated" version of the corpus. The paper shows how our method can extract with sufficiently good precision also from the original, noisy crawled data. The output of our tool is a multi-word lexicon formatted and encoded in XML according to the Lexical Mark-up Framework.},
  KEYWORDS = {Lexical induction, multi-word extraction, web-based distributed platform, noisy data},
  URL = {http://www.kde.cs.tut.ac.jp/~aono/pdf/COLING2012/AND/pdf/AND04.pdf},
  PUBLISHER = {ACM, Association for computing machinery (New York, USA)},
  ISBN = {978-1-4503-1919-5},
  CONFERENCE_NAME = {AND 2012},
  CONFERENCE_PLACE = {Mumbai, India},
  CONFERENCE_DATE = {December 9, 2012},
  BOOKTITLE = {Proceedings of the Sixth Workshop on Analytics for Noisy Unstructured Text Data},
}

@INPROCEEDINGS{GAVRILIDOU_2012_INPROCEEDINGS_GLDPPMFDFAM_219704,
  AUTHOR = {Gavrilidou, M. and Labropoulou, P. and Desipri, E. and Piperidis, S. and Papageorgiou, H. and Monachini, M. and Frontini, F. and Declerck, T. and Francopoulo, G. and Arranz, V.
and Mapelli, V.}, TITLE = {The META-SHARE Metadata Schema for the Description of Language Resources}, YEAR = {2012}, ABSTRACT = {This paper presents a metadata model for the description of language resources proposed in the framework of the META-SHARE infrastructure, aiming to cover both datasets and tools/technologies used for their processing. It places the model in the overall framework of metadata models, describes the basic principles and features of the model, elaborates on the distinction between minimal and maximal versions thereof, briefly presents the integrated environment supporting the LRs description and search and retrieval processes and concludes with work to be done in the future for the improvement of the model.}, KEYWORDS = {metadata, META-SHARE, LRs description}, PAGES = {1090-1097}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/index.html}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {The Eight International Conference on Language Resources and Evaluation (LREC'12)}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {23-25 may 2012}, } @INPROCEEDINGS{GIRAUDO_2012_INPROCEEDINGS_GMP_84808, AUTHOR = {Giraudo, H. and Montermini, F. and Pirrelli, V.}, TITLE = {Processi cognitivi nell'analisi delle classi verbali dell'italiano: un approccio sperimentale}, YEAR = {2012}, ABSTRACT = {L'analisi della flessione, soprattutto verbale, nelle lingue romanze ha ricevuto un notevole impulso negli ultimi anni, in particolare dall'apporto alla ricerca in linguistica teorica di discipline come la psicolinguistica o le scienze cognitive. In questo articolo intendiamo riesaminare la ripartizione dei verbi italiani in classi, e osservare come la teoria morfologica e l'analisi sperimentale possano dare risultati convergenti e contribuire a mettere in luce i processi mentali che costituiscono la base della competenza morfologica dei parlanti (cf. Pirrelli 2007a; 2007b e, per un'illustrazione Bonami et al. 
2008).}, KEYWORDS = {Morphology, Word Processing, Word Learning, Mental Lexicon, L1}, URL = {https://publications.cnr.it/doc/84808}, VOLUME = {2 (CD ROM)}, PUBLISHER = {Bulzoni Editore (Roma, ITA)}, ISBN = {978-88-7870-652-1}, CONFERENCE_NAME = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, CONFERENCE_PLACE = {Scuola Normale Superiore, Pisa}, CONFERENCE_DATE = {25-27 settembre 2008}, BOOKTITLE = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, EDITOR = {Bambini, V. and Ricci, I. and Bertinetto, P. M.}, } @INPROCEEDINGS{LENCI_2012_INPROCEEDINGS_LMVC_285544, AUTHOR = {Lenci, A. and Montemagni, S. and Venturi, G. and Cutrulla, M. R.}, TITLE = {Enriching the ISST-TANL Corpus with Semantic Frames}, YEAR = {2012}, ABSTRACT = {The paper describes the design and the results of a manual annotation methodology devoted to enrich the ISST-TANL Corpus with Semantic Frames information. The main issues encountered in applying the English FrameNet annotation criteria to a corpus of Italian language are discussed together with the choice of anchoring the semantic annotation layer to the underlying dependency syntactic structure. 
We also describe an experiment to measure inter-annotator agreement and a first case study to extend and specialise FrameNet annotation to a corpus of legislative texts.}, KEYWORDS = {Semantic annotation, FrameNet, Multi-layer annotated corpus}, PAGES = {3719-3726}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/986_Paper.pdf}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {Eight International Conference on Language Resources and Evaluation (LREC'12)}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {23-25 May 2012}, BOOKTITLE = {Proceedings of the Eight International Conference on Language Resources and Evaluation (LREC'12)}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{MANCA_2012_INPROCEEDINGS_MSMTB_219614, AUTHOR = {Manca, M. and Spinazzè, L. and Mastandrea, P. and Tessarolo, L. and Boschetti, F.}, TITLE = {Musisque Deoque: Text Retrieval on Critical Editions}, YEAR = {2012}, ABSTRACT = {This paper aims at illustrating the main features of the Musisque Deoque Project, which provides a fully freely searchable archive of Latin poetry equipped with critical apparatus. 
The first part explains how variants are mapped on the reference edition and the second part illustrates the web interface to retrieve sequences of words taking into account possible variants.}, KEYWORDS = {Computational Philology}, PAGES = {127-138}, URL = {https://publications.cnr.it/doc/219614}, VOLUME = {26}, PUBLISHER = {Gesellschaft für Sprachtechnologie und Computerlinguistik (Germania, Germania)}, ISSN = {0175-1336}, CONFERENCE_NAME = {Workshop on Annotation of Corpora for Research in the Humanities (ACRH)}, CONFERENCE_PLACE = {Heidelberg}, CONFERENCE_DATE = {5 gennaio 2012}, BOOKTITLE = {Journal for Language Technology and Computational Linguistics (JLCL)}, } @INPROCEEDINGS{MARINELLI_2012_INPROCEEDINGS_MC_220230, AUTHOR = {Marinelli, R. and Cignoni, L.}, TITLE = {In the same boat and other idiomatic seafaring expressions}, YEAR = {2012}, ABSTRACT = {This paper reports on a research carried out at the Institute for Computational Linguistics (ILC) on a set of idiomatic nautical expressions in Italian and English. A total of 200 Italian expressions were first selected and examined, using both monolingual and bilingual dictionaries, as well as specific lexicographical works dealing with the subject of idiomaticity, especially of the maritime type, and a similar undertaking was then conducted for the English expressions. We discuss the possibility of including both the Italian and English idiomatic expressions in the semantic database Mariterm, which contains terms belonging to the maritime domain. We describe the terminological database and the way in which the idiomatic expressions can be organised within the system, so that, similarly to the other synsets, they are connected to other concepts represented in the database, but at the same time continue to belong to a group of particular linguistic expressions. 
Furthermore, we study similarities and differences in meaning and usage of some idiomatic expressions in the two languages.}, KEYWORDS = {Terminology, Semantic databases, Idiomatic expressions}, PAGES = {627-631}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/index.html}, CONFERENCE_NAME = {Eight International Conference on Language Resources and Evaluation (LREC 2012)}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {23-25 maggio 2012}, } @INPROCEEDINGS{MARINELLI_2012_INPROCEEDINGS_MC_220218, AUTHOR = {Marinelli, R. and Cignoni, L.}, TITLE = {Enriching a Terminological Database with a Set of Idiomatic Expressions}, YEAR = {2012}, ABSTRACT = {The research described here is aimed at enriching the terminological database Mariterm with a set of idiomatic expressions related to the nautical field. The database is available at the Institute for Computational Linguistics (ILC) of the National Research Council (CNR) in Pisa (Italy), and contains semantic information for around 3500 Italian terms belonging to the maritime domain. Each Italian term is linked to other terms by means of semantic "internal relations" and is also connected to the equivalent synonyms in English. We relate on the methodology designed to expand the database, increase the lexical resource with explanations on the origins of the most common expressions, and study the similarities and differences in meaning and usage of some idiomatic expressions in the two languages. 
The possibility of using this linguistic resource also for didactic purposes in public and private schools is considered.}, KEYWORDS = {Lexical semantic databases, terminology, idiomatic expressions}, PAGES = {690-696}, URL = {https://publications.cnr.it/doc/220218}, CONFERENCE_NAME = {4th International Conference on Education and New Learning Technologies}, CONFERENCE_PLACE = {Barcellona}, CONFERENCE_DATE = {2-4 luglio 2012}, } @INPROCEEDINGS{MARZI_2012_INPROCEEDINGS_M_218950, AUTHOR = {Marzi, C.}, TITLE = {Knowledge communities in grey}, YEAR = {2012}, ABSTRACT = {The dynamic nature of modern human social interactions, and the increasing capability of wireless and mobile devices for creating and sharing contents, open up the opportunity for a wide dissemination of information through complex knowledge sharing systems. As the shared knowledge components build cognitive ties, there is no real sharing of knowledge without a common understanding of it. In this article, particular emphasis is laid on technologies in Natural Language understanding and knowledge management for providing structured, intelligent access to the continuously evolving content, generated on-line in a pervasive collaborative environment. In detail, robust automated techniques for term extraction and knowledge acquisition are used to tap the information density and the global coherence of text excerpts sampled from both general-purpose and subject-specific social networks. We show empirically that the two sources may exhibit considerable differences in terms of content accessibility and informativeness. 
Topics: Subject based Communities; Social Networking.}, KEYWORDS = {Concept maps, Grey literature, Knowledge sharing, Web communities}, PAGES = {34-40}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84924107682\&origin=inward}, VOLUME = {13}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISSN = {1386-2316}, ISBN = {9789077484173}, CONFERENCE_NAME = {Thirteenth International Conference on Grey Literature: The Grey Circuit-From Social Networking to Wealth Creation (GL13)}, CONFERENCE_PLACE = {Washington D. C.-USA}, CONFERENCE_DATE = {05-06/12 2011}, BOOKTITLE = {The Grey Circuit-From Social Networking to Wealth Creation}, EDITOR = {Farace, D. J. and Frantzen, J.}, } @INPROCEEDINGS{MARZI_2012_INPROCEEDINGS_MFCP_287129, AUTHOR = {Marzi, C. and Ferro, M. and Caudai, C. and Pirrelli, V.}, TITLE = {Evaluating Hebbian Self-Organizing Memories for Lexical Representation and Access}, YEAR = {2012}, ABSTRACT = {The lexicon is the store of words in long-term memory. Any attempt at modelling lexical competence must take issues of string storage seriously. In the present contribution, we discuss a few desiderata that any biologically-inspired computational model of the mental lexicon has to meet, and detail a multi-task evaluation protocol for their assessment. The proposed protocol is applied to a novel computational architecture for lexical storage and acquisition, the "Topological Temporal Hebbian SOMs" (T2HSOMs), which are grids of topologically organised memory nodes with dedicated sensitivity to time-bound sequences of letters. 
These maps can provide a rigorous and testable conceptual framework within which to provide a comprehensive, multi-task protocol for testing the performance of Hebbian self-organising memories, and a comprehensive picture of the complex dynamics between lexical processing and the acquisition of morphological structure.}, KEYWORDS = {Mental Lexicon, Morphology Acquisition, Self-Organizing Maps}, PAGES = {886-893}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/index.html}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {8th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {23-25/05/2012}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Uğur Doğan, M. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{MARZI_2012_INPROCEEDINGS_MFP_219553, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Prediction and Generalisation in Word Processing and Storage}, YEAR = {2012}, ABSTRACT = {Word storage and processing have traditionally been modelled according to different computational paradigms, in line with the classical corner-stone of "dual-route" models of word structure assuming a sharp dissociation between memory and computation (Clahsen 1999, Di Sciullo \& Williams 1987, Pinker \& Prince 1988, Prasada \& Pinker 1993). Even the most radical alternative to dual-route thinking, connectionist one-route models, challenged the lexicon-grammar dualism only by providing a neurally-inspired mirror image of classical base-to-inflection rules, while largely neglecting issues of lexical storage (Rumelhart \& McClelland 1986, McClelland \& Patterson 2002, Seidenberg \& McClelland 1989). Recent psycho- and neuro-linguistic evidence, however, supports a less deterministic and modular view of the interaction between stored word knowledge and on-line processing (Baayen et al. 1997, Hay 2001, Maratsos 2000, Stemberger \& Middleton 2003, Tabak et al. 
2005, Ford et al. 2003, Post et al. 2008). The view entails simultaneous activation of distributed patterns of cortical connectivity encoding redundant distributional regularities in language data. Furthermore, recent developments in morphological theorising question the primacy of grammar rules over lexical storage, arguing that word regularities emerge from independent principles of lexical organisation, whereby lexical units and constructions are redundantly stored and mutually related through entailment relations (Matthews 1991, Corbett \& Fraser 1993, Pirrelli 2000, Burzio 2004, Booij 2010). We endorse here such a non modular view on Morphology to investigate two basic behavioural aspects of human word processing: morphological prediction and generalisation. The investigation is based on a computer model of morphology acquisition supporting the hypothesis that they both derive from a common pool of principles of lexical organisation.}, KEYWORDS = {Morphological generalisation, Word processing, Self-organising memory}, PAGES = {114-131}, URL = {http://mmm.lingue.unibo.it/}, CONFERENCE_NAME = {Eighth Mediterranean Morphology Meeting on "Morphology and the architecture of the grammar" (MMM8)}, CONFERENCE_PLACE = {Cagliari, Italy}, CONFERENCE_DATE = {14-17 September 2011}, EDITOR = {Ralli, A. and Booij, G. and Scalise, S. and Karasimos, A.}, } @INPROCEEDINGS{MONACHINI_2012_INPROCEEDINGS_MFDRKGP_220211, AUTHOR = {Monachini, M. and Frontini, F. and De Felice, I. and Russo, I. and Khan, F. and Gagliardi, G. and Panunzi, A.}, TITLE = {Verb interpretation for basic action types: annotation, ontology induction and creation of prototypical scenes}, YEAR = {2012}, ABSTRACT = {In the last 20 years dictionaries and lexicographic resources such as WordNet have started to be enriched with multimodal content. Short videos depicting basic actions support the user's need (especially in second language acquisition) to fully understand the range of applicability of verbs. 
The IMAGACT project has among its results a repository of action verbs ontologically organised around prototypical action scenes in the form of both video recordings and 3D animations. The creation of the IMAGACT ontology, which consists in deriving action types from corpus instances of action verbs, intra and cross linguistically validating them and producing the prototypical scenes thereof, is the preliminary step for the creation of a resource that users can browse by verb, learning how to match different action prototypes with the correct verbs in the target language. The mapping of IMAGACT types onto WordNet synsets allows for a mutual enrichment of both resources.}, KEYWORDS = {ontology of actions, lexical resource, 3D animations}, PAGES = {69-80}, URL = {https://publications.cnr.it/doc/220211}, CONFERENCE_NAME = {COLING 2012-3rd Workshop on Cognitive Aspects of the Lexicon (CogALex-III)}, CONFERENCE_PLACE = {Mumbai, India}, CONFERENCE_DATE = {15 Dicembre 2012}, } @INPROCEEDINGS{MONEGLIA_2012_INPROCEEDINGS_MGPFRM_220262, AUTHOR = {Moneglia, M. and Gagliardi, G. and Panunzi, A. and Frontini, F. and Russo, I. and Monachini, M.}, TITLE = {IMAGACT: Deriving an Action Ontology from Spoken Corpora}, YEAR = {2012}, ABSTRACT = {This paper presents the IMAGACT annotation infrastructure which uses both corpus - based and competence - based methods for the simultaneous extraction of a language independent Action ontology from English and Italian spontaneous speech corpora. 
The infrastructure relies on an innovative methodology based on images of prototypical scenes and will identify high frequency action concepts in everyday life, suitable for the implementation of an open set of languages.}, KEYWORDS = {Action verbs, Ontology, imagery}, PAGES = {42-47}, URL = {https://publications.cnr.it/doc/220262}, ISBN = {978-90-74029-00-1}, CONFERENCE_NAME = {Eighth Joint ISO-ACL SIGSEM Workshop on Interoperable Semantic Annotation (ISA-8)}, CONFERENCE_PLACE = {Pisa, Italy}, CONFERENCE_DATE = {3-5 October 2012}, BOOKTITLE = {Proceedings of the Eight Joint ISO-ACL SIGSEM Workshop on Interoperable Semantic Annotation ISA-8}, EDITOR = {Bunt, H.}, } @INPROCEEDINGS{MONEGLIA_2012_INPROCEEDINGS_MMCPFGR_219656, AUTHOR = {Moneglia, M. and Monachini, M. and Calabrese, O. and Panunzi, A. and Frontini, F. and Gagliardi, G. and Russo, I.}, TITLE = {The IMAGACT Cross-linguistic Ontology of Action. A new infrastructure for natural language disambiguation}, YEAR = {2012}, ABSTRACT = {Action verbs, which are highly frequent in speech, cause disambiguation problems that are relevant to Language Technologies. This is a consequence of the peculiar way each natural language categorizes Action i.e. it is a consequence of semantic factors. Action verbs are frequently "general", since they extend productively to actions belonging to different ontological types. Moreover, each language categorizes action in its own way and therefore the cross-linguistic reference to everyday activities is puzzling. This paper briefly sketches the IMAGACT project, which aims at setting up a cross-linguistic Ontology of Action for grounding disambiguation tasks in this crucial area of the lexicon. The project derives information on the actual variation of action verbs in English and Italian from spontaneous speech corpora, where references to action are high in frequency. 
Crucially it makes use of the universal language of images to identify action types, avoiding the underdeterminacy of semantic definitions. Action concept entries are implemented as prototypic scenes; this will make it easier to extend the Ontology to other languages.}, KEYWORDS = {Action verbs, Ontology, Imagery}, PAGES = {2606-2613}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/428_Paper.pdf}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {The Eight International Conference on Language Resources and Evaluation (LREC'12)}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {23-25 may 2012}, } @INPROCEEDINGS{MONEGLIA_2012_INPROCEEDINGS_MMPFGR_220270, AUTHOR = {Moneglia, M. and Monachini, M. and Panunzi, A. and Frontini, F. and Gagliardi, G. and Russo, I.}, TITLE = {Mapping a corpus-induced ontology of action verbs on ItalWordNet}, YEAR = {2012}, ABSTRACT = {Action verbs are the least predictable linguistic type for bilingual dictionaries and they cause major problems for NLP technologies. This is not only because of language specific phraseology, but it is rather a consequence of the peculiar way each language categorizes events. In ordinary languages the most frequent action verbs are "general", since they extend productively to actions belonging to different ontological types. Moreover, each language categorizes actions in its own way and therefore the cross-linguistic reference to everyday activities is puzzling. A cross-linguistic stable ontology of actions is difficult to achieve because our knowledge on the actual variation of verbs across types of actions is largely unknown. 
This paper briefly presents the problems and the building strategies of the IMAGACT Ontology, which aims at filling this gap, and compares some early results on a set of Italian verbs with the information contained in ItalWordNet.}, KEYWORDS = {action verbs, ontology, image}, PAGES = {219-226}, URL = {https://publications.cnr.it/doc/220270}, ISBN = {978-80-263-0244-5}, CONFERENCE_NAME = {Global Wordnet Conference (GWC2012)}, CONFERENCE_PLACE = {Matsue, Japan}, CONFERENCE_DATE = {9-13 January 2012}, BOOKTITLE = {Proceedings of the 6th Global WordNet Conference (GWC2012)}, EDITOR = {Fellbaum, C. and Vossen, P.}, } @INPROCEEDINGS{MONTEMAGNI_2012_INPROCEEDINGS_MWDN_330114, AUTHOR = {Montemagni, S. and Wieling, M. and De Jonge, B. and Nerbonne, J.}, TITLE = {Patterns of Language Variation and Underlying Linguistic Features: A New Dialectometric Approach}, YEAR = {2012}, PAGES = {879-889}, URL = {https://publications.cnr.it/doc/330114}, VOLUME = {II}, PUBLISHER = {Franco Cesati Editore (Firenze, ITA)}, ISBN = {978-88-7667-433-4}, CONFERENCE_NAME = {XI Congresso SILFI (Società Internazionale di Linguistica e Filologia Italiana)}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-7 Ottobre 2010}, BOOKTITLE = {La variazione nell'italiano e nella sua storia. Varietà e varianti linguistiche e testuali. Atti dell'XI Congresso SILFI (Società Internazionale di Linguistica e Filologia Italiana)}, EDITOR = {Bianchi, P. and De Blasi, N. and De Caprio, C. and Montuori, F.}, } @INPROCEEDINGS{PARDELLI_2012_INPROCEEDINGS_PGS_218940, AUTHOR = {Pardelli, G. and Goggi, S. and Sassi, M.}, TITLE = {Grey Literature Between Tradition and Innovation: Is There a Continuum?}, YEAR = {2012}, ABSTRACT = {This study wants to explore ways of social media communication for Grey Literature. 
In particular it describes the role of social media in relation with traditional channels and how social media applications can be used for Grey.}, KEYWORDS = {Grey Literature, Communication Networks, Knowledge Networking, Knowledge Exchange}, PAGES = {165-169}, URL = {https://publications.cnr.it/doc/218940}, VOLUME = {13}, CONFERENCE_NAME = {Thirteenth International Conference on Grey Literature: The Grey Circuit, From Social Networking to Wealth Creation (GL 13)}, CONFERENCE_PLACE = {Washington D. C. USA}, CONFERENCE_DATE = {5-6 December 2011}, } @INPROCEEDINGS{PARDELLI_2012_INPROCEEDINGS_PSGB_217173, AUTHOR = {Pardelli, G. and Sassi, M. and Goggi, S. and Biagioni, S.}, TITLE = {From medical language processing to BioNLP domain}, YEAR = {2012}, ABSTRACT = {This paper presents the results of a terminological work on a reference corpus in the domain of Biomedicine. In particular, the research tends to analyse the use of certain terms in Biomedicine in order to verify their change over the time with the aim of retrieving from the net the very essence of documentation. The terminological sample contains words used in BioNLP and biomedicine and identifies which terms are passing from scientific publications to the daily press and which are rather reserved to scientific production. The final scope of this work is to determine how scientific dissemination to an ever larger part of the society enables a public of common citizens to approach communication on biomedical research and development; and its main source is a reference corpus made up of three main repositories from which information related to BioNLP and Biomedicine is extracted. 
The paper is divided in three sections: 1) an introduction dedicated to data extracted from scientific documentation; 2) the second section devoted to methodology and data description; 3) the third part containing a statistical representation of terms extracted from the archive: indexes and concordances allow to reflect on the use of certain terms in this field and give possible keys for having access to the extraction of knowledge in the digital era.}, KEYWORDS = {Information Extraction, Information Retrieval, Text mining, Digital Libraries}, PAGES = {2049-2055}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/687_Paper.pdf}, VOLUME = {7}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {Eight International Conference on Language Resources and Evaluation. LREC'12}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {21-27 may 2012}, BOOKTITLE = {Proceedings of the Eight International Conference on Language Resources and Evaluation (LREC'12)}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{PIRRELLI_2012_INPROCEEDINGS_PG_84785, AUTHOR = {Pirrelli, V. and Guevara, E.}, TITLE = {Understanding NN Compounds}, YEAR = {2012}, ABSTRACT = {In this paper we intend to pursue two basic objectives: i) point out a substantial convergence between classification criteria for compounding that have developed independently from largely complementary perspectives and methodological stances, and ii) assess the important empirical consequences of this convergence and their potential impact on recent linguistic analyses of lexical compounds as either lexical (and specifically morphological) or syntactic phenomena. 
These two points are brought home by focusing on a particular class of Italian compounds, namely endocentric NN compounds such as ufficio reclami ('complaint office') or pesce palla ('ball fish') that prove to be increasingly productive in contemporary Italian (cf. Dardano 1978, Bisetto 2004).}, KEYWORDS = {Morphological composition, Word Processing, Word Learning, Mental Lexicon}, PAGES = {17}, URL = {https://publications.cnr.it/doc/84785}, VOLUME = {2 (CD ROM)}, PUBLISHER = {Bulzoni Editore (Roma, ITA)}, ISBN = {978-88-7870-652-1}, CONFERENCE_NAME = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, CONFERENCE_PLACE = {Scuola Normale Superiore, Pisa}, CONFERENCE_DATE = {25-27 settembre 2008}, BOOKTITLE = {Linguaggio e cervello / Semantica, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, EDITOR = {Bambini, V. and Ricci, I. and Bertinetto, P. M.}, } @INPROCEEDINGS{POCH_2012_INPROCEEDINGS_PTHQB_286877, AUTHOR = {Poch, M. and Toral, A. and Hamon, O. and Quochi, V. and Bel, N.}, TITLE = {Towards a User-Friendly Platform for Building Language Resources based on Web Services}, YEAR = {2012}, ABSTRACT = {This paper presents the platform developed in the PANACEA project, a distributed factory that automates the stages involved in the acquisition, production, updating and maintenance of Language Resources required by Machine Translation and other Language Technologies. We adopt a set of tools that have been successfully used in the Bioinformatics field, they are adapted to the needs of our field and used to deploy web services, which can be combined to build more complex processing chains (workflows). This paper describes the platform and its different components (web services, registry, workflows, social network and interoperability). 
We demonstrate the scalability of the platform by carrying out a set of massive data experiments. Finally, a validation of the platform across a set of required criteria proves its usability for different types of users (non-technical users and providers).}, KEYWORDS = {service platform, workflow, interoperability}, PAGES = {1156-1163}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/543_Paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {Eighth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Istanbul, Turchia}, CONFERENCE_DATE = {23-25/05/2012}, BOOKTITLE = {Proceedings of the Eighth International Conference on Language Resources and Evaluation, LREC 2012}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{QUOCHI_2012_INPROCEEDINGS_QFR_220778, AUTHOR = {Quochi, V. and Frontini, F. and Rubino, F.}, TITLE = {A MWE Acquisition and Lexicon Builder Web Service}, YEAR = {2012}, ABSTRACT = {This paper describes the development of a web-service tool for the automatic extraction of Multi-word expressions lexicons, which has been integrated in a distributed platform for the automatic creation of linguistic resources. The main purpose of the work described is thus to provide a (computationally "light") tool that produces a full lexical resource: multi-word terms/items with relevant and useful attached information that can be used for more complex processing tasks and applications (e.g. parsing, MT, IE, query expansion, etc.). The output of our tool is a MW lexicon formatted and encoded in XML according to the Lexical Mark-up Framework. The tool is already functional and available as a service. 
Evaluation experiments show that the tool precision is of about 80\%.}, KEYWORDS = {Multiword extraction, lexical resources, LMF, web services}, PAGES = {2291-2306}, URL = {http://aclweb.org/anthology/C/C12/C12-1140.pdf}, PUBLISHER = {Curran Associates (Red Hook, NY 12571, USA)}, ISBN = {9781627483896}, CONFERENCE_NAME = {International Conference on Computational Linguistics (COLING)}, CONFERENCE_PLACE = {Mumbai, India}, CONFERENCE_DATE = {December 2012}, BOOKTITLE = {Proceedings of COLING 2012: Technical Papers}, EDITOR = {Kay, M. and Boitet, C.}, } @INPROCEEDINGS{RUBINO_2012_INPROCEEDINGS_RFQ_220773, AUTHOR = {Rubino, F. and Frontini, F. and Quochi, V.}, TITLE = {Integrating NLP Tools in a Distributed Environment: A Case Study Chaining a Tagger with a Dependency Parser}, YEAR = {2012}, ABSTRACT = {The present paper tackles the issue of PoS tag conversion within the framework of a distributed web service platform for the automatic creation of language resources. PoS tagging is now considered a "solved problem"; yet, because of the differences in the tagsets, interchange of the various PoS taggers available is still hampered. In this paper we describe the implementation of a PoS-tagged-corpus converter, which is needed for chaining together in a workflow the FreeLing PoS tagger for Italian and the DESR dependency parser, given that these two tools have been developed independently. The conversion problems experienced during the implementation, related to the properties of the different tagsets and of tagset conversion in general, are discussed together with the solutions adopted. Finally, the converter is evaluated by assessing the impact of conversion on the performance of the dependency parser by comparing with the outcome of the native pipeline. From this we learn that in most cases parsing errors are due to actual tagging errors, and not to conversion itself. 
Besides, information on accuracy loss is an important feature in a distributed environment of (NLP) services, where users need to decide which services best suit their needs}, KEYWORDS = {PoS tag conversion, interoperability, NLP pipelines}, PAGES = {2125-2131}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/summaries/726.html}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, ISBN = {9782951740877}, CONFERENCE_NAME = {Language Resources and Evaluation Conference 2012}, CONFERENCE_PLACE = {Istanbul, Turchia}, CONFERENCE_DATE = {23-25 Maggio 2012}, BOOKTITLE = {Proceedings of the Eight International Conference on Language Resources and Evaluation (LREC'12)}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{RUIMY_2012_INPROCEEDINGS_RPG_220294, AUTHOR = {Ruimy, N. and Piccini, S. and Giovannetti, E.}, TITLE = {Defining and Structuring Saussure's Terminology}, YEAR = {2012}, ABSTRACT = {In the framework of the Italian project 'For a digital edition of Ferdinand de Saussure's manuscripts', an electronic thesaurus of Saussure's terminology is being built, which includes new terms extracted from recently found manuscripts. The lexical model on which it is grounded is a customized version of the SIMPLE model. In this paper, an overview of the customization process is provided, with a special focus on the steps taken for designing a domain-specific ontology as well as on the creation of additional semantic relations and features. 
Lexical entries are illustrated and the potential of a structured organization of semantic knowledge for gaining a wider understanding of the overall domain terminology is highlighted.}, KEYWORDS = {Computational Lexicon}, PAGES = {828--833}, URL = {http://www.euralex.org/elx_proceedings/Euralex2012/pp828-833%20Ruimy,%20Piccini%20and}, CONFERENCE_NAME = {15th EURALEX International Congress (EURALEX2012)}, CONFERENCE_PLACE = {Oslo, Norway}, CONFERENCE_DATE = {07-11 / 08 2012}, } @INPROCEEDINGS{RUIMY_2012_INPROCEEDINGS_RPG_220309, AUTHOR = {Ruimy, N. and Piccini, S. and Giovannetti, E.}, TITLE = {Les Outils Informatiques au Service de la Terminologie Saussurienne}, YEAR = {2012}, ABSTRACT = {Bien que l'oeuvre de Ferdinand de Saussure ait été diffusée à travers le monde, sa pensée a été en grande partie reconstruite et interprétée par ses étudiants et disciples. Ses écrits authentiques revêtent pourtant, de notre point de vue, une importance fondamentale : ils mettent en lumière l'attention que Saussure accorde à la terminologie linguistique. Lui-même utilise souvent un vocabulaire particulier. Il forge des néologismes ou confère un sens nouveau à des mots existants, il emploie quelques termes de façon éphémère, change la dénotation de certains concepts au fil du temps, créant ainsi une terminologie qui lui est propre. Dans cet article, nous présentons le premier thésaurus-lexique électronique de la terminologie linguistique saussurienne, en cours de création. La population de ce lexique est constituée par une nomenclature mise à jour, comprenant non seulement la terminologie répertoriée par Godel et Engler mais également de nouveaux termes extraits de manuscrits actuellement à l'étude. À travers ce lexique, nous nous proposons de fournir une représentation structurée de la terminologie saussurienne, de définir le contenu sémantique de chacun des termes ainsi que la nature des relations qui les unissent. 
À cet effet, nous avons choisi de customiser le modèle lexical SIMPLE qui, dans le panorama de la Lexicographie Computationnelle, s'est imposé comme standard de facto et offre une représentation sémantique hautement structurée des unités lexicales. Le processus de customisation du modèle de base s'est tout d'abord focalisé sur la conception d'une ontologie lexicale de domaine (ontologie SIMPLE_FdS), et sa traduction dans le langage de représentation d'ontologie Web OWL. Puis, les deux autres composants du modèle de base, i.e. un réseau de relations sémantiques et un ensemble de traits sémantiques, ont été également adaptés aux exigences du domaine traité. Les moyens expressifs jugés appropriés pour rendre compte de la terminologie saussurienne ont été retenus tandis que des traits et relations spécifiques ont été créés afin d'exprimer des propriétés et des liens qui caractérisent l'organisation conceptuelle de ce domaine de la connaissance. Le lexique est ici illustré par des exemples d'entrées lexicales et le potentiel de la base de connaissance par des requêtes sur les données mémorisées. 
L'organisation structurée de la connaissance lexicale et la richesse de la représentation sémantique font de ce lexique un outil de recherche sémantique particulièrement performant et qui devrait contribuer de manière significative à mieux maîtriser le vocabulaire saussurien et à éclairer certains aspects originaux de la pensée du maître genevois.}, KEYWORDS = {Computational Lexicon}, PAGES = {1043--1056}, URL = {http://www.shs-conferences.org/articles/shsconf/abs/2012/01/shsconf_cmlf12_000294/shs}, DOI = {10.1051/shsconf/20120100294}, PUBLISHER = {EDP Sciences (Les Ulis Cedex, FRA)}, ISBN = {978-2-7598-0783-3}, CONFERENCE_NAME = {3e Congrès Mondial de Linguistique Française (CMLF 2012)}, CONFERENCE_PLACE = {Lione-Francia}, CONFERENCE_DATE = {04-07 / 07 2012}, BOOKTITLE = {3e Congrès Mondial de Linguistique Française}, } @INPROCEEDINGS{RUSSO_2012_INPROCEEDINGS_R_288035, AUTHOR = {Russo, I.}, TITLE = {Alternanze sintattiche, classi semantiche, funzioni comunicative. La posizione dell'aggettivo nel sintagma nominale}, YEAR = {2012}, PAGES = {2 B-9-2 B-10}, URL = {https://publications.cnr.it/doc/288035}, ISBN = {978-88-7870-652-1}, CONFERENCE_NAME = {Linguaggio e cervello-Semantica / Language and the brain-Semantics}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {25-27 settembre 2008}, BOOKTITLE = {Linguaggio e cervello-Semantica / Language and the brain-Semantics, Atti del XLII Convegno della Società di Linguistica Italiana (Pisa, Scuola Normale Superiore, 25-27 settembre 2008)}, EDITOR = {Bambini, V. and Ricci, I. and Bertinetto, P. M. and Collaboratori}, } @INPROCEEDINGS{SORIA_2012_INPROCEEDINGS_SBCMMOPQC_219679, AUTHOR = {Soria, C. and Bel, N. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. 
and Calzolari, N.}, TITLE = {The FLaReNet Strategic Language Resource Agenda}, YEAR = {2012}, ABSTRACT = {The FLaReNet Strategic Agenda highlights the most pressing needs for the sector of Language Resources and Technologies and presents a set of recommendations for its development and progress in Europe, as issued from a three-year consultation of the FLaReNet European project. The FLaReNet recommendations are organised around nine dimensions: a) documentation b) interoperability c) availability, sharing and distribution d) coverage, quality and adequacy e) sustainability f) recognition g) development h) infrastructure and i) international cooperation. As such, they cover a broad range of topics and activities, spanning over production and use of language resources, licensing, maintenance and preservation issues, infrastructures for language resources, resource identification and sharing, evaluation and validation, interoperability and policy issues. The intended recipients belong to a large set of players and stakeholders in Language Resources and Technology, ranging from individuals to research and education institutions, to policy-makers, funding agencies, SMEs and large companies, service and media providers. The main goal of these recommendations is to serve as an instrument to support stakeholders in planning for and addressing the urgencies of the Language Resources and Technologies of the future.}, KEYWORDS = {strategic agenda, language resources planning, recommended priority actions}, PAGES = {1379--1386}, URL = {http://www.lrec-conf.org/proceedings/lrec2012/index.html}, ISBN = {978-2-9517408-7-7}, CONFERENCE_NAME = {The Eighth International Conference on Language Resources and Evaluation (LREC'12)}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {23-25 May 2012}, BOOKTITLE = {Proceedings of the 8th international conference on Language Resources and Evaluation (LREC2012)}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Doğan, M. U. 
and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{SPADONI_2012_INPROCEEDINGS_STLRTSO_219516, AUTHOR = {Spadoni, F. and Tartarelli, A. and Loparco, L. and Rossi, R. and Tariffi, F. and Sassolini, E. and Ongaro, P.}, TITLE = {SMARTCITY: Customized and Dynamic Multimedia Content Production for Tourism Applications}, YEAR = {2012}, ABSTRACT = {This paper presents the SMARTCITY project experience: customized and dynamic multimedia content production for professional tourism applications.}, KEYWORDS = {Corpus Annotation Cultural Heritage Access To The Culture Information}, PAGES = {132--137}, URL = {http://digital.casalini.it/9788866551300}, CONFERENCE_NAME = {Electronic Imaging \& the Visual Arts EVA 2012 Florence (EVA 2012 Florence)}, CONFERENCE_PLACE = {Firenze, Italia}, CONFERENCE_DATE = {9-10-11 maggio 2012}, }
@comment{NOTE(review): the entry below (ABRATE_2012_INPROCEEDINGS_ABFLMM_220733) appears to record the same work as FRONTINI_2012_INPROCEEDINGS_FMLMAB_348940 further down in this file (same title, abstract, venue and dates; authors listed in a different order). Cite only one of the two keys, or keep both records in sync.}
@INPROCEEDINGS{ABRATE_2012_INPROCEEDINGS_ABFLMM_220733, AUTHOR = {Abrate, M. and Bacciu, C. and Frontini, F. and Lapolla, M. N. and Marchetti, A. and Monachini, M.}, TITLE = {Web Language Identification Testing Tool}, YEAR = {2012}, ABSTRACT = {Nowadays a variety of tools for automatic language identification are available. Regardless of the approach used, at least two features can be identified as crucial to evaluate the performances of such tools: the precision of the presented results and the range of languages that can be detected. In this work we shall focus on a subtask of written language identification that is important to preserve and enhance multilinguality in the Web, i.e. detecting the language of a Web page given its URL. Most specifically, the final aim is to verify to which extent under-represented languages are recognized by available tools. The main specificity of Web Language Identification (WLI) lies in the fact that often an HTML page can provide interesting extralinguistic clues (URL domain name, metadata, encoding, etc) that can enhance accuracy. 
We shall first provide some data and statistics on the presence of languages on the web, secondly discuss existing practices and tools for language identification according to different metrics - for instance the approaches used and the number of supported languages - and finally make some proposals on how to improve current Web Language Identifiers. We shall also present a preliminary WLI service that builds on the Google Chromium Compact Language Detector; the WLI tool allows us to test the Google n-gram based algorithm against an ad-hoc gold standard of pages in various languages. The gold standard, based on a selection of Wikipedia projects, contains samples in languages for which no automatic recognition has been attempted; it can thus be used by specialists to develop and evaluate WLI systems.}, KEYWORDS = {Multilingual Web}, URL = {https://publications.cnr.it/doc/220733}, CONFERENCE_NAME = {The Multilingual Web-the Way Ahead}, CONFERENCE_PLACE = {Luxembourg}, CONFERENCE_DATE = {15-16 March 2012}, } @INPROCEEDINGS{BARCA_2012_INPROCEEDINGS_BP_223974, AUTHOR = {Barca, L. and Pezzulo, G.}, TITLE = {Is visual lexical decision a dynamic and competitive process? No, if we look at reaction times. Yes, if we study how it unfolds in time}, YEAR = {2012}, ABSTRACT = {Visual lexical decision is a classical paradigm in Psycholinguistics, and numerous studies have assessed a so-called "lexicality effect" (i.e., better performance with lexical over non-lexical stimuli). Far less is known relative to the dynamics of choice, as many studies measure overall reaction times which are not informative of the underlying processes. To unfold visual lexical decision in time, we measured participants' hand movements toward one of two items alternatives by recording the streaming x,y coordinates of the computer mouse. Participants categorized as 'lexical' or 'non-lexical' four kinds of stimuli: high and low frequency words, pseudowords, and letter strings. 
Spatial attraction toward the opposite category was present for low frequency words and pseudowords. Increasing stimuli ambiguity led to enhanced movements' complexity and trajectories' attraction to competitors, as no such effect was present for high frequency words and letter strings.}, KEYWORDS = {Visual lexical decision, kinematics, dynamic models of decision-making, written language processes}, PAGES = {1}, URL = {https://publications.cnr.it/doc/223974}, DOI = {10.3389/conf.fnins.2012.86.00001}, CONFERENCE_NAME = {Neural Coding, Decision-Making \& Integration in Time, 2012}, CONFERENCE_PLACE = {Rauischholzhausen, Germany}, CONFERENCE_DATE = {26-29 Aprile 2012}, } @INPROCEEDINGS{BOSCHETTI_2012_INPROCEEDINGS_B_220739, AUTHOR = {Boschetti, F.}, TITLE = {A Language Independent Pedagogical Model for Greek, Latin and Arabic}, YEAR = {2012}, ABSTRACT = {Pedagogical tools in a collaborative environment for ancient languages learning are illustrated.}, URL = {https://publications.cnr.it/doc/220739}, CONFERENCE_NAME = {New Approaches to Historical Languages-Tufts University Workshop}, CONFERENCE_PLACE = {Medford, MA-Tufts University}, CONFERENCE_DATE = {7 giugno 2012}, } @INPROCEEDINGS{BOSCHETTI_2012_INPROCEEDINGS_B_220750, AUTHOR = {Boschetti, F.}, TITLE = {Data Sets and Software Components: Adjustment and Reuse}, YEAR = {2012}, ABSTRACT = {Building (or extending) a digital environment to study ancient authors requires the management of digital resources that must be adjusted asynchronously and the development of software components highly decoupled.}, KEYWORDS = {reuse digital humanities infrastructure components}, URL = {https://publications.cnr.it/doc/220750}, CONFERENCE_NAME = {The Papyrus and the Hypertext. 
Athenaeus in the Scholarly Kitchen}, CONFERENCE_PLACE = {Parigi}, CONFERENCE_DATE = {5 maggio 2012}, } @INPROCEEDINGS{BOSCHETTI_2012_INPROCEEDINGS_B_220771, AUTHOR = {Boschetti, F.}, TITLE = {OCR Evaluation and Parallelization}, YEAR = {2012}, ABSTRACT = {The scalability of the OCR applied to a large amount of documents is discussed. In particular, unsupervised methods to evaluate the accuracy of the OCR and parallelization of the processes, in order to reduce the time to perform the recognition, are illustrated.}, KEYWORDS = {OCR parallelization evaluation}, URL = {https://publications.cnr.it/doc/220771}, CONFERENCE_NAME = {Digital Humanities Seminars}, CONFERENCE_PLACE = {Leipzig}, CONFERENCE_DATE = {10 ottobre 2012}, } @INPROCEEDINGS{BOSCHETTI_2012_INPROCEEDINGS_B_221554, AUTHOR = {Boschetti, F.}, TITLE = {La localizzazione in lingua italiana dell'infrastruttura per lo studio dei classici greci e latini costituita dal Perseus Project}, YEAR = {2012}, ABSTRACT = {The parallelization of Greek and Latin texts with translations in Italian is discussed.}, KEYWORDS = {localization cyberinfrastructure}, URL = {https://publications.cnr.it/doc/221554}, CONFERENCE_NAME = {Convegno annuale dell'Associazione di Informatica Umanistica e Culture Digitali}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {14 dicembre 2012}, } @INPROCEEDINGS{BOSCHETTI_2012_INPROCEEDINGS_BDL_221560, AUTHOR = {Boschetti, F. and Del Grosso, A. M. and Lamé, M.}, TITLE = {Strumenti per l'analisi di testi bilingui al servizio dell'epigrafia digitale}, YEAR = {2012}, ABSTRACT = {Model for visualization and retrieval of bilingual epigraphic texts in parallel.}, KEYWORDS = {allineamento bilinguismo}, URL = {https://publications.cnr.it/doc/221560}, CONFERENCE_NAME = {Incontro di studi sulla versificazione epigrafica dall'antichità all'umanesimo}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {3 maggio 2012}, } @INPROCEEDINGS{DELGROSSO_2012_INPROCEEDINGS_D_390669, AUTHOR = {Del Grosso, A. 
M.}, TITLE = {Evaluation and parallelization of large-scale polytonic Greek OCR}, YEAR = {2012}, ABSTRACT = {This work aims to face the rapid digitization of Greek texts not yet in digital libraries, illustrating 1) what is an OCR engine; 2) what makes Greek OCR hard; 3) the work provided by Robertson and Boschetti; 4) future challenges on parallel architectures and proof reading system.}, KEYWORDS = {digital philology, computational philology, digital humanities, computer engineering}, URL = {https://publications.cnr.it/doc/390669}, CONFERENCE_NAME = {Digitizing Greek Literature}, CONFERENCE_PLACE = {Department of Computer Science of Leipzig University}, CONFERENCE_DATE = {15/10/2012}, }
@comment{NOTE(review): the entry below (FRONTINI_2012_INPROCEEDINGS_FMLMAB_348940) appears to record the same work as ABRATE_2012_INPROCEEDINGS_ABFLMM_220733 earlier in this file (same title, abstract, venue and dates; authors listed in a different order). Cite only one of the two keys, or keep both records in sync.}
@INPROCEEDINGS{FRONTINI_2012_INPROCEEDINGS_FMLMAB_348940, AUTHOR = {Frontini, F. and Monachini, M. and Lapolla, M. N. and Marchetti, A. and Abrate, M. and Bacciu, C.}, TITLE = {Web Language Identification Testing Tool}, YEAR = {2012}, ABSTRACT = {Nowadays a variety of tools for automatic language identification are available. Regardless of the approach used, at least two features can be identified as crucial to evaluate the performances of such tools: the precision of the presented results and the range of languages that can be detected. In this work we shall focus on a subtask of written language identification that is important to preserve and enhance multilinguality in the Web, i.e. detecting the language of a Web page given its URL. Most specifically, the final aim is to verify to which extent under-represented languages are recognized by available tools. The main specificity of Web Language Identification (WLI) lies in the fact that often an HTML page can provide interesting extralinguistic clues (URL domain name, metadata, encoding, etc) that can enhance accuracy. 
We shall first provide some data and statistics on the presence of languages on the web, secondly discuss existing practices and tools for language identification according to different metrics - for instance the approaches used and the number of supported languages - and finally make some proposals on how to improve current Web Language Identifiers. We shall also present a preliminary WLI service that builds on the Google Chromium Compact Language Detector; the WLI tool allows us to test the Google n-gram based algorithm against an ad-hoc gold standard of pages in various languages. The gold standard, based on a selection of Wikipedia projects, contains samples in languages for which no automatic recognition has been attempted; it can thus be used by specialists to develop and evaluate WLI systems.}, KEYWORDS = {Language Identification Tools, Multilingual Web}, PAGES = {1}, URL = {https://publications.cnr.it/doc/348940}, CONFERENCE_NAME = {W3C Workshop, Call for Participation: The Multilingual Web-The Way Ahead}, CONFERENCE_PLACE = {Luxembourg}, CONFERENCE_DATE = {15-16/03/2012}, } @INPROCEEDINGS{MARZI_2012_INPROCEEDINGS_M_220819, AUTHOR = {Marzi, C.}, TITLE = {Innovation, Language, and the Web}, YEAR = {2012}, ABSTRACT = {Language and innovation are inseparable. Language conveys ideas which are essential in corporate innovation; innovation would be nearly impossible if we did not have language. Language establishes the most immediate connections with our conceptualisation of the outside world, and it provides the building blocks for communication. The structure of language itself reflects its functional and communicative use. Communication takes place when there is a real information exchange process. Every linguistic choice is necessarily meaningful, and absolute variables involve the parallel construction of form and meaning. From this perspective, language is not only structure, but a dynamic knowledge construction process as well. 
Knowledge transfer and innovation transfer are ubiquitous processes: knowledge extraction requires heterogeneous tasks related to the acquisition, from unstructured textual data in digital format, of structured and classified information relating to research topics. In the full version of this approach, emphasis will be laid on the mechanisms underlying language processing and communicative interaction, outlining knowledge retention and retrieval processes. The spread of Internet has enabled development of better bibliographic scientific databases with significantly improved capacity for storage and retrieval. In recent years, web searching has become the default mode of highly innovative information retrieval, though the main sources of digital information are unstructured or semi-structured documents. Information relating to developments in scientific research is collected in the form of abstracts or full publications, in large and growing bibliographic repositories. Considering the web as a corpus makes it possible to investigate how words are used to describe innovation, and how innovation topics can influence word usage and collocational behaviour. Investigation of corpora is concerned with the description of use and structure of language, by inquiring linguistic phenomena such as co-occurrence distributions, collocational variability, derivational productivity, neologism coinage. This will bring into focus the dynamic interplay between lexical creativity and innovative pragmatic contexts, thus blurring the traditional dichotomy between knowledge of language and its use. 
In particular, the work will focus on how words and language structures become vehicle for knowledge generation and innovation transfer, and how research data, research results and widely-distributed dissemination papers can support and enhance future research.}, KEYWORDS = {Lexical productivity, Language Technologies, Web corpora, Grey Literature}, PAGES = {85--88}, URL = {https://publications.cnr.it/doc/220819}, VOLUME = {14}, ISSN = {1385-2308}, ISBN = {978-90-77484-19-7}, CONFERENCE_NAME = {Fourteenth international Conference on Grey Literature (GL14)}, CONFERENCE_PLACE = {National Research Council, Rome-Italy}, CONFERENCE_DATE = {29-30 November 2012}, BOOKTITLE = {Tracking Innovation through Grey Literature}, EDITOR = {Farace, D. J. and Frantzen, J. and Greynet}, } @INPROCEEDINGS{PARDELLI_2012_INPROCEEDINGS_P_221541, AUTHOR = {Pardelli, G.}, TITLE = {Lineamenti di Linguistica Computazionale per il recupero informativo}, YEAR = {2012}, ABSTRACT = {Le ricerche linguistiche a partire dal secondo dopoguerra hanno avuto un ritmo di evoluzione e di espansione molto rapido grazie anche ai metodi di analisi introdotti, come l'uso dei metodi statistici o quantitativi nello studio delle lingue e delle opere letterarie. Nacquero nuovi settori di applicazione, la linguistica incontrò altre scienze e l'interdisciplinarità venne sempre più praticata fino a diventare indispensabile. L'introduzione di sistemi di automazione delle ricerche nelle analisi linguistiche vide la nascita della Linguistica Computazionale LC che mise in connessione lo studio della lingua e l'elaboratore elettronico. 
Dalla fine degli anni '40 all'inizio degli anni '60 gli utilizzi del calcolo elettronico per l'elaborazione di dati linguistici si articolarono in due filoni principali: - Gli spogli elettronici dei testi che diedero impulso alla lessicografia computazionale, avviata da Padre Roberto Busa nel 1951 con la compilazione delle concordanze dell'opera omnia di Tommaso d'Aquino; - I tentativi di traduzione automatica TA, in inglese machine translation MT, avviati da Weaver nel 1949 con la pubblicazione del memorandum "Translation". La TA divenne da subito nucleo e centro di spinta della LC utilizzando il calcolatore per trasportare un testo da una lingua naturale all'altra. Il contenuto argomentale dei testi della Biblioteca dell'Istituto di Linguistica Computazionale si sviluppa per lo più sulle tematiche sopra descritte. L'articolo darà enfasi alla terminologia della LC, nella prima parte, quella introduttiva; nella seconda parte saranno fornite informazioni inerenti il patrimonio bibliografico, il software di gestione usato e indicazioni per il recupero informativo; la terza parte andrà a presentare il "Fondo Antonio Zampolli", preziosa collezione di testi, alcuni unici in Italia, nel settore del trattamento automatico della lingua, che va ad arricchire e completare la Biblioteca ILC. In appendice sarà fornita una tabella riassuntiva dei termini estratti dai titoli degli articoli presentati alle conferenze di Linguistica Computazionale COLING, dal 1965 - anno della prima conferenza tenuta a New York - al 2010, conferenza tenuta a Pechino. Trattasi di 23 conferenze internazionali di Linguistica Computazionale, fondamentali nella storia della disciplina. 
L'estrazione terminologica del corpus COLING è stata operata con gli strumenti di analisi testuale del laboratorio DylanLab dell'Istituto di Linguistica Computazionale.}, KEYWORDS = {Linguistica Computazionale, Terminologia, Repository}, URL = {https://publications.cnr.it/doc/221541}, CONFERENCE_NAME = {Seminario di Studi-Benvenuti in Biblioteca! Umanesimo e società nelle collezioni librarie del CNR}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {29-30 maggio 2012}, } @INPROCEEDINGS{PARDELLI_2012_INPROCEEDINGS_PE_221547, AUTHOR = {Pardelli, G. and Enea, A.}, TITLE = {Per un'edizione digitale dei manoscritti di Ferdinand de Saussure: Verso la Costruzione del prototipo bibliografico}, YEAR = {2012}, ABSTRACT = {Per un'edizione digitale dei manoscritti di Ferdinand de Saussure - PRIN 2008 Unità operativa CNR-ILC Responsabile dott. Nilda Ruimy Relazione attività svolta da Gabriella Pardelli Obiettivo: costruzione del prototipo bibliografico saussurriano compatibile con le applicazioni sviluppate per il progetto e volte alla rete telematica. In particolare il prototipo si integra con l' ontologia di dominio sviluppata per la creazione del lessico saussurriano (ILC) e con l'applicazione di Natural Language Processing nata per il recupero della terminologia multilingue da immagini digitali (ILC). Metodologia: 1) scelta del software; 2) recupero delle informazioni bibliografiche ; 3) digitalizzazione 4) collegamento testo/immagine. 1) Il prototipo bibliografico degli scritti di Ferdinand de Saussure si avvale del software CDS/ISIS Computerised Documentation Service / Integrated Set of Information Systems. Il software è sviluppato, mantenuto e distribuito dall'UNESCO. L'applicazione permette l'associazione di file esterni (immagini e testi) oltre alla creazione di link tra record e archivi diversi. L'esportazione dei dati segue lo standard internazionale ISO2709. 2) La ricca bibliografia è stata elaborata dal coordinatore nazionale del progetto. 
Il recupero delle fonti saussurriane è stato svolto nella Biblioteca della Scuola Normale Superiore di Pisa e nella Biblioteca del Dipartimento di Linguistica di Pisa. 3) Le immagini sono state acquisite con la collaborazione di un fotografo professionista - fonti di prima generazione -. Invece per le fonti non recuperabili sul territorio italiano, l'acquisizione è frutto di portali europei dedicati a Saussure (Gallica) - fonti di seconda generazione - Con l'abbinamento della descrizione bibliografica alla rispettiva fonte digitalizzata il prototipo è stato ultimato. L'applicazione è stata presentata il 21 settembre 2012 al seminario internazionale organizzato dall'Università di Firenze, presso la Facoltà di Lettere , a conclusione del progetto. Consistenza: Il database contiene 150 record corredati dalla rispettiva immagine. Ogni record, diviso in campi e in ulteriori sotto campi, è stato pensato per organizzare e frammentare l'informazione e consentire indicizzazioni puntuali per recuperi informativi sui titoli, sulle lingue indagate da Saussure e oggetto di argomentazione linguistica (latino, greco, tedesco...), sulla tipologia della fonte documentaria, sulla data della comunicazione, sulla data della pubblicazione, sui commenti, sulle note, sulle riedizioni ...}, KEYWORDS = {Ferdinand de Saussure Banca dati bibliografica}, URL = {https://publications.cnr.it/doc/221547}, CONFERENCE_NAME = {Seminario internazionale Per un'edizione digitale dei manoscritti di Ferdinand de Saussure-Conclusione}, CONFERENCE_PLACE = {Università di Firenze}, CONFERENCE_DATE = {21-22 settembre 2012}, } @INPROCEEDINGS{PARDELLI_2012_INPROCEEDINGS_PSG_220806, AUTHOR = {Pardelli, G. and Sassi, M. 
and Goggi, S.}, TITLE = {Open Grey for Language Technology: a ride on the network}, YEAR = {2012}, ABSTRACT = {The aim of this paper is to introduce the Open Access movement for Natural Language Processing (NLP) by means of a wide range of open access Grey Literature documentation available on the web. In 2008 Robert Dale, in the last issue of volume 35 of Computational Linguistics said: "There are a number of definitions of the term 'open access' in circulation, but almost all share the key principle that scientific literature should be freely available for all to read, download, copy, distribute, and use (with appropriate attribution) without restriction". At first glance it might seem that the Open Access movement has gradually become more influential in the field of language technology by building repositories accessible through the network. Today's digital archives are niches of intellectual production spread by means of a wide range of documents (such as journal articles and proceedings) which, paradoxically, the search engines do not always reach. The use of inappropriate terms in the formulation of queries and the fragmentation of repositories in this area of investigation does not allow to retrieve information on a large scale. The full paper, after a first introductory section, will be organized in two sections: 1) the first dedicated to the methodology for searching and tracing open access resources and to the criteria for analyzing and selecting the online documentation; 2) the second devoted to a description of the state-of-the-art of Open Access Grey Literature material in a statistical and thematic scenario. As things stand, standardization of computational systems interconnected by links and tools of various nature allowing Internet users to easily retrieve the information that the web naturally makes available would then be essential. 
Topics: Sustainability, Public Accessible Resources, Product and Service enhancements, Open Access, Curation and Preservation}, KEYWORDS = {Open Access Movement Natural Language Processing}, PAGES = {89--94}, URL = {https://publications.cnr.it/doc/220806}, VOLUME = {14}, CONFERENCE_NAME = {GL14 Fourteenth International Conference on Grey Literature (GL14)}, CONFERENCE_PLACE = {National Research Council, Rome, Italy}, CONFERENCE_DATE = {29-30 November 2012}, } @INPROCEEDINGS{PICCINI_2012_INPROCEEDINGS_PRG_282584, AUTHOR = {Piccini, S. and Ruimy, N. and Giovannetti, E.}, TITLE = {Structuring a specific domain: an electronic thesaurus of Ferdinand de Saussure's terminology}, YEAR = {2012}, ABSTRACT = {Although Ferdinand de Saussure's work has been disseminated all over the world, his thought was mainly reconstructed and interpreted by his students and disciples. His authentic writings are however of paramount importance as they highlight the complexity of the philosophical and semiological system Saussure developed and the attention he paid to linguistic terminology. The great Genevan linguist himself often used a peculiar vocabulary. He forged neologisms or conferred a new meaning to existing words, used a few terms ephemerally, changed the denotation of some concepts over the years, so to create his own idiosyncratic terminology, which is now at the base of modern linguistics. In this paper, we present the first electronic thesaurus-lexicon of the Saussurean linguistic terminology that is being developed in the framework of the ongoing Italian project 'For a digital edition of Ferdinand de Saussure's manuscripts'. The lexicon population consists of an updated nomenclature, encompassing not only the terminology gathered by two famous Saussurean scholars, Robert Godel and Rudolf Engler, but also new terms extracted from recently found manuscripts under study. 
In the lexicon, the overall structure of Saussure's terminology is made explicit and the semantic import of its component terms as well as the nature and relevance of their relationships are defined. The lexical model, on which the thesaurus is grounded, is a customized version of the SIMPLE model, a de facto standard in the domain of Computational Lexicography that enables a highly structured representation of lexical knowledge. The first stage of the customization process consisted in designing and translating into the Web Ontology Language OWL the central component of the SIMPLE_FdS model, i.e. a domain-specific lexical ontology that would structure Saussure's terminology. The two other building blocks of the root model, i.e. a network of semantic relations and a set of semantic features, were adapted in order to meet the requirements of the domain of interest. The expressive means deemed suitable for our domain were maintained whereas additional specific features and relations were created to account for term properties and relationships peculiar to the conceptual organization of this domain of knowledge. So far, 375 terms were endowed with a rich semantic description. In the extended paper, examples of lexical entries will be provided and many interesting possibilities of lexical investigation will be pointed out. Information retrieval queries may in fact be formulated using any single piece of information encoded, be it a semantic relation, a feature or a lexical unit. Such an electronic thesaurus-lexicon, based on a multidimensional structuring of concepts and a large network of semantic relations among terms, is therefore, in our opinion, a most valuable lexical research tool. It is our deep conviction that it will contribute to better master some of Saussure's reflections and to gain a wider understanding of the overall domain terminology. 
It might therefore help shed light on original aspects of the author's thought.}, KEYWORDS = {Saussure, terminologia, lessici computazionali}, PAGES = {1}, URL = {https://publications.cnr.it/doc/282584}, CONFERENCE_NAME = {45th Annual Meeting of the Societas Linguistica Europaea (SLE)}, CONFERENCE_PLACE = {Stoccolma}, CONFERENCE_DATE = {29/08/2012-01/09/2012}, BOOKTITLE = {45th Annual Meeting of the Societas Linguistica Europaea, Book of Abstracts}, EDITOR = {Cornillie, B. and Pascual, M. S. S.}, } @INPROCEEDINGS{PIOGGIA_2012_INPROCEEDINGS_PBNFATFSGDTM_284777, AUTHOR = {Pioggia, G. and Billeci, L. and Narzisi, A. and Farruggio, V. and Arnao, A. and Tartarisco, G. and Ferro, M. and Siracusano, R. and Germanò, E. and Deodato, M. and Tortorella, G. and Muratori, F.}, TITLE = {PRIMA PIETRA: Research, Integration, Enhancement, Assistance and Education Program for Autism Services and Rehabilitation Technologies}, YEAR = {2012}, ABSTRACT = {It is commonly recognized that autism spectrum disorder (ASD) symptoms are as early as 12 months of age and that the best outcomes are often achieved through early diagnosis and early intervention. However, there are many challenges to delivering health care to parents with a child with ASD. Difficulties to service delivery and utilization are more intensified for families living in suburban or remote areas, often resulting in limited access to preventative mental health services in general and parenting ASD interventions in particular. As Vismara and Rogers suggested (Vismara, 2010), the use of technology could support long-distance clinical health care. PRIMA PIETRA Italian project is focused on early diagnosis and intervention providing Early Start Denver Model (Dawson et al., 2009) using tele-rehabilitation. 
PRIMA PIETRA is a collaborative project supported by the Minister of Health of the Sicilian Region, in collaboration with Basilicata and Tuscany Regions.}, KEYWORDS = {autism spectrum disorder, pervasive healthcare, early diagnosis}, PAGES = {4}, URL = {https://imfar.confex.com/imfar/2012/webprogram/Paper10070.html}, CONFERENCE_NAME = {International Meeting for Autism Research}, CONFERENCE_PLACE = {Toronto, Canada}, CONFERENCE_DATE = {18 May 2012}, } @INPROCEEDINGS{PIRRELLI_2012_INPROCEEDINGS_P_288047, AUTHOR = {Pirrelli, V.}, TITLE = {At the core of lexical processing: computational and neurocognitive issues}, YEAR = {2012}, ABSTRACT = {The lexicon lies at the root of our linguistic competence and represents a fundamental interface domain between language and our conceptualisation of the outside world. In traditional conceptions of the language architecture, the lexicon has been generally characterised as a declarative memory store of static building blocks, with rules providing the basic principles and constraints on their on-line procedural combination. The talk deals with some recent computational models of self-organising memories and neuroimaging evidence of the connectivity of the perisylvian network for language processing and working memory located in the left hemisphere of the human brain, to suggest a different conception of the mental lexicon and its role in the architecture of language.}, KEYWORDS = {Memory, Mental Lexicon, Neurocognitive correlates}, URL = {http://hnk.ffzg.hr/fassbl2012/}, CONFERENCE_NAME = {8th International Conference Formal Approaches to South Slavic and Balkan Languages (FASSBL-8)}, CONFERENCE_PLACE = {Dubrovnik, Croatia}, CONFERENCE_DATE = {19-21 settembre 2012}, } @INPROCEEDINGS{PIRRELLI_2012_INPROCEEDINGS_P_288106, AUTHOR = {Pirrelli, V.}, TITLE = {Hebbian Self-Organizing Memories for Lexical Recoding and Processing}, YEAR = {2012}, ABSTRACT = {Hebbian self-organizing memories (Pirrelli et al. 2010, Ferro et al. 
2011, Koutnik 2007) can provide a rigorous and testable conceptual framework within which to unify diverse functional hypotheses for lexical acquisition and processing, and to clarify how these hypotheses may be explained computationally. I discuss a few desiderata that any biologically-inspired computational model of the mental lexicon has to meet, and report on how well such desiderata are met by different types of Hebbian self-organizing memories, exhibiting empirically different maturational trends in lexical acquisition.}, KEYWORDS = {Self-organising Maps, Memory, Word Processing}, URL = {https://publications.cnr.it/doc/288106}, CONFERENCE_NAME = {Workshop on Exo-lexical variables in monolingual and bilingual morphological processing, IMM15}, CONFERENCE_PLACE = {Vienna}, CONFERENCE_DATE = {February 9-12, 2012}, } @INPROCEEDINGS{QUOCHI_2012_INPROCEEDINGS_Q_220828, AUTHOR = {Quochi, V.}, TITLE = {How predictive are grammatical constructions in Italian? The case of the caused-motion construction}, YEAR = {2012}, ABSTRACT = {Differently from English, Italian has a rich morphological system and a relatively free word-order. For these reasons, the suitability of a "full-scope" constructional approach to Italian is not given. Although Goldberg's (1995, 2006) version of Construction grammar language is constructions all the way down (or up), one could still argue that in Italian, i.e. a language rich in morphology, abstract, grammatical constructions do not play a role, thus weakening the constructionist view. One of the strong points in favour of Goldberg's approach is that argument structure constructions in English have been found to be highly predictive of sentence meaning (Goldberg et al. 2005), which provides a motivation for their early acquisition by children. Many of such studies and evidences are still missing for Italian. 
This contribution will therefore attempt to start filling this gap by testing the predictive power of the Italian Caused Motion Construction. Data is taken from the CHILDES database (MacWhinney 2000) and annotated according to constructional properties and verb meaning. The annotation is then used to calculate the Cue and Category Validity (Murphy 2002) of both the Construction and the main verbs, which measures their predictive power (i.e respectively their reliability and availability) in relation to the overall sentence meaning. Results show that the Italian Caused Motion Construction is not only more reliable than verbs as a predictor of overall sentence meaning, but it is also more available.}, KEYWORDS = {Construction Grammar Psicolinguistica Linguistica del corpus}, PAGES = {265-265}, URL = {http://www.sle2012.eu/downloads/Book_abstracts_SLE2012_23aug_final.pdf}, CONFERENCE_NAME = {45th Annual Meeting of the Societas Linguistica Europaea (SLE2012)}, CONFERENCE_PLACE = {Stoccolma, Svezia}, CONFERENCE_DATE = {29/8-1/9 2012}, } @INPROCEEDINGS{SORIA_2012_INPROCEEDINGS_S_226389, AUTHOR = {Soria, C.}, TITLE = {Voices of Italy: a project for the preservation of Italian language diversity}, YEAR = {2012}, ABSTRACT = {This submission will bring to discussion a project idea that addresses documentation and preservation of regional languages of Italy. Italy holds a unique position in Europe, with around 40 languages spoken and 31 of them in danger. Yet, little is being done for fostering their preservation, and little (if any) is the perception by the general public of the proportions of the phenomenon of language endangerment. The result of the project should be an audiovisual digital archive for storing samples of regional languages. 
Users will interact with the archive through a range of so called new technologies: not only a web portal for accessing, browsing and searching information, but also mobile devices Apps, a YouTube channel, social networking platforms, etc. for contributing language material. Since these new media are widespread among the Italian population, the youngest one in particular (36 million people in Italy have a connection to the Internet, and 9,3 millions from a mobile device, with an increase of 74\% in a year) it seems feasible to crowdsource the linguistic material to populate the archive, thereby offering an engaging experience that would boost involvement of speakers' communities into the documentation and preservation endeavour. It would be interesting to discuss the feasibility of this project under the current technological and cultural constraints.}, PAGES = {17-17}, URL = {https://publications.cnr.it/doc/226389}, CONFERENCE_NAME = {Language Endangerment: Methodologies and New Challenges}, CONFERENCE_PLACE = {Cambridge (UK)}, CONFERENCE_DATE = {06/07/2012}, BOOKTITLE = {Language Endangerment: Methodologies and New Challenges}, } @INPROCEEDINGS{SORIA_2012_INPROCEEDINGS_S_226380, AUTHOR = {Soria, C.}, TITLE = {Voices of Italy: a project for the preservation of Italian language diversity}, YEAR = {2012}, URL = {https://publications.cnr.it/doc/226380}, CONFERENCE_NAME = {Language Endangerment: Methodologies and New Challenges}, CONFERENCE_PLACE = {Cambridge (UK)}, CONFERENCE_DATE = {06/07/2012}, } @INPROCEEDINGS{SORIA_2012_INPROCEEDINGS_SZ_317628, AUTHOR = {Soria, C. and Zoli, C.}, TITLE = {New markets for Language Technology for minority languages}, YEAR = {2012}, ABSTRACT = {Language Technology offers significant opportunities for minority languages and can be a major force in addressing and alleviating some of the difficulties they face. 
For minority languages in particular, speech and language technology are a powerful means to bring together speakers' communities, to have a major impact on language learning support, to promote inclusion of elderly or impaired people and to foster widespread use of a language through digital means. In this talk, we will be presenting first the main outcomes of the research carried out by the META-NET project, resulting in the publication of the White Paper Series "Europe's Languages in the Digital Age". The series, that reports on the state of each European language with respect to Language Technology, offers an updated synthesis of the most urgent risks and chances faced, in particular, by less-serviced languages. The presentation will then provide concrete examples of LT solutions for minority languages, discussing their potential impact on those languages, in particular with regard to their role for language maintenance and preservation in the eyes of the younger, digitally-oriented generation.}, KEYWORDS = {Language technology, NLP, minority languages}, URL = {https://publications.cnr.it/doc/317628}, CONFERENCE_NAME = {3e Symposium sur le Multilinguisme dans le Cyberespace}, CONFERENCE_PLACE = {Parigi}, CONFERENCE_DATE = {21/11/2012-23/11/2012}, } @INPROCEEDINGS{SPADONI_2012_INPROCEEDINGS_STLRTSO_220325, AUTHOR = {Spadoni, F. and Tartarelli, A. and Loparco, L. and Rossi, R. and Tariffi, F. and Sassolini, E. and Ongaro, P.}, TITLE = {SMARTCITY: CUSTOMIZED AND DYNAMIC MULTIMEDIA CONTENT PRODUCTION FOR TOURISM APPLICATIONS}, YEAR = {2012}, ABSTRACT = {This paper presents the final results of the SMARTCITY project, co-funded by the Tuscany Region under the POR CREO 1.d program. 
The project proposes an innovative methodology as well as advanced technologies enabling professional services for cultural tourism applications in urban areas as well as larger archaeological sites.}, KEYWORDS = {Tourism application Dynamic Multimedia Content Production Semantic Annotation}, URL = {https://publications.cnr.it/doc/220325}, CONFERENCE_NAME = {Smartcity: Customized and dynamic multimedia content production for tourism applications (EVA 2012 Florence)}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {9-10-11 maggio 2012}, } @TECHREPORT{ALIPRANDI_2012_TECHREPORT_ABBFLMPS_221743, AUTHOR = {Aliprandi, C. and Bacciu, C. and Bartolini, R. and Frontini, F. and Lapolla, N. and Marchetti, A. and Piccinonno, F. and Soru, T.}, TITLE = {Specifiche architetturali e funzionali}, YEAR = {2012}, ABSTRACT = {Questo documento contiene le specifiche funzionali ed architetturali del sistema GLOSS elaborate come risultato dell'obiettivo operativo 1. Tali specifiche debbono essere di riferimento per tutte le fasi di sviluppo dei vari componenti del sistema stesso e della loro integrazione in un prototipo dimostrativo. Ad una breve introduzione che richiama gli obiettivi generali del progetto, seguono: 1. La descrizione delle funzionalità suddivisa nelle varie fasi che compongono il flusso operativo di GLOSS. 2. La descrizione dell'architettura del sistema da realizzare nella quale si fornisce lo schema dell'integrazione dei vari componenti, il protocollo di comunicazione e memorizzazione dei dati che viene trattato più nel dettaglio nel documento D1.2 GAF - Gloss Annotation Format, e la descrizione di ciascun componente del sistema. Per sua natura, questo documento sarà soggetto a revisione durante tutto il periodo di sviluppo del sistema. 
Questa prima versione deve intendersi come guida per l'implementazione ed ha lo scopo di fornire a chi partecipa a questo progetto una visione generale delle funzionalità di GLOSS e come queste dovranno essere integrate nel prototipo dimostratore.}, KEYWORDS = {GLOSS specifiche funzionali}, URL = {https://publications.cnr.it/doc/221743}, } @TECHREPORT{BOZZI_2012_TECHREPORT_BGBNMPRD_390781, AUTHOR = {Bozzi, A. and Giovannetti, E. and Boschetti, F. and Nahli, O. and Marchi, S. and Piccini, S. and Ruimy, N. and Del Grosso, A. M.}, TITLE = {Greek into Arabic: contents, technologies and (humanistic and scientific) applications of a new software}, YEAR = {2012}, ABSTRACT = {This contribution aims to describe the methodological approach to Digital Philology by means of the G\&A Web Application. It also shows running examples for the: 1) Visualization and ordering of parallel texts subdivided in pericopes; 2) Linguistic annotations; 3) Scholarly comments; and 4)Search functions}, KEYWORDS = {digital philology, computational philology, software engineering, Greek into Arabic, Computational linguistics}, URL = {https://publications.cnr.it/doc/390781}, } @TECHREPORT{CALZOLARI_2012_TECHREPORT_C_288663, AUTHOR = {Calzolari, N.}, TITLE = {Multilingual Semantic Web and the challenges of Open Language Data}, YEAR = {2012}, ABSTRACT = {Language Technology (LT) is a data-intensive field and major breakthroughs have stemmed from a better use of more and more Language Resources (LRs). LRs and Open/Shared Language Data is therefore a great topic! New approaches are needed, both for Data and Meta-Data (LRs and Meta- LRs). My topics are linked to the layer of LRs and language services that serve LT, and especially open information on LRs and on research results. 
How can Linked Data contribute?}, KEYWORDS = {Language Resources (LRs)}, PAGES = {28-30}, URL = {http://drops.dagstuhl.de/opus/volltexte/2013/3788/pdf/dagrep_v002_i009_p015_s12362.pdf}, DOI = {10.4230/DagRep.2.9.15}, } @TECHREPORT{CHIARELLA_2012_TECHREPORT_C_221747, AUTHOR = {Chiarella, D.}, TITLE = {Virtualizzazione di reti geografiche}, YEAR = {2012}, ABSTRACT = {In this increasingly connected world the spread of the Internet is, in fact, revolutionizing the way of conceiving applications that gradually tend to migrate to the network to be accessible wherever you are. The design must be done with a different point of view taking into account that these applications must respond to requests from anywhere in the world and not just from local networks. The basic problem is that an app designed to operate over a WAN is actually developed within a Local Area Network (LAN). In this way it is difficult to objectively assess its performance given the fact that a local network has very different characteristics from those of a Wide Area Network (WAN) in terms of bandwidth, latency, packet loss, etc.. Often the loss of performance which can be observed in the transition from the development environment, in which tests are run, to that of production is to be attributed in a decisive manner to the type of connections. It becomes essential to have a tool by which to try the possible scenarios. In the course of this technical report we will see how to install and use a software to simulate the use of an application-level WAN.}, KEYWORDS = {virtualizzazione, web application performance, testing, emulatore WAN}, URL = {https://publications.cnr.it/doc/221747}, } @TECHREPORT{DELGRATTA_2012_TECHREPORT_DMTALRBP_484488, AUTHOR = {Del Gratta, R. and Monachini, M. and Tesconi, M. and Abrate, M. and Lo Duca, A. and Rimell, L. and Bel, N. and Padró, M.}, TITLE = {D6. 
4 Lexical Merger}, YEAR = {2012}, ABSTRACT = {This document describes the experiments on the merging of lexical resources performed during the project and the development of two merging components for LMF lexicons}, KEYWORDS = {LMF, Lexical Merger}, PAGES = {1-39}, URL = {http://www.panacea-lr.eu/system/deliverables/PANACEA_D6.4.pdf}, } @TECHREPORT{DELGROSSO_2012_TECHREPORT_DM_391001, AUTHOR = {Del Grosso, A. M. and Marchi, S.}, TITLE = {Il trattamento digitale dei manoscritti di F. de Saussure}, YEAR = {2012}, ABSTRACT = {Il contributo presenta i risultati del progetto PRIN2008 "Per un'edizione digitale dei manoscritti di Ferdinand de Saussure", finanziato dal Ministero dell'Istruzione, dell'Università e della Ricerca italiano per il biennio 2009-2011.}, KEYWORDS = {prin, saussure, digital humanities, computational philology, digital philology}, URL = {https://publications.cnr.it/doc/391001}, } @TECHREPORT{DELGROSSO_2012_TECHREPORT_DB_221671, AUTHOR = {Del Grosso, A. and Boschetti, F.}, TITLE = {Parallel OCR for Ancient Greek Critical Editions}, YEAR = {2012}, ABSTRACT = {This project is focused on the parallelization of OCR processes applied to Ancient Greek critical editions. Two experiments have been performed. The first experiment is related to parameters differently tuned on the nodes of the grid, in order to identify the best combination that improves the accuracy of the recognition. The second experiment concerns the application of OCR with the best parameters on sample pages by a divide et impera strategy. Results related to the performances of the parallelization are discussed.}, KEYWORDS = {OCR Ancient Greek Parallelization}, URL = {http://www.hpc-europa.eu/files/2012/Hum_1263_DEL%20GROSSO%20Angelo%20Mario.pdf}, } @TECHREPORT{MARZI_2012_TECHREPORT_M_221751, AUTHOR = {Marzi, C.}, TITLE = {Neuroimaging: mania, revolution, or technological evolution? 
A critical review}, YEAR = {2012}, ABSTRACT = {Imaging has become an increasingly important tool in both research and clinical care. A range of neuroimaging technologies provide unprecedented sensitivity to visualisation of brain structure (i.e. anatomy) and function (i.e. physiology) from the level of individual molecules to the whole brain. Many imaging methods are non-invasive and allow dynamic processes to be monitored over time. Imaging is enabling researchers to identify neural networks involved in cognitive processes; understand disease pathways; recognise and diagnose diseases early, when they are most effectively treated; and determine how therapies work. The cognitive neuroscience of higher order auditory processing has advanced enormously in a brief time, in large part benefiting from neuroimaging approaches. A significant amount of progress has been made, and much of it can be attributed to the possibilities for crossing boundaries afforded by neuroimaging tools. More sophisticated experiments combined with fMRI and EEG are helping to know what the brain is doing as people perform cognitive, emotional, and behavioural actions. MEG technology will allow linguists to explore how social interaction and sensorimotor experience affects the cortical processing of language in children; and the combination of behavioural and brain measures may enhance the certainty with which dyslexia can be predicted for a child and promote the possibility of preventive intervention.}, KEYWORDS = {Neuroimaging technologies, Cognitive neuroscience, Language studies}, URL = {https://publications.cnr.it/doc/221751}, } @TECHREPORT{POCH_2012_TECHREPORT_PHQDTTPB_221573, AUTHOR = {Poch, M. and Hamon, O. and Quochi, V. and Del Gratta, R. and Toral, A. and Thurmair, G. and Prokopidis, P. and Bel, N.}, TITLE = {D3. 
4 Third version (v4) of the integrated platform and documentation}, YEAR = {2012}, ABSTRACT = {The deliverable describes the third and final version of the PANACEA platform.}, KEYWORDS = {infrastrutture Trattamento del linguaggio naturale}, URL = {https://publications.cnr.it/doc/221573}, } @TECHREPORT{PROKOPIDIS_2012_TECHREPORT_PPTPFRT_221582, AUTHOR = {Prokopidis, P. and Papavassiliou, V. and Toral, A. and Poch Riera, M. and Frontini, F. and Rubino, F. and Thurmair, G.}, TITLE = {D4.5 Final Report on the Corpus Acquisition \& Annotation subsystem and its components}, YEAR = {2012}, ABSTRACT = {PANACEA WP4 targets the creation of a Corpus Acquisition and Annotation (CAA) subsystem for the acquisition and processing of monolingual and bilingual language resources (LRs). The CAA subsystem consists of tools that have been integrated as web services in the PANACEA platform of LR production. D4.2 Initial functional prototype and documentation in T13 and D4.4 Report on the revised Corpus Acquisition \& Annotation subsystem and its components in T23 provided initial and updated documentation on this subsystem, while this deliverable presents the final documentation of the subsystem as it evolved after the third development cycle of the project. The deliverable is structured as follows. The Corpus Acquisition Component (i.e. the Focused Monolingual and Bilingual Crawlers (FMC/FBC)) is described in section 2. The final list of tools for corpus normalization (cleaning and de-duplication) is detailed in section 3. Section 4 provides documentation on all NLP tools included in the subsystem. Due to its nature, this deliverable aggregates considerable parts of all previous WP4 deliverables. 
The main new additions include a) new functionalities for, among others, crawling strategy, de-duplication, and detection of parallel document pairs; and b) new NLP tools for syntactic analysis, named entity recognition, tweet processing and anonymization.}, KEYWORDS = {Corpus Acquisition}, URL = {http://www.jotform.com/uploads/fabioaffeilc/30222975566357/225350067351490116/PANACEA}, } @TECHREPORT{QUOCHI_2012_TECHREPORT_QFBHPPBTTK_221616, AUTHOR = {Quochi, V. and Frontini, F. and Bartolini, R. and Hamon, O. and Poch Riera, M. and Padro, M. and Bel, N. and Thurmair, G. and Toral, A. and Kamran, A.}, TITLE = {D7. 4 Third evaluation report. Evaluation of PANACEA v3 and produced resources}, YEAR = {2012}, ABSTRACT = {D7.4 reports on the evaluation of the different components integrated in the PANACEA third cycle of development as well as the final validation of the platform itself. All validation and evaluation experiments follow the evaluation criteria already described in D7.1. The main goal of WP7 tasks was to test the (technical) functionalities and capabilities of the middleware that allows the integration of the various resource-creation components into an interoperable distributed environment (WP3) and to evaluate the quality of the components developed in WP5 and WP6. The content of this deliverable is thus complementary to D8.2 and D8.3 that tackle advantages and usability in industrial scenarios. It has to be noted that the PANACEA third cycle of development addressed many components that are still under research. The main goal for this evaluation cycle thus is to assess the methods experimented with and their potentials for becoming actual production tools to be exploited outside research labs. 
For most of the technologies, an attempt was made to re-interpret standard evaluation measures, usually in terms of accuracy, precision and recall, as measures related to a reduction of costs (time and human resources) in the current practices based on the manual production of resources. In order to do so, the different tools had to be tuned and adapted to maximize precision and for some tools the possibility to offer confidence measures that could allow a separation of the resources that still needed manual revision has been attempted. Furthermore, the extension to other languages in addition to English, also a PANACEA objective, has been evaluated. The main facts about the evaluation results are now summarized.}, KEYWORDS = {PANACEA, evaluation, machine translation}, URL = {http://hdl.handle.net/10230/22533}, } @TECHREPORT{RIMELL_2012_TECHREPORT_RBPFMQ_221631, AUTHOR = {Rimell, L. and Bel, N. and Padró, M. and Frontini, F. and Monachini, M. and Quochi, V.}, TITLE = {D6. 2 Integrated Final Version of the Components for Lexical Acquisition}, YEAR = {2012}, ABSTRACT = {The PANACEA project has addressed one of the most critical bottlenecks that threaten the development of technologies to support multilingualism in Europe, and to process the huge quantity of multilingual data produced annually. Any attempt at automated language processing, particularly Machine Translation (MT), depends on the availability of language-specific resources. Such Language Resources (LR) contain information about the language's lexicon, i.e. the words of the language and the characteristics of their use. In Natural Language Processing (NLP), LRs contribute information about the syntactic and semantic behaviour of words - i.e. their grammar and their meaning - which inform downstream applications such as MT. To date, many LRs have been generated by hand, requiring significant manual labour from linguistic experts. 
However, proceeding manually, it is impossible to supply LRs for every possible pair of European languages, textual domain, and genre, which are needed by MT developers. Moreover, an LR for a given language can never be considered complete nor final because of the characteristics of natural language, which continually undergoes changes, especially spurred on by the emergence of new knowledge domains and new technologies. PANACEA has addressed this challenge by building a factory of LRs that progressively automates the stages involved in the acquisition, production, updating and maintenance of LRs required by MT systems. The existence of such a factory will significantly cut down the cost, time and human effort required to build LRs. WP6 has addressed the lexical acquisition component of the LR factory, that is, the techniques for automated extraction of key lexical information from texts, and the automatic collation of lexical information into LRs in a standardized format. The goal of WP6 has been to take existing techniques capable of acquiring syntactic and semantic information from corpus data, improving upon them, adapting and applying them to multiple languages, and turning them into powerful and flexible techniques capable of supporting massive applications. One focus for improving the scalability and portability of lexical acquisition techniques has been to extend existing techniques with more powerful, less "supervised" methods. In NLP, the amount of supervision refers to the amount of manual annotation which must be applied to a text corpus before machine learning or other techniques are applied to the data to compile a lexicon. More manual annotation means more accurate training data, and thus a more accurate LR. 
However, given that it is impractical from a cost and time perspective to manually annotate the vast amounts of data required for multilingual MT across domains, it is important to develop techniques which can learn from corpora with less supervision. Less supervised methods are capable of supporting both large-scale acquisition and efficient domain adaptation, even in the domains where data is scarce. Another focus of lexical acquisition in PANACEA has been the need of LR users to tune the accuracy level of LRs. Some applications may require increased precision, or accuracy, where the application requires a high degree of confidence in the lexical information used. At other times a greater level of coverage may be required, with information about more words at the expense of some degree of accuracy. Lexical acquisition in PANACEA has investigated confidence thresholds for lexical acquisition to ensure that the ultimate users of LRs can generate lexical data from the PANACEA factory at the desired level of accuracy.}, KEYWORDS = {Lexical Acquisition}, URL = {http://www.panacea-lr.eu/system/deliverables/PANACEA_D6.2.pdf}, } @TECHREPORT{RIMELL_2012_TECHREPORT_RBPFMQD_221650, AUTHOR = {Rimell, L. and Bel, N. and Padró, M. and Frontini, F. and Monachini, M. and Quochi, V. and Del Gratta, R.}, TITLE = {D6. 5 Merged dictionaries}, YEAR = {2012}, ABSTRACT = {This document presents the merged dictionaries delivered in PANACEA. Those dictionaries result from merging already existing lexica, generally for general domain, with domain specific lexica acquired using PANACEA platform. The domain specific lexica are presented and delivered in D6.3 and the merging repository that allowed the multilevel merging in D6.4.}, KEYWORDS = {merged dictionaries, computational lexicon}, URL = {http://www.panacea-lr.eu//en/deliverables/list}, } @TECHREPORT{RIMELL_2012_TECHREPORT_RBPFMQD_221755, AUTHOR = {Rimell, L. and Bel, N. and Padrò, M. and Frontini, F. and Monachini, M. and Quochi, V. 
and Del Gratta, R.}, TITLE = {D6.3 Monolingual lexica for English, Spanish and Italian tuned for a particular domain (LAB and ENV)}, YEAR = {2012}, ABSTRACT = {This document presents the lexica acquired using PANACEA platform for Labour and Environment domains. The languages of the lexica are English, Spanish and Italian. The lexical information acquired depends on the language, according to the available tools in the platform.}, KEYWORDS = {Lexicon Acquisition}, URL = {http://www.panacea-lr.eu/system/deliverables/PANACEA_D6.3.pdf}, } @TECHREPORT{TARTARISCO_2012_TECHREPORT_TBCFP_221736, AUTHOR = {Tartarisco, G. and Baldus, G. and Corda, D. and Ferro, M. and Pioggia, G.}, TITLE = {Decision Support Processing Architecture}, YEAR = {2012}, ABSTRACT = {This report presents the design and implementation of the INTERSTRESS Decision Support System (DSS). The goal of the DSS is to assess the psychological state of each patient by analyzing the previously acquired knowledge, such as patient's physiological and behavioural profile, and current sensory data. Starting from such information, the DSS then infers physiological and behavioural markers of stress.}, KEYWORDS = {decision support system multimodal analysis artificial neural networks Bayesian models machine learning}, URL = {https://publications.cnr.it/doc/221736}, } @TECHREPORT{VISINTAINER_2012_TECHREPORT_VMCKCPTF_221683, AUTHOR = {Visintainer, F. and Muro, M. and Carlino, A. and Kalogirou, K. and Contreras, J. and Pioggia, G. and Tartarisco, G. 
and Ferro, M.}, TITLE = {Two vehicle demonstrators for elderly drivers support}, YEAR = {2012}, KEYWORDS = {elderly support biometrics sensing seat}, URL = {https://publications.cnr.it/doc/221683}, } @MISC{BOSCHETTI_2012_MISC_B_221568, AUTHOR = {Boschetti, F.}, TITLE = {Iperspazi del mondo mediterraneo}, YEAR = {2012}, ABSTRACT = {Investigation in parallel of multilingual semantic spaces.}, KEYWORDS = {semantic spaces}, URL = {https://publications.cnr.it/doc/221568}, } @MISC{DELGROSSO_2012_MISC_DMMP_390653, AUTHOR = {Del Grosso, A. M. and Marchi, S. and Murano, F. and Pesini, L.}, TITLE = {Banca dati testuale Codifica Théorie des sonantes}, YEAR = {2012}, ABSTRACT = {Banca dati testuale XML della Théorie des sonantes edito dalla Marchese nel 2002.}, KEYWORDS = {PRIN, Saussure, Digital philology, Computational philology, Digital Humanities}, URL = {http://licodemo.ilc.cnr.it:8080/Saussure_Wapp/controlPanelView.xhtml}, } @MISC{MARZI_2012_MISC_M_221565, AUTHOR = {Marzi, C.}, TITLE = {Seminario sulla rete europea della struttura della parola (NetWordS)-Dottorato in Linguistica, UniPV}, YEAR = {2012}, ABSTRACT = {Words are the basic building block of language productivity, establishing the most immediate connections between language and our conceptualisation of the world, and they represent complex interface units, which are not only part of larger constructions but are themselves made up of simpler sublexical constituents. A better understanding of the human strategies involved in learning and processing word structures lies at the heart of our comprehension of the basic mechanism serving language and cognition.}, KEYWORDS = {Word Structure}, URL = {http://studiumanistici.unipv.it/?pagina=p\&titolo=ling-Marzihomepage}, } @ARTICLE{BARCA_2011_ARTICLE_BFP_203361, AUTHOR = {Barca, L. and Frascarelli, F. 
and Pezzulo, G.}, TITLE = {Working memory and Mental Imagery in Cerebral Palsy: A single case investigation}, YEAR = {2011}, ABSTRACT = {In this study we describe visuospatial working memory and visual mental imagery of a child with Cerebral Palsy. Beyond a moderate impairment of visuomotor integration skills, cognitive level and memory span, poor performance emerged in figures reconstruction, in memorizing matrix patterns and movements along a path. No such deficits were observed in recalling figures and their positions on a grid and learning groups of words using a visual imagery strategy. This case highlights that impaired action execution impairs performance in imagery tasks as well, but not when alternative strategies (e.g., verbal encoding) can be adopted. Results are discussed considering recent evidence on working memory and visual imagery links, and their role in motor rehabilitation training.}, KEYWORDS = {Cerebral palsy, Visuospatial Working Memory, Visual Imagery, Motor rehabilitation training, Dorsal stream vulnerability}, PAGES = {1-7}, URL = {https://publications.cnr.it/doc/203361}, DOI = {10.1080/13554794.2011.588183}, PUBLISHER = {Oxford University Press (Oxford, Regno Unito)}, ISSN = {1355-4794}, JOURNAL = {Neurocase (Oxf., Print)}, } @ARTICLE{CARDUCCI_2011_ARTICLE_CASCC_30889, AUTHOR = {Carducci, A. and Alfani, S. and Sassi, M. and Cinini, A. and Calamusa, A.}, TITLE = {Mass media health information: Quantitative and qualitative analysis of daily press coverage and its relation with public perceptions}, YEAR = {2011}, ABSTRACT = {Objective: This paper describes the methods followed by the Pisa University OCS for collecting, storing and analyzing all health-related articles and database contents. Moreover, an example population survey on the topic of food safety based on such analysis is shown. 
Methods: Articles published each day since 1999 in Italy's three most popular newspapers are collected and stored in a Data Base Text; on these articles quantitative and qualitative analyses were conducted. On the basis of these results as well as of epidemiological data, a questionnaire survey was carried out about sources of information, knowledge and risk perception of citizens regarding food safety. Results: On a total of 24,434 articles on all health topics, 18\% regarded food related hazards: their evolution over time showed peaks on BSE, avian flu and dioxin. A large proportion of the people surveyed declared having changed their food habits, at least temporarily, as a consequence of media information. Most get their information on food safety mainly from television. Most respondents remembered having previously heard news on BSE, avian flu and dioxin, but did not recall having heard of listeriosis, brucellosis or typhoid fever. Conclusions: Newspapers articles facing food related hazards tend to be alarming thus affecting the citizens risk perception. On the other hand people often ignore how to manage their own food safety in a practical way. Practice implications: Analysis of media messages can help to evaluate and correct the negative effects that may result in wrong information.}, KEYWORDS = {Risk perception, Food safety, Mass media Communication, Population survey}, PAGES = {475-478}, URL = {http://www.sciencedirect.com/science/article/pii/S0738399111000061}, VOLUME = {82}, NUMBER = {3}, DOI = {10.1016/j.pec.2010.12.025}, PUBLISHER = {Excerpta Medica (Princeton, N. J, Stati Uniti d'America)}, ISSN = {0738-3991}, JOURNAL = {Patient education and counseling}, } @ARTICLE{CHERSI_2011_ARTICLE_CFPP_205122, AUTHOR = {Chersi, F. and Ferro, M. and Pezzulo, G.
and Pirrelli, V.}, TITLE = {Time, Language and Action-A Unified Long-Term Memory Model for Sensory-Motor Chains and Word Schemata}, YEAR = {2011}, ABSTRACT = {Action and language are known to be organized as closely-related brain subsystems. An Italian CNR project implemented a computational neural model where the ability to form chains of goal-directed actions and chains of linguistic units relies on a unified memory architecture obeying the same organizing principles.}, PAGES = {27-28}, URL = {http://ercim-news.ercim.eu/images/stories/EN84/EN84-web.pdf}, VOLUME = {84}, PUBLISHER = {ERCIM (Le Chesnay)}, ISSN = {0926-4981}, JOURNAL = {ERCIM news}, } @ARTICLE{FERRO_2011_ARTICLE_FMP_205180, AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {A Self-Organizing Model of Word Storage and Processing: Implications for Morphology Learning}, YEAR = {2011}, ABSTRACT = {In line with the classical cornerstone of "dual-route" models of word structure, assuming a sharp dissociation between memory and computation, word storage and processing have traditionally been modelled according to different computational paradigms. Even the most popular alternative to dual-route thinking - connectionist one-route models - challenged the lexicon-grammar dualism only by providing a neurally-inspired mirror image of classical base-to-inflection rules, while largely neglecting issues of lexical storage. Recent psycho- and neuro-linguistic evidence, however, supports a less deterministic and modular view of the interaction between stored word knowledge and on-line processing. 
We endorse here such a non modular view on morphology to offer a computer model supporting the hypothesis that they are both derivative of a common pool of principles for memory self-organization.}, KEYWORDS = {Lexical Processing, Self Organizing Maps, Morphological Structure, Serial Memory}, PAGES = {209-226}, URL = {http://www.rivisteweb.it/doi/10.1418/35840}, VOLUME = {2}, DOI = {10.1418/35840}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{MARZI_2011_ARTICLE_MPS_186118, AUTHOR = {Marzi, C. and Pardelli, G. and Sassi, M.}, TITLE = {A terminology based re-definition of Grey Literature}, YEAR = {2011}, ABSTRACT = {The conventionally accepted definition of Grey Literature, as Information produced and distributed by non-commercial publishing, does not take into consideration either the increasing availability of forms of grey knowledge, or the growing importance of computer-based encoding and management as the standard mode of creating and developing grey literature. Semi-automated terminological analysis of almost twenty years of terminological creativity in the proceedings of eleven GL International Conferences offers the opportunity to pave the way to a bottom-up redefinition of Grey Literature stemming from attested terminological creativity and lexical innovation. In this paper, we focus on a set of automatically-acquired terms obtained by subjecting our reference Corpus to a number of pre-processing steps of automated text analysis, such as concordances, frequency lists and lexical association scores.
Acquired terms allow us to throw in sharp relief developing trends and important shifts of emphasis in the current understanding of the notion of Grey Literature.}, KEYWORDS = {Grey Literature, Terminology extraction}, PAGES = {19-23}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84869064979\&origin=inward}, VOLUME = {7}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{NAHLI_2011_ARTICLE_N_288551, AUTHOR = {Nahli, O.}, TITLE = {Yaḥyā ibn ‘Adī sulla differenza fra la logica greca e la grammatica araba}, YEAR = {2011}, ABSTRACT = {This paper examines the Treatise on the Difference between the two fields of philosophical logic and Arabic grammar (Maqāla fī tabyīn al-faṣl bayna ṣina'atay al-manṭiq al-falsafī wa-l-naḥw al-'arabī) by Abū Zakariyā' Yaḥyā ibn 'Adī, providing also its Italian translation. It will appear that Yaḥyā ibn 'Adī's approach is based on Fārābī's ideas about the relationship between logic and the sciences of language. Even more important is the fact that the difference established by Yaḥyā ibn 'Adī's between logic and grammar both as for the subject (mawḍū') and as for scope (ġarad) counts as the source for Avicenna's distinction between subject (mawḍū') and scope (ġarad) of the metaphysics}, PAGES = {47-67}, URL = {http://www.greekintoarabic.eu/uploads/media/3wafae_utlimo.pdf}, VOLUME = {1}, PUBLISHER = {Pacini Editore (Pisa, Italia)}, ISSN = {2281-2687}, JOURNAL = {Studia graeco-arabica}, } @ARTICLE{PEZZULO_2011_ARTICLE_P_205147, AUTHOR = {Pezzulo, G.}, TITLE = {Grounding Procedural and Declarative Knowledge in Sensorimotor Anticipation}, YEAR = {2011}, ABSTRACT = {We propose a view of embodied representations that is alternative to both symbolic/linguistic approaches and purely sensorimotor views of cognition, and can account for procedural and declarative knowledge manipulation.
In accordance with recent evidence in cognitive neuroscience and psychology, we argue that anticipatory and simulative mechanisms, which arose during evolution for action control and not for cognition, determined the first form of representational content and were exapted for increasingly sophisticated cognitive uses. In particular, procedural and declarative forms of knowledge can be explained, respectively, in terms of on-line sensorimotor anticipation and off-line simulations of potential actions, which can give access to tacit knowledge and make it explicit. That is, mechanisms that evolved for the on-line prediction of the consequences of one's own actions (i.e. forward models) determine a (procedural) form of representation, and became exapted for off-line use. They can therefore be used to produce (declarative) knowledge of the world, by running a simulation of the action that would produce the relevant information. We conclude by discussing how embodied representations afford a form of internal manipulation that can be described as internalized situated action.}, KEYWORDS = {anticipation, simulation, representation, internal model, grounding}, PAGES = {78-114}, URL = {https://publications.cnr.it/doc/205147}, VOLUME = {26}, PUBLISHER = {Basil Blackwell (Oxford, Regno Unito)}, ISSN = {0268-1064}, JOURNAL = {Mind \& language (Print)}, } @ARTICLE{PEZZULO_2011_ARTICLE_PBCN_201339, AUTHOR = {Pezzulo, G. and Baldassarre, G. and Cesta, A. and Nolfi, S.}, TITLE = {Research on Cognitive Robotics at the Institute of Cognitive Sciences and Technologies, National Research Council of Italy}, YEAR = {2011}, ABSTRACT = {ISTC-CNR is a research hub in cognitive robotics as this is the main research focus of several research labs working within it. The interdisciplinary approach used is one of the key characteristics of cognitive robotics studies at ISTC-CNR. 
This research involves over 30 people (among researchers, Post-Docs, and PhD students) having different backgrounds (ranging from engineering and computer science to psychology, neuroscience, and philosophy) and pursuing research objectives as diverse as (a) the use of computational and robotic models to investigate psychological and neural phenomena, (b) the realization of novel paradigms for robot learning, control, planning, decision making, team making, and human-robot interaction, and (c) the delivery of novel autonomous robotic technologies that act in real-world scenarios.}, KEYWORDS = {Cognitive Robotics, Cognitive Systems}, PAGES = {367-374}, URL = {https://publications.cnr.it/doc/201339}, VOLUME = {12}, DOI = {10.1007/s10339-011-0402-3}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {1612-4782}, JOURNAL = {Cognitive processing (Print)}, } @ARTICLE{PEZZULO_2011_ARTICLE_PBCFMS_205229, AUTHOR = {Pezzulo, G. and Barsalou, L. W. and Cangelosi, A. and Fischer, M. H. and McRae, K. and Spivey, M. J.}, TITLE = {The mechanics of embodiment: A dialogue on embodiment and computational modeling}, YEAR = {2011}, ABSTRACT = {Embodied theories are increasingly challenging traditional views of cognition by arguing that conceptual representations that constitute our knowledge are grounded in sensory and motor experiences, and processed at this sensorimotor level, rather than being represented and processed abstractly in an amodal conceptual system. Given the established empirical foundation, and the relatively underspecified theories to date, many researchers are extremely interested in embodied cognition but are clamoring for more mechanistic implementations. What is needed at this stage is a push toward explicit computational models that implement sensorimotor grounding as intrinsic to cognitive processes.
In this article, six authors from varying backgrounds and approaches address issues concerning the construction of embodied computational models, and illustrate what they view as the critical current and next steps toward mechanistic theories of embodiment. The first part has the form of a dialog between two fictional characters: Ernest, the "experimenter," and Mary, the "computational modeler." The dialog consists of an interactive sequence of questions, requests for clarification, challenges, and (tentative) answers, and touches the most important aspects of grounded theories that should inform computational modeling and, conversely, the impact that computational modeling could have on embodied theories. The second part of the article discusses the most important open challenges for embodied computational modeling.}, KEYWORDS = {grounded cognition, embodiment, simulation, cognitive robotics, computational modeling}, PAGES = {1-21}, URL = {http://www.frontiersin.org/cognition/10.3389/fpsyg.2011.00005/abstract}, VOLUME = {2}, DOI = {10.3389/fpsyg.2011.00005}, PUBLISHER = {Frontiers media (Lausanne, Svizzera)}, JOURNAL = {Frontiers in Psychology}, } @ARTICLE{PEZZULO_2011_ARTICLE_PC_205233, AUTHOR = {Pezzulo, G. and Calvi, G.}, TITLE = {Computational explorations of perceptual symbol system theory}, YEAR = {2011}, ABSTRACT = {The aim of this paper is twofold. First, we provide a methodological pathway from theories of situated, embodied cognition to simulations with an eye to empirical evidence, and suggest a possible cross-fertilization between cognitive robotics and psychology. Psychological theories, in particular those formulated at an abstract level, include models which are often severely underspecified at the level of mechanisms. This is true in the synchronic, constructive perspective (how can the effects observed in experiments be concretely generated by the model's mechanisms?)
and in the diachronic, developmental perspective (how can such mechanisms be learned and developed?). The synthetic method of artificial cognitive systems research, and in particular of cognitive robotics, can complement research in psychology (and neurosciences) by exploring the constructive and developmental aspects of theories. Our second aim is to provide an example of such a methodology by describing simulations aiming at developing a perceptual symbol system (PSS) (Barsalou, 1999). We then describe the two main theoretical constructs of the PSS, perceptual symbols and simulators, illustrate their development in an artificial system, and test the system in prediction, categorization, and abstraction tasks.}, KEYWORDS = {Perceptual symbol systems, Schemas, Embodiment, Anticipation, Simulation}, PAGES = {275-297}, URL = {http://www.sciencedirect.com/science/article/pii/S0732118X09000336}, VOLUME = {29}, DOI = {10.1016/j.newideapsych.2009.07.004}, PUBLISHER = {Pergamon Press (New York, Regno Unito)}, ISSN = {0732-118X}, JOURNAL = {New ideas in psychology}, } @ARTICLE{PEZZULO_2011_ARTICLE_PD_205167, AUTHOR = {Pezzulo, G. and Dindo, H.}, TITLE = {What should I do next? Using shared representations to solve interaction problems}, YEAR = {2011}, ABSTRACT = {Studies on how the "social mind" works reveal that cognitive agents engaged in joint actions actively estimate and influence another's cognitive variables, and form shared representations with them. (How) do shared representations enhance coordination? In this paper we provide a probabilistic model of joint action that emphasizes how shared representations help solving interaction problems. We focus on two aspects of the model. First, we discuss how shared representations permit to coordinate at the level of cognitive variables (beliefs, intentions and actions), and determine a coherent unfolding of execution and predictive processes in the brains of two agents.
Second, we discuss the importance of signaling actions as part of a strategy for sharing representations and the active guidance of another's actions towards the achievement of a joint goal. Furthermore, we present data from a human-computer experiment (the Tower Game) in which two agents (human and computer) have to build together a tower made of colored blocks, but only the human knows the constellation of the tower to be built (e.g., red-blue-red-blue-...). We report evidence that humans use signaling strategies that take another's uncertainty into consideration, and that in turn our model is able to use humans' actions as cues to "align" its representations and to select complementary actions.}, KEYWORDS = {prediction, joint action, signaling}, PAGES = {613-630}, URL = {https://publications.cnr.it/doc/205167}, VOLUME = {211}, DOI = {10.1007/s00221-011-2712-1}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {1432-1106}, JOURNAL = {Experimental brain research (Internet)}, } @ARTICLE{PEZZULO_2011_ARTICLE_PR_205174, AUTHOR = {Pezzulo, G. and Rigoli, F.}, TITLE = {The value of foresight: how prospection affects decision-making}, YEAR = {2011}, ABSTRACT = {Traditional theories of decision-making assume that utilities are based on the intrinsic value of outcomes; in turn, these values depend on associations between expected outcomes and the current motivational state of the decision-maker. This view disregards the fact that humans (and possibly other animals) have prospection abilities, which permit anticipating future mental processes and motivational and emotional states. For instance, we can evaluate future outcomes in light of the motivational state we expect to have when the outcome is collected, not (only) when we make a decision. Consequently, we can plan for the future and choose to store food to be consumed when we expect to be hungry, not immediately.
Furthermore, similarly to any expected outcome, we can assign a value to our anticipated mental processes and emotions. It has been reported that (in some circumstances) human subjects prefer to receive an unavoidable punishment immediately, probably because they are anticipating the dread associated with the time spent waiting for the punishment. This article offers a formal framework to guide neuroeconomic research on how prospection affects decision-making. The model has two characteristics. First, it uses model-based Bayesian inference to describe anticipation of cognitive and motivational processes. Second, the utility-maximization process considers these anticipations in two ways: to evaluate outcomes (e.g., the pleasure of eating a pie is evaluated differently at the beginning of a dinner, when one is hungry, and at the end of the dinner, when one is satiated), and as outcomes having a value themselves (e.g., the case of dread as a cost of waiting for punishment). By explicitly accounting for the relationship between prospection and value, our model provides a framework to reconcile the utility-maximization approach with psychological phenomena such as planning for the future and dread.}, KEYWORDS = {prediction, prospection, decision-making}, PAGES = {1-15}, URL = {http://www.frontiersin.org/decision_neuroscience/10.3389/fnins.2011.00079/abstract}, VOLUME = {5}, DOI = {10.3389/fnins.2011.00079}, PUBLISHER = {Frontiers Research Foundation (Lausanne, Svizzera)}, ISSN = {1662-453X}, JOURNAL = {Frontiers in neuroscience (Online)}, } @ARTICLE{THOMPSON_2011_ARTICLE_TMMCDLMMPQRSVRA_205232, AUTHOR = {Thompson, P. and McNaught, J. and Montemagni, S. and Calzolari, N. and Del Gratta, R. and Lee, V. and Marchi, S. and Monachini, M. and Pezik, P. and Quochi, V. and Rupp, C. and Sasaki, Y. and Venturi, G. and Rebholz Schuhmann, D. 
and Ananiadou, S.}, TITLE = {The BioLexicon: a large-scale terminological resource for biomedical text mining}, YEAR = {2011}, ABSTRACT = {Background Due to the rapidly expanding body of biomedical literature, biologists require increasingly sophisticated and efficient systems to help them to search for relevant information. Such systems should account for the multiple written variants used to represent biomedical concepts, and allow the user to search for specific pieces of knowledge (or events) involving these concepts, e.g., protein-protein interactions. Such functionality requires access to detailed information about words used in the biomedical literature. Existing databases and ontologies often have a specific focus and are oriented towards human use. Consequently, biological knowledge is dispersed amongst many resources, which often do not attempt to account for the large and frequently changing set of variants that appear in the literature. Additionally, such resources typically do not provide information about how terms relate to each other in texts to describe events. Results This article provides an overview of the design, construction and evaluation of a large-scale lexical and conceptual resource for the biomedical domain, the BioLexicon. The resource can be exploited by text mining tools at several levels, e.g., part-of-speech tagging, recognition of biomedical entities, and the extraction of events in which they are involved. As such, the BioLexicon must account for real usage of words in biomedical texts. In particular, the BioLexicon gathers together different types of terms from several existing data resources into a single, unified repository, and augments them with new term variants automatically extracted from biomedical literature. 
Extraction of events is facilitated through the inclusion of biologically pertinent verbs (around which events are typically organized) together with information about typical patterns of grammatical and semantic behaviour, which are acquired from domain-specific texts. In order to foster interoperability, the BioLexicon is modelled using the Lexical Markup Framework, an ISO standard. Conclusions The BioLexicon contains over 2.2 M lexical entries and over 1.8 M terminological variants, as well as over 3.3 M semantic relations, including over 2 M synonymy relations. Its exploitation can benefit both application developers and users. We demonstrate some such benefits by describing integration of the resource into a number of different tools, and evaluating improvements in performance that this can bring.}, KEYWORDS = {Text Mining, Information Extraction, Computational Lexicon}, PAGES = {1-29}, URL = {http://www.biomedcentral.com/1471-2105/12/397}, VOLUME = {12}, DOI = {10.1186/1471-2105-12-397}, PUBLISHER = {BioMed Central ([London], Regno Unito)}, ISSN = {1471-2105}, JOURNAL = {BMC bioinformatics}, } @ARTICLE{VALENZA_2011_ARTICLE_VPAFSD_329628, AUTHOR = {Valenza, G. and Pioggia, G. and Armato, A. and Ferro, M. and Scilingo, E. P. and De Rossi, D.}, TITLE = {A neuron-astrocyte transistor-like model for neuromorphic dressed neurons}, YEAR = {2011}, ABSTRACT = {Experimental evidences on the role of the synaptic glia as an active partner together with the bold synapse in neuronal signaling and dynamics of neural tissue strongly suggest to investigate on a more realistic neuron-glia model for better understanding human brain processing. Among the glial cells, the astrocytes play a crucial role in the tripartite synapsis, i.e. the dressed neuron. A well-known two-way astrocyte-neuron interaction can be found in the literature, completely revising the purely supportive role for the glia. 
The aim of this study is to provide a computationally efficient model for neuron-glia interaction. The neuron-glia interactions were simulated by implementing the Li-Rinzel model for an astrocyte and the Izhikevich model for a neuron. Assuming the dressed neuron dynamics similar to the nonlinear input-output characteristics of a bipolar junction transistor, we derived our computationally efficient model. This model may represent the fundamental computational unit for the development of real-time artificial neuron-glia networks opening new perspectives in pattern recognition systems and in brain neurophysiology.}, KEYWORDS = {Bio-computational architectures for signal processing, Neuron, Astrocyte, Synapse, Neuron-astrocyte interaction model}, PAGES = {679-685}, URL = {http://www.sciencedirect.com/science/article/pii/S0893608011000979}, VOLUME = {24}, DOI = {10.1016/j.neunet.2011.03.013}, PUBLISHER = {Pergamon (New York, Stati Uniti d'America)}, ISSN = {0893-6080}, JOURNAL = {Neural networks}, } @ARTICLE{VENTURI_2011_ARTICLE_V_320343, AUTHOR = {Venturi, G.}, TITLE = {Semantic annotation of Italian legal texts: a FrameNet-based approach}, YEAR = {2011}, PAGES = {46-79}, URL = {https://publications.cnr.it/doc/320343}, VOLUME = {3}, DOI = {10.1075/cf.3.1.02ven}, PUBLISHER = {Benjamins (Amsterdam, Paesi Bassi)}, ISSN = {1876-1933}, JOURNAL = {Constructions and frames (Print)}, } @BOOK{RATTI_2011_BOOK_RMOM_317996, AUTHOR = {Ratti, D. and Marconi, L. and Oyee, J. B. and Mosuy, P. N.}, TITLE = {Diccionario Fang-Español Español-Fang}, YEAR = {2011}, ABSTRACT = {Un diccionario es un libro que habla de las palabras, que explica el sentido de las palabras, no es un libro que se lee como un cuento, sino un libro que se abre para buscar información, para aclarar dudas, para saber cómo se dice en fa? o en español una determinada palabra. 
Un diccionario bilingüe además explica el sentido de la palabra de una lengua a la otra y funciona como instrumento para aprender a comunicarse en una lengua nueva y también para afianzar el conocimiento de su propia lengua. Este diccionario tiene la ambición de representar, como lengua escrita, las dos variedades del fang de Guinea, el oka y el ntumu, y contiene 8773 entradas repartidas en 3531 comunes a oka y ntumu, 2690 entradas solo del oka y 2552 solo del ntumu. Además, se presenta como un instrumento de fácil consulta, tanto para los hablantes de fang, como para todo el que quiera aprenderlo. Para la realización de esta obra se han tenido que tomar muchas opciones en el ámbito de la ortografía, de las entradas, de la clasificación de las palabras, de las definiciones, etc., y , por lo tanto, este diccionario se propone como una hipótesis que los hablantes de Guinea y los académicos nativos podrán y tendrán que corregir. El uso de la lengua a lo largo del tiempo sancionará la validez de las reglas y las convenciones o determinará cómo cambiarlas. El conocimiento de cualquier fenómeno no viene dado por la solución de cualesquiera dudas o experiencias, sino por el desarrollo o la negación de lo que ya se conoce: nunca se pasa de la perfecta ignorancia al perfecto conocimiento. El diccionario está dividido en dos partes, la primera contiene las palabras fang como entrada y la traducción española. La segunda parte contiene las palabras españolas con la correspondiente traducción en fang.}, KEYWORDS = {dizionario, lingua fang, spagnolo}, PAGES = {1148}, URL = {https://publications.cnr.it/doc/317996}, PUBLISHER = {2CT Asociación para la Conservación de la Cultura tribal (Pieve Ligure, ITA)}, ISBN = {978-88-906102-0-2}, } @INCOLLECTION{BUTZ_2011_INCOLLECTION_BP_205245, AUTHOR = {Butz, M. V. 
and Pezzulo, G.}, TITLE = {Anticipatory learning}, YEAR = {2011}, ABSTRACT = {Anticipatory learning is sometimes considered synonymous with the general mechanism of learning to generate predictions or learning a predictive or forward model of an encountered environment or problem. However, the term anticipation usually does not simply refer to predictions, but rather to predictions that are expected to be relevant to an organism and that are used to effectively adapt decisions and behaviors of organisms. Therefore, anticipatory learning is not merely about learning to predict, but learning to predict those aspects that are relevant for the learning system. Such predictions may start on a very low sensorimotor level, such as learning how body movements feel in order to be able to focus on other sensory information. On a higher level, action-dependent contingencies may be learned that are highly useful for decision making ...}, KEYWORDS = {Curious learning, Ideo-motor principle of learning, Learning of predictions, Sensorimotor learning}, PAGES = {263-266}, URL = {http://www.springerreference.com/docs/html/chapterdbid/319709.html}, PUBLISHER = {Springer (Dordrecht, NLD)}, ISBN = {978-1-4419-1427-9}, BOOKTITLE = {Encyclopedia of the Sciences of Learning}, } @INCOLLECTION{CALZOLARI_2011_INCOLLECTION_C_322399, AUTHOR = {Calzolari, N.}, TITLE = {Linguistica Computazionale e Risorse Linguistiche}, YEAR = {2011}, ABSTRACT = {A partire dagli anni '80 le cosiddette Risorse Linguistiche (RL) hanno progressivamente acquisito un ruolo sempre più importante nella linguistica computazionale e nel Trattamento Automatico della Lingua (TAL).
Questo processo è iniziato con il diffondersi degli approcci data-driven, all'interno dei quali le RL hanno contribuito in modo decisivo allo sviluppo di sistemi innovativi e soprattutto "robusti", cioè in grado di trattare i fenomeni reali della lingua così come usata dai parlanti piuttosto che i fenomeni ritenuti "interessanti" dal linguista. Si è cominciato allora a riconoscere il ruolo infrastrutturale delle RL: Antonio Zampolli è stato il primo a comprendere questo ruolo e a lui dobbiamo anche l'introduzione del termine stesso "risorse linguistiche" ...}, KEYWORDS = {Linguistica Computazionale, Risorse Linguistiche RL, Standard}, PAGES = {32-64}, URL = {https://publications.cnr.it/doc/322399}, PUBLISHER = {Città Nuova (Roma, ITA)}, ISBN = {978-88-311-3505-4}, BOOKTITLE = {Scienze informatiche e biologiche. Epistemologia e ontologia}, EDITOR = {Cicchese, G. and Pettorossi, A. and Reghizzi, S. C. and Senni, V.}, } @INCOLLECTION{DELLORLETTA_2011_INCOLLECTION_DMVV_138775, AUTHOR = {Dell'Orletta, F. and Montemagni, S. and Vecchi, E. M. and Venturi, G.}, TITLE = {Tecnologie linguistico-computazionali per il monitoraggio della competenza linguistica italiana degli alunni stranieri nella scuola primaria e secondaria}, YEAR = {2011}, ABSTRACT = {La possibilità di disporre di tecnologie avanzate e innovative che permettano di monitorare la competenza linguistica degli alunni stranieri e, al contempo, valutare l'adeguatezza dei materiali didattici a loro offerti può essere di supporto all'insegnante nell'orientare la propria azione formativa, rendendo così il processo di integrazione linguistico-culturale meno faticoso e traumatico.
In tale ottica, questo studio, realizzato col supporto di una piattaforma ormai consolidata di metodi e strumenti per il trattamento automatico dell'italiano, costituisce il primo tentativo condotto in relazione alla lingua italiana, per mettere a punto una metodologia di monitoraggio linguistico rivolta specificamente agli studenti apprendenti la lingua italiana come L2 ed alle loro produzioni scritte.}, KEYWORDS = {Trattamento Automatico del Linguaggio, Stranieri, Lingua italiana}, PAGES = {319-336}, URL = {https://publications.cnr.it/doc/138775}, PUBLISHER = {Mc Graw-Hill (Milano, ITA)}, ISBN = {978-88-386-7296-5}, BOOKTITLE = {Percorsi Migranti}, EDITOR = {Bruno, G. C. and Caruso, I. and Sanna, M. and Vellecco, I.}, } @INCOLLECTION{HAYASHI_2011_INCOLLECTION_HDCMSB_205409, AUTHOR = {Hayashi, Y. and Declerck, T. and Calzolari, N. and Monachini, M. and Soria, C. and Buitelaar, P.}, TITLE = {Language Service Ontology}, YEAR = {2011}, ABSTRACT = {The Language Grid is a distinctive language service infrastructure in the sense that it accommodates a wide variety of user needs, ranging from technical novices to experts; language resource consumers to language resource providers. As these language services are various in type and each of them can be idiosyncratic in many aspects, the service infrastructure has to address the issue of interoperability. A key to solve this issue is not only to build the services around standardized resources and interfaces, but also to establish a knowledge structure that copes effectively with a range of language services. Given this knowledge structure, referred to as a service ontology, each language service can be systematically classified and its usage specified by a corresponding API. 
This not only enables the utilization of existing language resources but facilitates the dissemination of newly created language resources as services.}, KEYWORDS = {Language grid, ontology}, PAGES = {85-100}, URL = {https://publications.cnr.it/doc/205409}, DOI = {10.1007/978-3-642-21178-2_6}, PUBLISHER = {Springer-Verlag (Berlin/Heidelberg, DEU)}, ISBN = {978-3-642-21177-5}, BOOKTITLE = {The Language Grid}, EDITOR = {Ishida, T.}, } @INCOLLECTION{MARINELLI_2011_INCOLLECTION_M_206096, AUTHOR = {Marinelli, R.}, TITLE = {Costruzione di risorse terminologiche: criteri, risultati e prodotti}, YEAR = {2011}, ABSTRACT = {L'articolo parla della costruzione di tre lessici specialistici organizzati come database di tipo relazionale. I tre database contengono termini appartenenti a tre campi di conoscenza specifici: terminologia marittima (navigazione e trasporti marittimi), terminologia fiscale, terminologia sindacale e del lavoro. Il database di terminologia marittima è stato costruito sulla base del modello del database semantico lessicale ItalWordNet e la metodologia sperimentata è stata applicata per creare gli altri database. Essa fa riferimento i) alla gestione di corpora di linguaggi specialistici, ii) all'uso di database generici per identificare ed estrarre insiemi di parole, potenzialmente appartenenti a domini particolari, che possano essere inseriti nei database terminologici e iii) alla creazione del software e all'utilizzo del tool per la gestione dei database. 
Vorremo mettere in rilievo nella descrizione delle tre risorse terminologiche i vari tipi di relazioni semantico lessicali che legano ciascun termine agli altri concetti all'interno del singolo database di dominio, ai database generici di lingua Italiana (ItalWordNet) e Inglese (WordNet), e infine ai concetti dell'ontologia generale (Top Ontology) ereditata da IWN e ai concetti dell'ontologia di dominio.}, KEYWORDS = {lexical resources, terminology, lexical semantic databases}, PAGES = {129-146}, URL = {https://publications.cnr.it/doc/206096}, PUBLISHER = {EDUCatt Università Cattolica (Milano, ITA)}, ISBN = {978-88-8311-820-3}, BOOKTITLE = {Terminologie specialistiche e prodotti terminologici}, EDITOR = {Zanola, M. T. and Bonadonna, M. F.}, } @INCOLLECTION{OGNIBENE_2011_INCOLLECTION_OCP_205224, AUTHOR = {Ognibene, D. and Catenacci Volpi, N. and Pezzulo, G.}, TITLE = {Learning to grasp information with your own hands}, YEAR = {2011}, ABSTRACT = {Autonomous robots immersed in a complex world can seldom directly access relevant parts of the environment by only using their sensors. Indeed, finding relevant information for a task can require the execution of actions that explicitly aim at unveiling previously hidden information. Informativeness of an action depends strongly on the current environment and task beyond the architecture of the agent. An autonomous adaptive agent has to learn to exploit the epistemic (e.g., information-gathering) implications of actions that are not architecturally designed to acquire information (e.g. orientation of sensors). The selection of these actions cannot be hardwired as general-purpose information-gathering actions, because differently from sensor control actions they can have effects on the environment and can affect the task execution. In robotics information-gathering actions have been used in navigation [7]; in active vision [4]; and in manipulation [3]. 
In all these works the informative value of each action was known and exploited at design time while the problem of actively facing un-predicted state uncertainty has not received much}, KEYWORDS = {Artificial Intelligence, Robotics}, PAGES = {398-399}, URL = {https://publications.cnr.it/doc/205224}, VOLUME = {6856}, DOI = {10.1007/978-3-642-23232-9_46}, ISBN = {978-3-642-23231-2}, BOOKTITLE = {Towards Autonomous Robotic Systems: 12th Annual Conference, TAROS 2011}, EDITOR = {Groß, R. and Alboul, L. and Melhuish, C. and Witkowski, M. and Prescott, T. J. and Penders, J.}, } @INCOLLECTION{PEZZULO_2011_INCOLLECTION_PB_206104, AUTHOR = {Pezzulo, G. and Butz, M. V.}, TITLE = {Schema-based architectures of machine learning}, YEAR = {2011}, ABSTRACT = {Schema-based architectures (SBAs) consist of collections of modularly and hierarchically organized schemas, which constitute building blocks for perception, cognition, and action. An SBA organizes these schemas in such a way so that action selection, motor coordination, and cognition in the general sense interact effectively. SBAs were mainly inspired by theories of sensorimotor adaptation and cognitive development and learning. Particularly Jean Piaget's research on and theories of cognitive development in infants and children inspired the design of SBAs. Machine learning develops algorithms to learn, structure, and continuously adapt SBAs. Various forms of representations are used to develop SBAs, including symbolic representations, rule-based representations, as well as neural network representations.}, KEYWORDS = {Human Cognitive Architecture}, PAGES = {2942-2945}, URL = {http://www.springerreference.com/docs/html/chapterdbid/319710.html}, PUBLISHER = {Springer (Dordrecht, NLD)}, BOOKTITLE = {Encyclopedia of the Sciences of Learning}, EDITOR = {Seel, N. M.}, } @INCOLLECTION{ROBALDO_2011_INCOLLECTION_RCRG_205729, AUTHOR = {Robaldo, L. and Caselli, T. and Russo, I. 
and Grella, M.}, TITLE = {From Italian Text to TimeML Document via Dependency Parsing}, YEAR = {2011}, ABSTRACT = {This paper describes the first prototype for building TimeML xml documents starting from raw text for Italian. First, the text is parsed with the TULE parser, a dependency parser developed at the University of Turin. The parsed text is then used as input to the TimeML rule-based module we have implemented, henceforth called as 'The converter'. So far, the converter identifies and classifies events in the sentence. The results are rather satisfactory, and this leads us to support the use of dependency syntactic relations for the development of higher level semantic tools.}, KEYWORDS = {Parsing, TimeML}, PAGES = {177-187}, URL = {https://publications.cnr.it/doc/205729}, VOLUME = {6609}, PUBLISHER = {Springer-Verlag (Berlin/Heidelberg, DEU)}, ISBN = {978-3-642-19436-8}, EDITOR = {Gelbukh, A.}, } @INCOLLECTION{SASSI_2011_INCOLLECTION_SC_206089, AUTHOR = {Sassi, M. and Cinini, A.}, TITLE = {La banca dati dei provvedimenti della sezione disciplinare del Consiglio Superiore della Magistratura (1990-2007)}, YEAR = {2011}, KEYWORDS = {Linguistica Computazionale, Analisi Sentenze}, PAGES = {129-150}, URL = {https://publications.cnr.it/doc/206089}, PUBLISHER = {CLUEB (Bologna, ITA)}, ISBN = {978-88-491-3513-8}, EDITOR = {Fabri, M.}, } @EDITORIAL{CALZOLARI_2011_EDITORIAL_CBSGMQ_206410, AUTHOR = {Calzolari, N. and Baroni, P. and Soria, C. and Goggi, S. and Monachini, M. 
and Quochi, V.}, TITLE = {Proceedings of the 3rd European Language Resources and Technologies Forum: Language Resources in the Sharing Age-the Strategic Agenda}, YEAR = {2011}, ABSTRACT = {Proceedings of the third FLaReNet forum on the European Language Resources and Technologies, held in Venezia, at the Auditorium Santa Margherita of the Università Ca' Foscari, on 26-27 May 2011.}, KEYWORDS = {Language Resources, Language Technologies}, PAGES = {86}, URL = {http://www.flarenet.eu/sites/default/files/FLaReNet_Forum_2011_Proceedings.pdf}, } @INPROCEEDINGS{CALZOLARI_2011_INPROCEEDINGS_CDFR_205564, AUTHOR = {Calzolari, N. and Del Gratta, R. and Frontini, F. and Russo, I.}, TITLE = {The Language Library: Many Layers, More Knowledge}, YEAR = {2011}, ABSTRACT = {In this paper we outline the general concept of the Language Library, a new initiative that has the purpose of building a huge archive of structured collection of linguistic information. The Language Library is conceived as a community built repository and as an environment that allows language specialists to share multidimensional and multi-level annotated/processed resources. The first steps towards its implementation are briefly sketched.}, KEYWORDS = {Language Resources, Language Library}, PAGES = {93-97}, URL = {https://publications.cnr.it/doc/205564}, ISBN = {978-974-466-564-5}, CONFERENCE_NAME = {Workshop on Language Resources, Technology and Services in the Sharing Paradigm}, CONFERENCE_PLACE = {Chiang Mai}, CONFERENCE_DATE = {12 Novembre 2011}, } @INPROCEEDINGS{CALZOLARI_2011_INPROCEEDINGS_CIPS_288988, AUTHOR = {Calzolari, N. and Ishida, T. and Piperidis, S. 
and Sornlertlamvanich, V.}, TITLE = {Introduction}, YEAR = {2011}, ABSTRACT = {Some of the current major initiatives in the area of language resources - FLaReNet (http://www.flarenet.eu/), Language Grid (http://langrid.nict.go.jp/en/index.html) and META-SHARE (www.meta-share.org, www.meta-net.eu) - have agreed to organise a joint workshop on infrastructural issues that are critical in the age of data sharing and open data, to discuss the state of the art, international cooperation, future strategies and priorities, as well as the road-map of the area. It is an achievement, and an opportunity for our field, that recently a number of strategic-infrastructural initiatives have started all over the world. It is also a sign that funding agencies recognise the strategic value of our field and the importance of helping a coherent growth also through a number of coordinated actions. Some of these initiatives, two European and one Asian, have agreed to join forces to foster a debate that may lead to future coordinated actions all over the world.}, KEYWORDS = {Language Resources and Technologies}, PAGES = {viii-ix}, URL = {https://publications.cnr.it/doc/288988}, CONFERENCE_NAME = {Workshop on Language Resources, Technology and Services in the Sharing Paradigm, IJCNLP 2011}, CONFERENCE_PLACE = {Chiang Mai, Thailand}, CONFERENCE_DATE = {November 12, 2011}, EDITOR = {Calzolari, N. and Ishida, T. and Piperidis, S. and Sornlertlamvanich, V.}, } @INPROCEEDINGS{CALZOLARI_2011_INPROCEEDINGS_CMQ_205719, AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V.}, TITLE = {Interoperability Framework: The FLaReNet action plan proposal}, YEAR = {2011}, ABSTRACT = {Standards are fundamental to exchange, preserve, maintain and integrate data and language resources, and as an essential basis of any language resource infrastructure. 
This paper promotes an Interoperability Framework as a dynamic environment of standards and guidelines, also intended to support the provision of language-(web)service interoperability. In the past two decades, the need to define common practices and formats for linguistic resources has been increasingly recognized and sought. Today open, collaborative, shared data is at the core of a sound language strategy, and standardisation is actively on the move. This paper first describes the current landscape of standards, and presents the major barriers to their adoption; then, it describes those scenarios that critically involve the use of standards and provide a strong motivation for their adoption; lastly, a series of actions and steps needed to operationalise standards and achieve a full interoperability for Language Resources and Technologies are proposed.}, KEYWORDS = {Language Resources, standards}, PAGES = {41-49}, URL = {https://publications.cnr.it/doc/205719}, ISBN = {978-974-466-564-5}, CONFERENCE_NAME = {Workshop on Language Resources, Technology and Services in the Sharing Paradigm}, CONFERENCE_PLACE = {Chiang Mai}, CONFERENCE_DATE = {12 Novembre 2011}, } @INPROCEEDINGS{CIGNONI_2011_INPROCEEDINGS_CFCF_205516, AUTHOR = {Cignoni, L. and Fornaciari, A. and Coschino, F. and Fornaciari, G.}, TITLE = {Step-by-step Organization of a University CLIL Course}, YEAR = {2011}, ABSTRACT = {This paper reports on the organization of CLIL (Content and Language Integrated Learning) University courses in funerary archaeology held at the Division of Palaeopathology of Pisa University. We outline the different steps involved in the practical implementation of the proposed approach, which include choice of topic, linguistic content, tasks and strategies, and we describe the ways in which CLIL can be used both in the classroom and in archaeological fieldwork excavations for teaching of the discipline and practical experience with leading scholars in the field. 
Each two-hour lesson slot is divided into four parts, devoted to both the receptive (reading, listening) and productive (writing, speaking) skills, which constantly expose the students to language, helping them understand the contents of the discipline. It is necessary to take into account the additional difficulties students attending the courses might have, which are due to their having to learn basic and academic language skills and new subject concepts at the same time. All the material relevant to the course is simplified and adapted to the needs and language of the students, who are supported by authentic materials in the form of text-books, articles, tutorials, illustrations, audio and video recordings, and by a number of activities ranging from gap-filling exercises, matching words with their definitions, jumbled sentences, sentence formation, preparation of posters, powerpoint demonstrations. The trainees are also involved in increasing an ongoing bilingual English-Italian glossary and contextualized English grammar. Working individually, then in pairs and in small groups, they are responsible for the different areas of the discipline. Funerary archaeology is the study of death, ancient burials and human skeletal remains, body disposal, etc., and includes skeleton anthropology, bone diagenesis, taphonomic anthropology, as well as other features comprising excavation phases, techniques and tools employed, field archaeology.}, KEYWORDS = {Funerary archaeology}, PAGES = {7}, URL = {https://publications.cnr.it/doc/205516}, ISBN = {978-88-7647-677-8}, CONFERENCE_NAME = {International Conference-ICT for Language Learning}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {20-21 ottobre 2011}, EDITOR = {Editore, S.}, } @INPROCEEDINGS{CIGNONI_2011_INPROCEEDINGS_CFF_205524, AUTHOR = {Cignoni, L. and Fornaciari, A. 
and Fornaciari, G.}, TITLE = {An English Grammar and a bilingual Glossary acting as complementary Tools for a CLIL-based Course}, YEAR = {2011}, ABSTRACT = {This paper describes how two complementary tools, an English grammar and a bilingual (Italian- English) glossary, can be expanded by University students attending a CLIL (Content and Language Integrated Learning) course in which a funerary archaeology lecturer, an English language instructor and an archaeologist work together, integrating content and language. This work is part of a wider project carried out at the Division of Palaeopathology, History of Medicine and Bioethics, and approved by the University of Pisa. Starting from a sample of Italian and English monographic texts and other publications in printed or electronic form dealing with the subject of funerary archaeology and other related research areas (anthropology, field archaeology, anatomy and chemistry, which can provide new insights into past civilizations, cultures and practices so far undiscovered), we have extracted separate, preliminary lists of specialized terms. The students working alone, in pairs or in groups, are asked to expand these lists, tracking down additional words with their definitions and example sentences drawn from other authoritative sources. The information with specification of the authors and detailed bibliographical references should be written in independent appropriately labelled files, and sent to the computer analyst responsible for the computer software editing. The reading of various definitions at different levels of depth will enable the user to understand better, have a clearer and more exhaustive picture of a particular word, concept, or phenomenon. 
The glossary, addressed to the students who are at the same time creators and users of the product, can also be of interest to professors, scholars or translators who need to dispose of the specialised terms of funerary archaeology in a language other than their own. Many of the definitions and other types of useful information can be exploited to illustrate the different grammar points and structures of an easy-to-use on-line English intermediate-level grammar book, to study the grammar not in isolation but in meaningful contexts and real-life situations, to encourage the learners to become active explorers of the language. This ongoing grammar can be a valuable resource for students with minimum linguistic knowledge and competence, but also be useful to those wishing to improve the English language, enhancing their learning proficiency. Implementation of the two complementary products - grammar and glossary - will proceed together, contributing to the learning of funerary archaeology on the part of the students, both learners and creators of the two tools. As we know, the possibilities offered by the computer in terms of space, links, cross-references, etc. make it possible to organize and customize the material, meeting as much as possible the users' needs. The technological tools increasingly available in the educational context support both the subject and language teacher in making the learning process easier and more engaging, helping clarify certain concepts in a non-traditional way in order to accomplish various instructional objectives.}, KEYWORDS = {funerary archaeology}, PAGES = {1834-1841}, URL = {https://publications.cnr.it/doc/205524}, ISBN = {978-84-615-0442-8}, CONFERENCE_NAME = {International Conference on Education and Development and New Learning Technologies}, CONFERENCE_PLACE = {Barcellona}, CONFERENCE_DATE = {4-6 Luglio 2011}, EDITOR = {Chova, L. G. and Belenguer, D. M. and Martínez, A. 
L.}, } @INPROCEEDINGS{CUTUGNO_2011_INPROCEEDINGS_CCZMM_264929, AUTHOR = {Cutugno, P. and Chiarella, D. and Zini, V. and Marconi, L. and Morgavi, G.}, TITLE = {CorLES: Corpus e Lessico Elementare Scritto. Realización de un corpus y léxico del italiano escrito en las escuelas}, YEAR = {2011}, PAGES = {935-938}, URL = {https://publications.cnr.it/doc/264929}, PUBLISHER = {Centro de Lingüística Aplicada, Ministerio de Ciencia, Tecnología y Medio Ambiente (Santiago de Cuba, CUB)}, ISBN = {9789597174196}, CONFERENCE_NAME = {XII Simposio Internacional de Comunicación Social: Comunicación Social en el siglo XXI}, CONFERENCE_PLACE = {SANTIAGO DE CUBA}, CONFERENCE_DATE = {17-21 gennaio 2011}, BOOKTITLE = {Comunicación Social en el siglo XXI-Vol. II}, EDITOR = {Ruiz Miyares, L. and Alvarez Silva, M. R.}, } @INPROCEEDINGS{CUTUGNO_2011_INPROCEEDINGS_CZCM_264928, AUTHOR = {Cutugno, P. and Zini, V. and Chiarella, D. and Marconi, L.}, TITLE = {To tell oneself on the web: the case of the teenager blog}, YEAR = {2011}, PAGES = {939-943}, URL = {https://publications.cnr.it/doc/264928}, PUBLISHER = {Centro de Lingüística Aplicada, Ministerio de Ciencia, Tecnología y Medio Ambiente (Santiago de Cuba, CUB)}, ISBN = {9789597174196}, CONFERENCE_NAME = {XII Simposio Internacional de Comunicación Social: Comunicación Social en el siglo XXI}, CONFERENCE_PLACE = {SANTIAGO DE CUBA}, CONFERENCE_DATE = {17-21 gennaio 2011}, BOOKTITLE = {Comunicación Social en el siglo XXI-Vol. II}, EDITOR = {Ruiz Miyares, L. and Alvarez Silva, M. R.}, } @INPROCEEDINGS{DELLORLETTA_2011_INPROCEEDINGS_DMV_205510, AUTHOR = {Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {READ-IT: assessing readability of Italian texts with a view to text simplification}, YEAR = {2011}, ABSTRACT = {In this paper, we propose a new approach to readability assessment with a specific view to the task of text simplification: the intended audience includes people with low literacy skills and/or with mild cognitive impairment. 
READ-IT represents the first advanced readability assessment tool for what concerns Italian, which combines traditional raw text features with lexical, morpho-syntactic and syntactic information. In READ-IT readability assessment is carried out with respect to both documents and sentences where the latter represents an important novelty of the proposed approach creating the prerequisites for aligning the readability assessment step with the text simplification process. READ-IT shows a high accuracy in the document classification task and promising results in the sentence classification scenario.}, KEYWORDS = {Readability Assessment, Text Simplification}, PAGES = {73-83}, URL = {http://dl.acm.org/citation.cfm?id=2140511}, ISBN = {978-1-937284-14-5}, CONFERENCE_NAME = {SLPAT '11 Proceedings of the Second Workshop on Speech and Language Processing for Assistive Technologies}, CONFERENCE_PLACE = {Edimburgo, UK}, CONFERENCE_DATE = {30 Luglio 2011}, } @INPROCEEDINGS{DELLORLETTA_2011_INPROCEEDINGS_DVM_205505, AUTHOR = {Dell'Orletta, F. and Venturi, G. and Montemagni, S.}, TITLE = {ULISSE: an unsupervised algorithm for detecting reliable dependency parses}, YEAR = {2011}, ABSTRACT = {In this paper we present ULISSE, an unsupervised linguistically--driven algorithm to select reliable parses from the output of a dependency parser. Different experiments were devised to show that the algorithm is robust enough to deal with the output of different parsers and with different languages, as well as to be used across different domains. 
In all cases, ULISSE appears to outperform the baseline algorithms.}, KEYWORDS = {Dependency Parsing, Selection of Reliable Parses, Unsupervised Algorithm}, PAGES = {115-124}, URL = {http://dl.acm.org/citation.cfm?id=2018950}, ISBN = {978-1-932432-92-3}, CONFERENCE_NAME = {CoNLL '11 Proceedings of the Fifteenth Conference on Computational Natural Language Learning}, CONFERENCE_PLACE = {Portland, Oregon, USA}, CONFERENCE_DATE = {23-24 Giugno 2011}, } @INPROCEEDINGS{DINDO_2011_INPROCEEDINGS_DZP_203805, AUTHOR = {Dindo, H. and Zambuto, D. and Pezzulo, G.}, TITLE = {Motor simulation via coupled internal models using sequential Monte Carlo}, YEAR = {2011}, ABSTRACT = {We describe a generative Bayesian model for action understanding in which inverse-forward internal model pairs are considered 'hypotheses' of plausible action goals that are explored in parallel via an approximate inference mechanism based on sequential Monte Carlo methods. The reenactment of internal model pairs can be considered a form of motor simulation, which supports both perceptual prediction and action understanding at the goal level. However, this procedure is generally considered to be computationally inefficient. We present a model that dynamically reallocates computational resources to more accurate internal models depending on both the available prior information and the prediction error of the inverse-forward models, and which leads to successful action recognition. 
We present experimental results that test the robustness and efficiency of our model in real-world scenarios.}, KEYWORDS = {prediction, simulation}, PAGES = {2113-2119}, URL = {https://publications.cnr.it/doc/203805}, PUBLISHER = {AAAI Press (Arlington [VA], USA)}, CONFERENCE_NAME = {Proceedings of the Twenty-Second International Joint Conference on Artificial Intelligence, Barcelona, Catalonia, Spain, 16-22 July 2011}, CONFERENCE_PLACE = {Barcelona}, CONFERENCE_DATE = {16-22 July 2011}, EDITOR = {Walsh, T.}, } @INPROCEEDINGS{EUGENIO_2011_INPROCEEDINGS_ES_205537, AUTHOR = {Eugenio, P. and Sassolini, E.}, TITLE = {The "Micro Semantics" for intelligent browsing}, YEAR = {2011}, ABSTRACT = {Study and development of methodologies to improve systems of "information retrieval". Our approach is based on the integration of techniques, originally created to disciplines such as philology, lexicography, literature, with linguistic and statistical tools for the extraction and analysis of information in the text. Also we experimented a special methodology, for the creation of specific semantic metadata for text materials. In this paper, we describe "SmartCity", a project in which we applied these strategies. The project aims at designing and developing multimedia content (audio-guide for the new generation of interactive media and off-line and on-line) for the use of custom-cultural tourist routes, both physical (in the context of museums and cities) and virtual.}, KEYWORDS = {Semantic Analysis, Information Retrieval, Text Mining}, PAGES = {117-123}, URL = {https://publications.cnr.it/doc/205537}, VOLUME = {4}, ISBN = {978-88-905639-8-0}, CONFERENCE_NAME = {5th International Congress on "Science and Technology for the Safeguard of Cultural Heritage in the Mediterranean Basin"}, CONFERENCE_PLACE = {Istanbul, Turkey}, CONFERENCE_DATE = {22-25 November 2011}, } @INPROCEEDINGS{FERRO_2011_INPROCEEDINGS_FMP_205490, AUTHOR = {Ferro, M. and Marzi, C. 
and Pirrelli, V.}, TITLE = {T2HSOM: Understanding the Lexicon by Simulating Memory Processes for Serial Order}, YEAR = {2011}, ABSTRACT = {Over the last several years, both theoretical and empirical approaches to lexical knowledge and encoding have prompted a radical reappraisal of the traditional dichotomy between lexicon and grammar. The lexicon is not simply a large waste basket of exceptions and sub-regularities, but a dynamic, possibly redundant repository of linguistic knowledge whose principles of relational organization are the driving force of productive generalizations. In this paper, we overview a few models of dynamic lexical organization based on neural network architectures that are purported to meet this challenging view. In particular, we illustrate a novel family of Kohonen self-organizing maps (T2HSOMs) that have the potential of simulating competitive storage of symbolic time series while exhibiting interesting properties of morphological organization and generalization. The model, tested on training samples of as morphologically diverse languages as Italian, German and Arabic, shows sensitivity to manifold types of morphological structure and can be used to bootstrap morphological knowledge in an unsupervised way.}, KEYWORDS = {Mental Lexicon, Self-organizing Maps, Morphology}, PAGES = {32-41}, URL = {http://alpage.inria.fr/~sagot/woler2011/WoLeR2011/Program_%26_Proceedings.html}, CONFERENCE_NAME = {First International Workshop on Lexical Resources}, CONFERENCE_PLACE = {Ljubljana Slovenia}, CONFERENCE_DATE = {1-5 Agosto 2011}, BOOKTITLE = {First International Workshop on Lexical Resources}, EDITOR = {Sagot, B.}, } @INPROCEEDINGS{FRONTINI_2011_INPROCEEDINGS_FMGLPFAM_205601, AUTHOR = {Frontini, F. and Monachini, M. and Gavrilidou, M. and Labropoulou, P. and Piperidis, S. and Francopoulo, G. and Arranz, V. 
and Mapelli, V.}, TITLE = {A Metadata Schema for the Description of Language Resources (LRs)}, YEAR = {2011}, ABSTRACT = {This paper presents the metadata schema for describing language resources (LRs) currently under development for the needs of META-SHARE, an open distributed facility for the exchange and sharing of LRs. An essential ingredient in its setup is the existence of formal and standardized LR descriptions, cornerstone of the interoperability layer of any such initiative. The description of LRs is granular and abstractive, combining the taxonomy of LRs with an inventory of a structured set of descriptive elements, of which only a minimal subset is obligatory; the schema additionally proposes recommended and optional elements. Moreover, the schema includes a set of relations catering for the appropriate inter-linking of resources. The current paper presents the main principles and features of the metadata schema, focusing on the description of text corpora and lexical / conceptual resources.}, KEYWORDS = {metadata, language resources}, PAGES = {84-92}, URL = {https://publications.cnr.it/doc/205601}, ISBN = {978-974-466-564-5}, CONFERENCE_NAME = {Workshop on Language Resources, Technology and Services in the Sharing Paradigm}, CONFERENCE_PLACE = {Chiang Mai}, CONFERENCE_DATE = {12 Novembre 2011}, } @INPROCEEDINGS{GIOVANNETTI_2011_INPROCEEDINGS_GM_282633, AUTHOR = {Giovannetti, E. and Marchi, S.}, TITLE = {Cross-Language Boosting in Pattern-based Semantic Relation Extraction from Text}, YEAR = {2011}, ABSTRACT = {In this work we propose a novel technique called "Cross-Language Boosting" (C-LB), aimed at increasing the accuracy of pattern-based semantic relation extraction systems: given a pair of terms expressed in a "Target Language" (e.g. in Italian), we can translate the terms in a "Support Language" (e.g. 
in English) and apply the translated term pair to reliable lexico-syntactic patterns expressed in that language to increase the accuracy of the system. Experiments have been conducted by comparing the results obtained by the SemRelEx system, a hybrid unsupervised system for semantic relation extraction from texts, with and without the support of the C-LB technique, applied to a set of candidate semantically related term pairs automatically extracted from a corpus in the History of Art domain.}, KEYWORDS = {Computational Linguistics, Cross Language, semantic relation extraction systems, Ontology Learning from Text}, PAGES = {29-36}, URL = {https://web.archive.org/web/20121101020859/http://www.proceedings2011.cla-conf.info/}, ISBN = {9788360810477}, CONFERENCE_NAME = {Computational Linguistics Application Conference-CLA 2011}, CONFERENCE_PLACE = {Jachranka, Poland}, CONFERENCE_DATE = {17-19 ottobre 2011}, BOOKTITLE = {Proceedings of the Computational Linguistics-Applications Conference}, EDITOR = {Jassem, K. and Fuglewicz, P. and Piasecki, M. and Przepiorkowski, A.}, } @INPROCEEDINGS{MARINELLI_2011_INPROCEEDINGS_MS_205527, AUTHOR = {Marinelli, R. and Spadoni, G. P.}, TITLE = {An organized set of generic and specialized lexicons}, YEAR = {2011}, ABSTRACT = {The paper describes the construction of three specialized lexicons organized as databases of a relational type. The databases contain terms belonging to different knowledge fields: the first is a database of maritime terminology (technical-nautical and maritime transport domain) (MDB); in the other two databases terms belonging to the knowledge field of taxation law (TDB) and to the domain of labour law and union labour rules (LDB) are codified. The database of maritime terminology (MDB) was built first, on the basis of the EuroWordNet (EWN) and ItalWordNet (IWN) model, using lexical semantic relations to codify terms, within the framework of the Princeton WordNet (WN) philosophy; it includes about 4000 lemmas. 
The other two databases (1600 and 1500 lemmas respectively) were structured following similar criteria, in keeping with the methods already successfully experimented to create the MDB and also to enhance it with a subset of terms belonging to the scientific domain of Meteorology, that is an organized set of research phases constituting a true methodology to create terminological databases. These phases are herewith described: i) the corpus approach; ii) the generic database approach. Then the whole terminological network is outlined, highlighting the different kinds of relations linking the terminological and the generic resources.}, KEYWORDS = {Linguistic resources, lexical semantic databases, terminology}, PAGES = {944-947}, URL = {http://www.santiago.cu/hosting/linguistica/simposios.php?id=en\&s=12th}, VOLUME = {2}, PUBLISHER = {Centro de linguística aplicada, Ministerio de ciencia, tecnología y medio ambiente (Santiago de Cuba, CUB)}, ISBN = {978-959-7174-19-6}, CONFERENCE_NAME = {12th International Symposium on Social Communication}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {17-21 gennaio 2011}, } @INPROCEEDINGS{MARZI_2011_INPROCEEDINGS_MPS_176389, AUTHOR = {Marzi, C. and Pardelli, G. and Sassi, M.}, TITLE = {A terminology based re-definition of Grey Literature}, YEAR = {2011}, ABSTRACT = {The conventionally accepted definition of Grey Literature, as Information produced and distributed by non-commercial publishing, does not take into consideration either the increasing availability of forms of grey knowledge, or the growing importance of computer-based encoding and management as the standard mode of creating and developing grey literature. Semi-automated terminological analysis of almost twenty years of terminological creativity in the proceedings of eleven GL International Conferences offers the opportunity to pave the way to a bottom-up redefinition of Grey Literature stemming from attested terminological creativity and lexical innovation. 
In this paper, we focus on a set of automatically-acquired terms obtained by subjecting our reference Corpus to a number of pre-processing steps of automated text analysis, such as concordances, frequency lists and lexical association scores. Acquired terms allow us to throw in sharp relief developing trends and important shifts of emphasis in the current understanding of the notion of Grey Literature.},
  KEYWORDS = {GL conference corpus, Grey literature definition, Terminology extraction},
  PAGES = {27--31},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84883303651\&origin=inward},
  VOLUME = {12},
  PUBLISHER = {TextRelease (Amsterdam, NLD)},
  ISSN = {1386-2316},
  ISBN = {9789077484166},
  CONFERENCE_NAME = {Twelfth International Conference on Grey Literature: Transparency in Grey Literature, Grey Tech Approaches to High Tech Issues},
  CONFERENCE_PLACE = {Praga},
  CONFERENCE_DATE = {6-7 dicembre 2010},
  BOOKTITLE = {Transparency in Grey Literature, Grey Tech Approaches to High Tech Issues},
  EDITOR = {Farace, D. J. and Frantzen, J.},
}
@INPROCEEDINGS{PARDELLI_2011_INPROCEEDINGS_PSOBG_199282,
  AUTHOR = {Pardelli, G. and Sassi, M. and Orsolini, P. and Biagioni, S. and Giannini, S.},
  TITLE = {An open archive of scientific communication},
  YEAR = {2011},
  ABSTRACT = {This paper presents the results of a terminological work conducted by the authors on a Digital Archives Net of the Italian National Research Council (CNR) in the field of Computer Science. In particular, the research tends to analyse the use of certain terms in Computer Science in order to verify their change over the time with the aim of retrieving from the net the very essence of documentation. Its main source is a reference corpus made up of 13,500 documents which collects the scientific productions of three CNR research Institutes.
They are ISTI (Institute of Information Science and Technologies), IIT (Institute of Informatics and Telematics) and ILC (Institute of Computational Linguistics), all of them born from the "Centro Studi sulle Calcolatrici Elettroniche (CSCE)" and now belonging to the CNR Department of Information \& Communication Technologies and Cultural Identity. This study is divided in three sections: an introductory one dedicated to the data extracted from the scientific documentation: the data have in common the use of some terms proper of the Computer Science lexicon although these terms belong to different branches (Linguistics, Informatics and Telematics); the second section is devoted to the description of the contents managed by the PUMA (Publication Management System) system; the third section contains a statistical representation of terms extracted from archive: some comparison tables between the occurrences of the most used terms in the scientific documentation produced by the three Institutes will be created and diagrams with percentages about the most frequently used terms will be displayed too. Lastly, indexes and concordances will allow to reflect on the use of certain terms in this field and give possible keys for having access to the extraction of knowledge in the digital era.},
  KEYWORDS = {Digital Archives, Communication, Terminology, Open Access},
  PAGES = {914--918},
  URL = {http://www.santiago.cu/hosting/linguistica/simposios.php?s=XII},
  VOLUME = {II},
  PUBLISHER = {Centro de linguística aplicada, Ministerio de ciencia, tecnología y medio ambiente (Santiago de Cuba, CUB)},
  ISBN = {978-959-7174-19-6},
  CONFERENCE_NAME = {Comunicación Social en el Siglo XXI. XII Simposio Internacional de Comunicacion Social},
  CONFERENCE_PLACE = {Santiago de Cuba},
  CONFERENCE_DATE = {17-21 gennaio 2011},
  BOOKTITLE = {Comunicacion social en el siglo XXI, vol. II},
  EDITOR = {Miyares, L. R. and Silva, M. R.
Á.},
}
@INPROCEEDINGS{PEZZULO_2011_INPROCEEDINGS_PR_205541,
  AUTHOR = {Pezzulo, G. and Rigoli, F.},
  TITLE = {Planning in view of future needs: a bayesian model of anticipated motivation},
  YEAR = {2011},
  ABSTRACT = {Traditional neuroeconomic theories of decision-making assume that utilities are based on intrinsic values of outcomes and that those values depend on how salient are outcomes in relation to the current motivational state. The fact that humans, and possibly also other animals, are able to plan in view of future motivations is not accounted by this view. So far, it is not clear which are the structures and the computational mechanisms employed by the brain during these processes. In this article, we present a Bayesian computational model that describes how the brain considers future motivations and assigns value to outcomes in relation to this information. We compare our model of anticipated motivation with a model that implements the standard perspective in decision-making and assigns value only based on the animal's current motivations. The results of our simulations indicate an advantage of the model of anticipated motivation in volatile environments. Finally we connect our computational proposal to animal and human studies on prospection and foresight abilities and to neurophysiological investigations on their neural underpinnings.},
  KEYWORDS = {prospection, foresight, goal-directed decisionmaking, model-based, expected utility},
  PAGES = {174--176},
  URL = {http://nbu.bg/cogs/eurocogsci2011/proceedings/pdfs/EuroCogSci-paper174.pdf},
  ISBN = {978-954-535-660-5},
  CONFERENCE_NAME = {European Conference on Cognitive Science 2011},
  CONFERENCE_PLACE = {Sofia},
  CONFERENCE_DATE = {21-24 Maggio 2011},
  EDITOR = {Kokinov, B. and Karmiloff Smith, A. and Nersessian, N. J.},
}
@INPROCEEDINGS{RIGOLI_2011_INPROCEEDINGS_RPP_205551,
  AUTHOR = {Rigoli, F. and Pavone, E. F.
and Pezzulo, G.},
  TITLE = {Interaction of goal-directed and pavlovian systems in aversive domains},
  YEAR = {2011},
  ABSTRACT = {Recent neuroscientific models of human behavior distinguish between different cognitive controllers: two instrumental systems (goal-directed and habitual) that maximize utility through learned actions, and a so-called Pavlovian system, which implements innate reactive responses. Although the interaction between instrumental and Pavlovian controllers has been suggested as a key process underlying emotional phenomena and surprising forms of misbehavior, few is known about it, especially in the sensorimotor aversive domain. With a combined experimental and computational approach, we study the interactions between instrumental (goal-directed) and Pavlovian processes in the aversive domain. First, we present a human experiment in which goal-directed and Pavlovian systems compete in order to control responses. The results indicate that Pavlovian processes can significantly interfere with goal-directed behavior. Second, we compare four alternative Bayesian models for their accuracy in modeling human performance. The results indicate a better fit for an architecture in which the Pavlovian controller can use both model-based and model-free features.},
  KEYWORDS = {Goal-directed system, Pavlovian system, Bayesian model, implicit classical conditioning},
  PAGES = {3211--3216},
  URL = {http://mindmodeling.org/cogsci2011/papers/0739/paper0739.pdf},
  ISBN = {978-954-535-660-5},
  CONFERENCE_NAME = {European Conference on Cognitive Science 2011 New Bulgarian University Sofia},
  CONFERENCE_PLACE = {Sofia},
  CONFERENCE_DATE = {21-24 Maggio 2011},
  BOOKTITLE = {European Perspectives on Cognitive Science},
  EDITOR = {Kokinov, B. and Karmiloff Smith, A. and Nersessian, N. J.},
}
@INPROCEEDINGS{RUSSO_2011_INPROCEEDINGS_RCRBM_205736,
  AUTHOR = {Russo, I. and Caselli, T. and Rubino, F. and Boldrini, E.
and Martínez Barco, P.},
  TITLE = {EMOCause: An Easy-adaptable Approach to Extract Emotion Cause Contexts},
  YEAR = {2011},
  ABSTRACT = {In this paper we present a method to automatically identify linguistic contexts which contain possible causes of emotions or emotional states from Italian newspaper articles (La Repubblica Corpus). Our methodology is based on the interplay between relevant linguistic patterns and an incremental repository of common sense knowledge on emotional states and emotion eliciting situations. Our approach has been evaluated with respect to manually annotated data. The results obtained so far are satisfying and support the validity of the methodology proposed.},
  KEYWORDS = {sentiment analysis},
  PAGES = {153--160},
  URL = {https://publications.cnr.it/doc/205736},
  CONFERENCE_NAME = {2nd Workshop on Computational Approaches to Subjectivity and Sentiment Analysis},
  CONFERENCE_PLACE = {Portland, USA},
  CONFERENCE_DATE = {24 Giugno 2011},
}
@INPROCEEDINGS{SPADONI_2011_INPROCEEDINGS_STS_205482,
  AUTHOR = {Spadoni, F. and Tariffi, F. and Sassolini, E.},
  TITLE = {SMARTCITY: Innovative Technologies for customized and dynamic multimedia content production for Tourism applications},
  YEAR = {2011},
  ABSTRACT = {This paper presents the first results of the SMARTCITY project, co-funded by the Tuscany Region under the POR CREO 1.d program.
The project proposes an innovative methodology as well as advanced technologies enabling professional services for cultural tourism applications in urban areas as well as larger archaeological sites.},
  KEYWORDS = {smartcity project, Tourism Applications, Dynamic Multimedia Content Production},
  PAGES = {130--135},
  URL = {https://publications.cnr.it/doc/205482},
  PUBLISHER = {Pitagora Editrice Bologna (Bologna, ITA)},
  ISBN = {88-371-1837-6},
  CONFERENCE_NAME = {EVA 2011 Florence Electronic Imaging and the Visual Arts},
  CONFERENCE_PLACE = {Firenze},
  CONFERENCE_DATE = {4-5-6 maggio 2011},
  EDITOR = {Cappellini, V.},
}
@INPROCEEDINGS{CUTUGNO_2011_INPROCEEDINGS_CCZRMZCA_304915,
  AUTHOR = {Cutugno, P. and Cavioni, V. and Zanetti, M. A. and Renati, R. and Marconi, L. and Zini, V. and Chiarella, D. and Aloisio, V.},
  TITLE = {Blog, identità virtuale, narrazione, analisi linguistica, adolescenza},
  YEAR = {2011},
  PAGES = {91},
  URL = {https://publications.cnr.it/doc/304915},
  CONFERENCE_NAME = {CKGB Terzo Congresso Nazionale "Empowerment, Formazione e Tecnologie. L'individuo, il Gruppo e l'Organizzazione"},
  CONFERENCE_PLACE = {Bari},
  CONFERENCE_DATE = {6-8 Aprile 2011},
}
@INPROCEEDINGS{DELLORLETTA_2011_INPROCEEDINGS_DM_205737,
  AUTHOR = {Dell'Orletta, F. and Montemagni, S.},
  TITLE = {Towards an NLP-based approach for measuring syntactic complexity: preliminary experiments with Italian texts from different registers},
  YEAR = {2011},
  ABSTRACT = {In this paper, we explore how NLP can be used to automatically identify relevant syntactic complexity features in texts with the aim of assessing their correlation with specific linguistic registers. Our final goal is twofold. On the one hand, we demonstrate that automatic morpho-syntactic and syntactic annotation of texts provides sufficiently accurate output for use in the automatic extraction and measurement of syntactic complexity features.
On the other hand, we identify the set of syntactic features strongly correlating with considered linguistic registers.},
  KEYWORDS = {Language Variation, Natural Language Processing, Syntactic Complexity},
  URL = {http://www.benszm.net/BSBWWS/Dellorletta_Montemagni.pdf},
  CONFERENCE_NAME = {Workshop on "Cross-linguistic and language-internal variation in text and speech: focus on the joint analysis of multiple characteristics"},
  CONFERENCE_PLACE = {Freiburg Institute for Advanced Studies (FRIAS), University of Freiburg},
  CONFERENCE_DATE = {29/10/2010},
}
@INPROCEEDINGS{FRONTINI_2011_INPROCEEDINGS_FM_205738,
  AUTHOR = {Frontini, F. and Monachini, M.},
  TITLE = {Towards interfacing lexical and ontological resources},
  YEAR = {2011},
  ABSTRACT = {During the last two decades, the Computational Linguistics community has dedicated considerable effort to the research and development Lexical Resources (LRs), especially Computational Lexicons. These LRs, even though belonging to different linguistic approaches and theories, share a common element; all of them contain, explicitly or implicitly, an ontology as the means of organizing their structure.},
  KEYWORDS = {language resources, ontologies},
  PAGES = {26},
  URL = {https://publications.cnr.it/doc/205738},
  CONFERENCE_NAME = {ONTOLOGIES AND LEXICAL SEMANTICS},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {01 Ottobre 2011},
}
@INPROCEEDINGS{MARZI_2011_INPROCEEDINGS_M_205896,
  AUTHOR = {Marzi, C.},
  TITLE = {Knowledge Communities in Grey},
  YEAR = {2011},
  ABSTRACT = {The dynamic nature of modern human social interactions, and the increasing capability of wireless and mobile devices for creating and sharing contents, open up the opportunity for a wide dissemination of information through complex knowledge sharing systems. The development of digital technologies and the continuous evolution of telecommunication networks are rapidly heading our society towards a culture of participation and to a more and more interactive communication.
Adaptive networking protocols and data management systems are fostering pervasive information and communication environments. In this context, subject based communities offer the steadily increasing availability of ubiquitous accessible information. Networking communities, focussed on supporting relationships and content sharing, act at the same time as providers and users of all kind of grey literature materials in a highly distributed and collaborative scenario. Collaboration networks are thus becoming a key element in the advancement and dissemination of knowledge in scientific domains as well as in diverse aspects of everyday human life. In this sense, social media at best enhance new frontier ideas and highly innovative contents; they offer the enormous potential to transform research, and research results, into a knowledge co-creation process. As the shared knowledge components build cognitive ties, there is no real sharing of knowledge without a common understanding of it. Large amounts of structured information have to be managed, and generation and assimilation of knowledge have to be facilitated. The unlimited universe of data and information available on the web need to be identified, classified, analyzed, filtered, so as to enhance the generation and assimilation of new knowledge. Knowledge needs to be represented, standardized and distilled from multiple sources. Tagging on a web scale provides a potentially useful source of metadata, and paves the way to automated post-processing services such as information retrieval, and acquisition of concepts from large document repositories. In other words it creates an environment conducive to knowledge transfer. In the full version, particular emphasis will be laid on technologies in natural language understanding and knowledge management for providing structured, intelligent access to the continuously evolving content generated on-line in a pervasive collaborative environment. 
In particular, the work will focus on exploring the interaction/synergy between different modes/tools for knowledge acquisition and representation: from highly structured, standardized and objective knowledge information systems based on ontological hierarchies and relations to more dynamic, subjective tools for volatile knowledge representation such as word clouds and concept maps. This approach will highlight current automated tools for concept acquisition and ontology learning that are conducive to an incremental approach to content access and management, to establish a fruitful bridge between modes of knowledge sharing/creation and dynamic, incremental approaches to automated knowledge acquisition and representation.},
  KEYWORDS = {Grey Literature, Web communities, Knowledge sharing, Concept Maps},
  PAGES = {26--30},
  URL = {https://publications.cnr.it/doc/205896},
  VOLUME = {13},
  ISSN = {1385-2308},
  ISBN = {978-90-77484-00-5},
  CONFERENCE_NAME = {Thirteenth International Conference on Grey Literature: The Grey circuit-From Social networking to Wealth Creation},
  CONFERENCE_PLACE = {Washington D. C.-USA},
  CONFERENCE_DATE = {5-6 December 2011},
  BOOKTITLE = {The Grey Circuit-From Social Networking to Wealth Creation},
  EDITOR = {Farace, D. J. and Frantzen, J.},
}
@INPROCEEDINGS{MONTEMAGNI_2011_INPROCEEDINGS_M_205779,
  AUTHOR = {Montemagni, S.},
  TITLE = {Ontology Learning. An introduction},
  YEAR = {2011},
  ABSTRACT = {The tutorial is organised into two parts: PART 1 is devoted to provide the basic notions underlying Ontology Learning, in particular why it is needed, how it can be carried out and how its results can be evaluated. PART 2 discusses the topic of Ontology Learning in the Legal domain, with particular attention to the specific challenges posed by it.
It also provides an overview of different feasibility studies carried out in the legal domain.},
  KEYWORDS = {Ontology Learning, Legal Information extraction, Natural Language Processing},
  URL = {https://publications.cnr.it/doc/205779},
  CONFERENCE_NAME = {Summer School LEX 2011, Ravenna, Italy "Managing Legal Resources in the Semantic Web"},
  CONFERENCE_PLACE = {Ravenna, Italia},
  CONFERENCE_DATE = {8 settembre 2011},
}
@INPROCEEDINGS{MONTEMAGNI_2011_INPROCEEDINGS_MWDN_205911,
  AUTHOR = {Montemagni, S. and Wieling, M. and De Jonge, B. and Nerbonne, J.},
  TITLE = {Synchronic patterns of Tuscan phonetic variation and diachronic change: evidence from a dialectometric study},
  YEAR = {2011},
  ABSTRACT = {A careful investigation of synchronic patterns of linguistic variation with underlying linguistic features can lead to important insights into the comprehension of diachronic phonetic processes. Starting from the analysis of synchronic patterns of phonetic variation in Tuscany we tackled one of the main and most debated features of Tuscan dialects, the phenomenon of spirantization with a specific view to the so-called Tuscan "gorgia" (i.e. voiceless spirantization). In particular, we showed that the newly proposed method of spectral partitioning of bipartite graphs applied to synchronic dialectal data can effectively be used to investigate diachronic phonetic processes. From a careful analysis of the sound correspondences involving voiceless and voiced stops, we tracked the evolution of the spirantization phenomenon in several respects. First, we tracked spirantization geographically, across Tuscany from the influential center of Florence to the peripheral areas. Second, we tracked it phonologically, from voiceless to voiced stops, and within each voicing class from velars to dentals and then to bilabials. Finally, we tracked it demographically, with young speakers using the most innovative sound correspondences more than old speakers.
The fact that these results are in line with the literature on the topic of Tuscan "gorgia" demonstrates the potential of the method of spectral partitioning of bipartite graphs with respect to the reconstruction of diachronic processes starting from diatopically distributed synchronic dialectal data.},
  KEYWORDS = {Dialectometry, Phonetic Variation, Tuscan Dialects},
  PAGES = {120--121},
  URL = {http://westernlinguistics.ca/methods14/files/all_abstracts_one_document.pdf},
  CONFERENCE_NAME = {Fourteenth Methods in Dialectology Conference},
  CONFERENCE_PLACE = {University of Western Ontario},
  CONFERENCE_DATE = {2-6 August 2011},
}
@INPROCEEDINGS{PARDELLI_2011_INPROCEEDINGS_PSG_205788,
  AUTHOR = {Pardelli, G. and Sassi, M. and Goggi, S.},
  TITLE = {Grey Literature Between Tradition and Innovation: Is there a Continuum?},
  YEAR = {2011},
  ABSTRACT = {This study wants to explore ways of social media communication for Grey Literature. In particular it describes the role of social media in relation with traditional channels and how social media applications can be used for Grey.},
  KEYWORDS = {Grey Literature, Communication networks, Knowledge networking, knowledge exchange},
  PAGES = {64--65},
  URL = {https://publications.cnr.it/doc/205788},
  VOLUME = {13},
  ISBN = {978-90-77484-00-5},
  CONFERENCE_NAME = {Thirteenth International Conference on Grey Literature: The Grey Circuit, From Social Networking to Wealth Creation},
  CONFERENCE_PLACE = {Washington D. C. USA-Library of Congress},
  CONFERENCE_DATE = {5-6 december 2011},
  EDITOR = {Farace, D. J. and Frantzen, J.},
}
@INPROCEEDINGS{QUOCHI_2011_INPROCEEDINGS_Q_287125,
  AUTHOR = {Quochi, V.},
  TITLE = {The development of Light-'do' Verb Constructions in Italian},
  YEAR = {2011},
  ABSTRACT = {This contribution presents the results of a study of the development of Light 'do' Verb Constructions in Italian based on naturalistic data.
The claim is that there exists a Light Verb pivot schema that accounts for new productive formations and that this pattern is learnt by young children because it constitutes a labeling technique for naming new events, activities and situations. The findings of this research support two hypotheses of language acquisition. The results are based on analysis of longitudinal transcriptions of adult children interactions contained in the CHILDES databank (MacWhinney 2000).},
  KEYWORDS = {child language, construction grammar, light verb constructions},
  PAGES = {256--257},
  URL = {http://sle2011.cilap.es/downloads/book_abstracts.pdf},
  CONFERENCE_NAME = {SLE 2011-44 TH ANNUAL MEETING},
  CONFERENCE_PLACE = {Logroño, Spain},
  CONFERENCE_DATE = {8-11 Settembre 2011},
  BOOKTITLE = {SLE 2011-44TH ANNUAL MEETING BOOK OF ABSTRACT},
  EDITOR = {Arista, J. M.},
}
@INPROCEEDINGS{TERRANOVA_2011_INPROCEEDINGS_TFCRDGRP_205909,
  AUTHOR = {Terranova, G. and Ferro, M. and Carpeggiani, C. and Recchia, V. and Dodaro, A. and Gioffrè, D. and Richard, S. and Picano, E.},
  TITLE = {Unreadability of current informed consent forms in cardiology-and how to improve it},
  YEAR = {2011},
  ABSTRACT = {Guidelines on informed consent for clinical practice and research trials recommend the use of standard plain language to enhance patient comprehension and to facilitate shared decision-making. Aim: To assess readability of our current informed consent forms used in cardiology.},
  KEYWORDS = {public health, health policy, informed consent, readability},
  PAGES = {69--70},
  URL = {http://spo.escardio.org/abstract-book/presentation.aspx?id=97162},
  VOLUME = {32},
  CONFERENCE_NAME = {European Society of Cardiology},
  CONFERENCE_PLACE = {Paris (France)},
  CONFERENCE_DATE = {Agosto 2011},
}
@TECHREPORT{ARRANZ_2011_TECHREPORT_ABBCCDFGMQRR_290606,
  AUTHOR = {Arranz, V. and Bel, N. and Budin, G. and Caselli, T. and Choukri, K. and Del Gratta, R. and Frontini, F. and Goggi, S. and Monachini, M. and Quochi, V. and Rubino, F.
and Russo, I.},
  TITLE = {The FLaReNet Databook},
  YEAR = {2011},
  ABSTRACT = {The FLaReNet Databook is not only the collection of all the factual material collected during the activities of the project, but also a set of innovative initiatives and instruments that will remain in place for the continuous collection of such "facts". The purpose of the Databook is in fact, on one side, to consolidate the analyses carried out in the project and, at the same time, to set up the proper mechanisms that will enable the provision of a continuous stream of relevant factual material, also after the end of the project.},
  KEYWORDS = {Language Resources (LRs)},
  PAGES = {1--8},
  URL = {http://www.flarenet.eu/?q=FLaReNet_Databook},
}
@TECHREPORT{BARONI_2011_TECHREPORT_B_206319,
  AUTHOR = {Baroni, P.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Financial Statement},
  YEAR = {2011},
  ABSTRACT = {Final financial statement of the FLaReNet project.},
  KEYWORDS = {Financial Statement},
  URL = {https://publications.cnr.it/doc/206319},
}
@TECHREPORT{BARONI_2011_TECHREPORT_B_206467,
  AUTHOR = {Baroni, P.},
  TITLE = {FLaReNet Web Site End Users Guide-Editing of a Group Wiki Doc using FCKeditor},
  YEAR = {2011},
  ABSTRACT = {In this guide you will find most of the common tasks used in FCKeditor, the WYSIWYG (What You See Is What You Get) HTML text editor integrated in the Drupal CMS (Content Management System) to provide most of the commonly used functions from desktop editors like Word to the Web.},
  KEYWORDS = {WYSIWYG HTML text editor},
  URL = {https://publications.cnr.it/doc/206467},
}
@TECHREPORT{BARONI_2011_TECHREPORT_B_206503,
  AUTHOR = {Baroni, P.},
  TITLE = {FLaReNet Web Statistics: 7th December 2008-31st August 2011},
  YEAR = {2011},
  ABSTRACT = {Statistics relating to the access to the FLaReNet Web site from 7th December 2008 to 31st August 2011.},
  KEYWORDS = {Language Resources, Web Statistics},
  PAGES = {15},
  URL = {https://publications.cnr.it/doc/206503},
}
@TECHREPORT{BARONI_2011_TECHREPORT_B_206505,
  AUTHOR = {Baroni, P.},
  TITLE = {The FLaReNet Consortium and Network: 1st September 2008-31st August 2011},
  YEAR = {2011},
  ABSTRACT = {Some statistics relating to the Individual Subscribers, the Institutional Members and the National Contact Points of the FLaReNet network are reported and analysed.},
  KEYWORDS = {Language Resources, Statistics},
  PAGES = {12},
  URL = {https://publications.cnr.it/doc/206505},
}
@TECHREPORT{BARONI_2011_TECHREPORT_BSC_206273,
  AUTHOR = {Baroni, P. and Soria, C. and Calzolari, N.},
  TITLE = {The FLaReNet Databook},
  YEAR = {2011},
  ABSTRACT = {A collection of all the factual material collected during the activities of the FLaReNet project and a set of innovative initiatives and instruments that will remain in place for the continuous collection of such "facts". Editors: Paola Baroni, Claudia Soria, Nicoletta Calzolari. Contributors: Victoria Arranz, Núria Bel, Gerhard Budin, Tommaso Caselli, Khalid Choukri, Riccardo Del Gratta, Elina Desypri, Gil Francopoulo, Francesca Frontini, Sara Goggi, Olivier Hamon, Erhard Hinrichs, Penny Labropoulou, Lothar Lemnizer, Steven Krauwer, Valerie Mapelli, Joseph Mariani, Monica Monachini, Jan Odijk, Jungyeul Park, Stelios Piperidis, Adam Przepiorkowski, Valeria Quochi, Eva Revilla, Laurent Romary, Francesco Rubino, Irene Russo, Helmut Schmidt, Hans Uszkoreit, Peter Wittenburg.},
  KEYWORDS = {Language Resources, Language Technologies},
  URL = {http://www.flarenet.eu/sites/default/files/FLaReNet_Databook.pdf},
}
@TECHREPORT{BARONI_2011_TECHREPORT_BSC_206324,
  AUTHOR = {Baroni, P. and Soria, C. and Calzolari, N.},
  TITLE = {The FLaReNet Databook: http://www.flarenet.eu/?q=FLaReNet\_Databook},
  YEAR = {2011},
  ABSTRACT = {A collection of all the factual material collected during the activities of the FLaReNet project and a set of innovative initiatives and instruments that will remain in place for the continuous collection of such "facts".
Editors: Paola Baroni, Claudia Soria, Nicoletta Calzolari. Contributors: Victoria Arranz, Núria Bel, Gerhard Budin, Tommaso Caselli, Khalid Choukri, Riccardo Del Gratta, Elina Desypri, Gil Francopoulo, Francesca Frontini, Sara Goggi, Olivier Hamon, Erhard Hinrichs, Penny Labropoulou, Lothar Lemnizer, Steven Krauwer, Valerie Mapelli, Joseph Mariani, Monica Monachini, Jan Odijk, Jungyeul Park, Stelios Piperidis, Adam Przepiorkowski, Valeria Quochi, Eva Revilla, Laurent Romary, Francesco Rubino, Irene Russo, Helmut Schmidt, Hans Uszkoreit, Peter Wittenburg.},
  KEYWORDS = {Language Resources},
  URL = {http://www.flarenet.eu/?q=FLaReNet_Databook},
}
@TECHREPORT{BARTOLINI_2011_TECHREPORT_BPHTPRTPB_206269,
  AUTHOR = {Bartolini, R. and Poch, M. and Hamon, O. and Toral, A. and Prokopidis, P. and Rubino, F. and Thurmair, G. and Papavassiliou, V. and Bel, N.},
  TITLE = {D3.3 Second version (v2) of the integrated platform and documentation},
  YEAR = {2011},
  ABSTRACT = {the integrated platform and documentation of panacea},
  KEYWORDS = {platform},
  URL = {https://publications.cnr.it/doc/206269},
}
@TECHREPORT{CALZOLARI_2011_TECHREPORT_CBCMMOPQS_206397,
  AUTHOR = {Calzolari, N. and Bel, N. and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C.},
  TITLE = {Final FLaReNet deliverable: Language Resources for the Future-The Future of Language Resources},
  YEAR = {2011},
  ABSTRACT = {Language Technologies (LT), together with their backbone, Language Resources (LR), provide an essential support to the challenge of Multilingualism and ICT of the future. The main task of language technologies is to bridge language barriers and to help creating a new environment where information flows smoothly across frontiers and languages, no matter the country, and the language, of origin. To achieve this goal, all players involved need to act as a community able to join forces on a set of shared priorities.
However, until now the field of Language Resources and Technology has long suffered from an excess of individuality and fragmentation, with a lack of coherence concerning the priorities for the field, the direction to move, not to mention a common timeframe. The context encountered by the FLaReNet project was thus represented by an active field needing a coherence that can only be given by sharing common priorities and endeavours. FLaReNet has contributed to the creation of this coherence by gathering a wide community of experts and making them participate in the definition of an exhaustive set of recommendations.},
  KEYWORDS = {language resources and technologies, infrastructures},
  PAGES = {97},
  URL = {https://publications.cnr.it/doc/206397},
}
@TECHREPORT{CALZOLARI_2011_TECHREPORT_CQS_206420,
  AUTHOR = {Calzolari, N. and Quochi, V. and Soria, C.},
  TITLE = {FLaReNet Strategic Language Resource Agenda},
  YEAR = {2011},
  ABSTRACT = {Despite the complexity of handling its languages, the European Union has established that cultural and language differences are a unique asset to be preserved. Europe needs to find means - such as technological ones - to overcome the language barriers to support citizens and industry in a multilingual globalised world. The large majority of industrial technological applications that handle natural language, i.e. Machine Translation, Crosslingual Information Retrieval, Multilingual Information Extraction, Automatic Document Indexing, Question Answering, Natural Language Interfaces, etc., include Language Resources as critical components. Although Language Technologies may consist of language independent engines, they depend on the availability of language-dependent knowledge under the form of Language Resources for their real-life implementation.
At the same time, it is proved that a critical mass of Language Resources can make advancement in research and technology development possible and quicker, making Europe the leader of the market related to multilingualism. Companies such as Google or Microsoft play a dominant role in this framework, as they have access to a huge amount of data in many different languages, devote considerable resources to Language Technologies, have massive computing power and a direct research-to-application pipeline using a new business model based on so-called "free" services. The fact that a US company like Google is delivering some of the most comprehensive Language Technology solutions to support multilingualism should raise concern among EU officials.},
  KEYWORDS = {Language resources, infrastructures},
  PAGES = {23},
  URL = {https://publications.cnr.it/doc/206420},
}
@TECHREPORT{CALZOLARI_2011_TECHREPORT_CSBG_206274,
  AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Goggi, S.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Final Report},
  YEAR = {2011},
  ABSTRACT = {Final report of the FLaReNet project.},
  KEYWORDS = {Language Resources},
  URL = {http://www.flarenet.eu/sites/default/files/D1.15.pdf},
}
@TECHREPORT{CALZOLARI_2011_TECHREPORT_CSBCMOPB_206254,
  AUTHOR = {Calzolari, N. and Soria, C. and Bel, N. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. and Baroni, P.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 5},
  YEAR = {2011},
  ABSTRACT = {Fifth semestrial report on the progress of the FLaReNet project.},
  KEYWORDS = {Language Resources},
  URL = {https://publications.cnr.it/doc/206254},
}
@TECHREPORT{CALZOLARI_2011_TECHREPORT_CSBCMOPBG_206276,
  AUTHOR = {Calzolari, N. and Soria, C. and Bel, N. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. and Baroni, P. and Goggi, S.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No.
6},
  YEAR = {2011},
  ABSTRACT = {Sixth semestrial report on the progress of the FLaReNet project.},
  KEYWORDS = {Language Resources},
  URL = {https://publications.cnr.it/doc/206276},
}
@TECHREPORT{CHIARELLA_2011_TECHREPORT_C_206464,
  AUTHOR = {Chiarella, D.},
  TITLE = {Uno studio sullo standard 802.11: introduzione di una rete wifi nella rete dell'U.O.S. ILC di Genova},
  YEAR = {2011},
  ABSTRACT = {Il termine wireless riferito ad una comunicazione indica che il trasferimento di informazioni avviene senza l'utilizzo di conduttori elettrici o cavi; infatti in una rete wireless lo strumento di comunicazione utilizzato è l'aria. Per estensione tutti i dispositivi che utilizzano questa modalità di comunicazione vengono detti wireless. La comunicazione wireless può permettere di portare un servizio dove una comunicazione cablata risulterebbe impossibile da implementare, sia per quanto riguarda la fattibilità tecnica, sia per quanto riguarda i costi oppure può risultare molto utile nel caso la dinamicità del numero di utenti che utilizzano una rete sia molto alta e l'infrastruttura hardware sia sottodimensionata rispetto al numero degli utenti (i.e. le prese ethernet siano esigue rispetto al numero di computer da cablare). Uno dei casi più comuni che rientra nell'estrema dinamicità sono i giorni in cui viene ospitato in una struttura un evento (i.e. una conferenza) che porta con sé diversi visitatori che necessitano di una connessione ad Internet: in questo caso una soluzione ottima sia per trade-off di tempo di fattibilità sia per facilità d'uso per gli utenti è la creazione di una LAN wireless (WLAN). Per queste motivazioni, nell'ambito della ricerca applicata per l'innovazione del networking e della sicurezza dell'Unità Operativa Staccata di Genova dell'Istituto di Linguistica Computazionale, è stata introdotta all'interno della rete cablata preesistente una rete wireless.
Il seguente rapporto tecnico tratta nel primo paragrafo della situazione preesistente della rete, nel secondo paragrafo cerca di dare un'esaustiva panoramica delle reti wireless e delle loro tecnologie, nel terzo paragrafo descrive le soluzioni tecniche prese in considerazione e le scelte fatte per la creazione della rete wireless ILC-WIFI.}, KEYWORDS = {reti wireless, standard 802.11, WEP, WPA, WPA2}, URL = {https://publications.cnr.it/doc/206464}, } @TECHREPORT{CUCURULLO_2011_TECHREPORT_CS_206469, AUTHOR = {Cucurullo, S. and Sassi, M.}, TITLE = {Archivio Elettronico delle Concordanze Diacroniche dell'Orlando Furioso}, YEAR = {2011}, ABSTRACT = {The project to build a digital electronic archive of the diachronic concordances of the Orlando Furioso began in the '70s and had as its objective the study of a computational method for the treatment of variants. The basic text, provided by the "Accademia della Crusca", refers to the latest edition published in 1532 by the author, while the first 2 editions, published respectively in 1516 and in 1521, had not yet been the subject of electronic transcription. This has directed the staff of the project towards a reconstruction of the previous witnesses through accurate recording of the critical apparatus Debenedetti - Segre, 1960.}, KEYWORDS = {DBT, Orlando Furioso, banca-dati testuale}, PAGES = {11}, URL = {https://publications.cnr.it/doc/206469}, } @TECHREPORT{DELGROSSO_2011_TECHREPORT_D_385881, AUTHOR = {Del Grosso, A.
M.}, TITLE = {The Web-Based System for the Management of Greek-Arabic Texts}, YEAR = {2011}, ABSTRACT = {The work carried out by the ILC-CNR unit in the first period of "Greek into Arabic" has been organized into three main activities, each one devoted to the development of a web-based framework for the study of and collaborative search on the pseudo-Theology of Aristotle and on its Greek sources}, KEYWORDS = {Digital Philology, Digital Humanities}, URL = {https://publications.cnr.it/doc/385881}, } @TECHREPORT{DESIPRI_2011_TECHREPORT_DGLPFMAMFD_206406, AUTHOR = {Desipri, E. and Gavrilidou, M. and Labropoulou, P. and Piperidis, S. and Frontini, F. and Monachini, M. and Arranz, V. and Mapelli, V. and Francopoulo, G. and Declerck, T.}, TITLE = {Documentation and User Manual of the META-SHARE Metadata Model}, YEAR = {2011}, ABSTRACT = {The current deliverable presents the META-SHARE metadata schema v1.0, as implemented in the META-SHARE XSD's v1.0 released to (META-NET and PSP partners) in July 2011 for text corpora and lexical/conceptual resources and its supplement for audio corpora, tools and language descriptions (simplified/refactored version) as implemented in November. It is meant to act as a user manual, providing explanations on the model contents for LRs providers and LRs curators that wish to describe their resources in accordance to it. Work on the schema is ongoing and changes/updates to the model are constantly being made; where appropriate, some changes that are already under way are documented in this deliverable.}, KEYWORDS = {Language resources, metadata, standards}, PAGES = {150}, URL = {https://publications.cnr.it/doc/206406}, } @TECHREPORT{MARIANI_2011_TECHREPORT_MBS_206326, AUTHOR = {Mariani, J. and Baroni, P. 
and Soria, C.}, TITLE = {Feedback from Contact Points on National Initiatives in the Area of Language Resources}, YEAR = {2011}, ABSTRACT = {A survey of existing initiatives on language resources all over the world promoted by the FLaReNet WG7 and carried out with contributions from the 102 FLaReNet National Contact Points.}, KEYWORDS = {Language Resources}, URL = {http://www.flarenet.eu/?q=Feedback_from_Contact_Points_on_National_Initiatives_in_the_Area_of_Language_Resources}, } @TECHREPORT{MARZI_2011_TECHREPORT_M_206256, AUTHOR = {Marzi, C.}, TITLE = {Understanding the Architecture of the Mental Lexicon: Integration of Existing Approaches}, YEAR = {2011}, ABSTRACT = {The 1st NetWordS Workshop, held on the 24th, 25th and 26th of November 2011 in the Research Area of the Italian National Research Council, brought together 37 participants (Scholars, Post-Docs, PhD students) from various European countries. Eighteen speakers, experts of various scientific domains and with different theoretical inclinations, discussed cross-disciplinary approaches to the Understanding of the Architecture of Mental Lexicon, reflecting the interdisciplinarity and synergy fostered by NetWordS, the European Research Networking Programme on Word Structure. The workshop was organised with the ambitious goal of paving the way towards a European interdisciplinary research agenda on the Mental Lexicon for the coming 10 years, with particular emphasis on the three main challenges that NetWordS is intended to address: - Lexicon and Rules in the grammar - Word knowledge and word use - Words and meanings Leading scholars, mostly connected through NetWordS, were invited to address three basic questions: - What are, in the speaker's area of expertise, the most pressing open issues concerning the architecture of the Mental Lexicon? - What and how can progress in other research areas contribute to addressing these issues?
- What can advancement in our understanding of these issues contribute to progress in other areas?}, KEYWORDS = {Mental Lexicon, Integration of existing approaches}, URL = {http://www.networds-esf.eu/index.php?page=1st-networds-workshop}, } @TECHREPORT{MONACHINI_2011_TECHREPORT_MFS_206457, AUTHOR = {Monachini, M. and Frontini, F. and Soria, C.}, TITLE = {KYOTO-LMF WordNet Representation Format}, YEAR = {2011}, ABSTRACT = {The format described in the following pages is the final revised proposal for representing wordnets inside the Kyoto project (henceforth "Kyoto-LMF wordnet format"). The reference model is Lexical Markup Framework (LMF), version 16, probably one of the most widely recognized standards for the representation of NLP lexicons. The goals of LMF are to provide a common model for the creation and use of such lexical resources, to manage the exchange of data between and among them, and to enable the merging of a large number of individual resources to form extensive global electronic resources. LMF was specifically designed to accommodate as many models of lexical representations as possible. Purposefully, it is designed as a meta-model, i.e. a high-level specification for lexical resources defining the structural constraints of a lexicon.}, KEYWORDS = {Wordnets, LMF, ISO, Representation formats, standards}, PAGES = {32}, URL = {https://publications.cnr.it/doc/206457}, } @TECHREPORT{MONACHINI_2011_TECHREPORT_MQCBBCCFHKLMOPPRSUW_206507, AUTHOR = {Monachini, M. and Quochi, V. and Calzolari, N. and Bel, N. and Budin, G. and Caselli, T. and Choukri, K. and Francopoulo, G. and Hinrichs, E. and Krauwer, S. and Lemnitzer, L. and Mariani, J. and Odijk, J. and Piperidis, S. and Przepiorkowski, A. and Romary, L. and Schmidt, H. and Uszkoreit, H.
and Wittenburg, P.}, TITLE = {The Standards' Landscape Towards an Interoperability Framework}, YEAR = {2011}, ABSTRACT = {This document proposes an overview of the current scene towards an Interoperability Framework and acts as a reference point for the current standards that the community fosters and encourages to adopt/improve. This initiative is in close synchronization with other relevant initiatives such as CLARIN, ELRA, ISO and TEI and META-Share. The document builds on the CLARIN Standardisation Action Plan and adapts and extends it to the needs of the broader LT Community, beyond the SSH research areas including the industry. The main goal of this document is to give a practical orientation for various LT players, both commercial and academic; the main message being that a harmonized domain of language resources and technology can be achieved stepwise, but that an effort to adopt standards is necessary to overcome fragmentation. NB: This is to be intended by no means as a static, closed document, rather a dynamic one which needs to be constantly/periodically revised and updated by the community itself.}, KEYWORDS = {Standards, interoperability}, PAGES = {23}, URL = {https://publications.cnr.it/doc/206507}, } @TECHREPORT{MONTEMAGNI_2011_TECHREPORT_MW_206506, AUTHOR = {Montemagni, S. and Wieling, M.}, TITLE = {Definizione di un modello computazionale della variazione dialettale basato sull'integrazione di fattori socio-demografici e geografici}, YEAR = {2011}, ABSTRACT = {In this study, we used a mixed-effects logistic regression model in combination with generalized additive logistic modeling to predict lexical differences in Tuscan dialects with respect to standard Italian. We used lexical information for 170 concepts in 213 locations in Tuscany. Although geographical position is an important predictor with locations distant from Florence having lexical forms more likely to differ from standard Italian, several other factors emerged as significant. 
The model predicts that lexical variants used by older speakers and in smaller as well as poorer communities are more likely to differ from standard Italian. The impact of the demographic variables, however, varied from concept to concept. For a majority of concepts, smaller and poorer communities have lexical forms different from standard Italian. For a smaller minority of concepts, however, larger and richer communities have lexical forms different from standard Italian. Similarly, the effect of speaker age and the average community age also varied per concept. While not significant as a fixed effect, the concept frequency showed significant geographical variation. These results clearly identify important factors involved in dialect variation at the lexical level. In addition, this study illustrates the usefulness of mixed-effects regression techniques together with generalized additive modeling for analyzing lexical dialect data.}, KEYWORDS = {Dialettologia toscana, Dialettometria, variazione lessicale}, URL = {https://publications.cnr.it/doc/206506}, } @TECHREPORT{PROKOPIDIS_2011_TECHREPORT_PPTRFRT_327309, AUTHOR = {Prokopidis, P. and Papavassiliou, V. and Toral, A. and Riera, M. P. and Frontini, F. and Rubino, F. and Thurmair, G.}, TITLE = {WP-4.4: Report on the revised Corpus Acquisition \& Annotation subsystem and its components}, YEAR = {2011}, KEYWORDS = {corpus acquisition, corpus annotation}, URL = {http://www.panacea-lr.eu/system/deliverables/PANACEA_D4.4.pdf}, } @TECHREPORT{PROKOPIDIS_2011_TECHREPORT_PPTRFRT_327310, AUTHOR = {Prokopidis, P. and Papavassiliou, V. and Toral, A. and Riera, M. P. and Frontini, F. and Rubino, F. and Thurmair, G.}, TITLE = {WP-4.5: Final Report on the Corpus Acquisition \& Annotation subsystem and its components}, YEAR = {2011}, KEYWORDS = {corpus acquisition, corpus annotation}, URL = {http://www.panacea-lr.eu/system/deliverables/PANACEA_D4.5.pdf}, } @TECHREPORT{SASSOLINI_2011_TECHREPORT_SC_206250, AUTHOR = {Sassolini, E.
and Cinini, A.}, TITLE = {Metodologie di "thorough indexing" descrittivo, semantico e topologico delle risorse contenutistiche}, YEAR = {2011}, ABSTRACT = {D4 (deliverable) is the final result of task T2.1. of Smartcity project. It analyzes the principles and methodologies of "Thorough indexing" of the text materials in order to define possible new rules for the application of a systematic approach to tagging of knowledge base}, KEYWORDS = {Term extraction, Analisi Semantica}, URL = {https://publications.cnr.it/doc/206250}, } @TECHREPORT{SASSOLINI_2011_TECHREPORT_SC_206417, AUTHOR = {Sassolini, E. and Cinini, A.}, TITLE = {SmartCity-II relazione scientifica: report tecnico}, YEAR = {2011}, ABSTRACT = {This technical report presents the use of NLP techniques (text mining, text analysis) to develop specific tools that allow to create linguistic resources related to the cultural heritage domain, particularly "Empoli e dintorni".}, KEYWORDS = {Corpus (creation, annotation, etc.), Cultural Heritage}, URL = {https://publications.cnr.it/doc/206417}, } @TECHREPORT{SASSOLINI_2011_TECHREPORT_SS_206460, AUTHOR = {Sassolini, E. and Sbrulli, S.}, TITLE = {Primo report tecnico: tecnologie per il trattamento delle informazioni multimediali. ILC-CNR e STRAT-CRIT}, YEAR = {2011}, ABSTRACT = {Technologies for semantic annotation, automatic classification, clustering and browsing in textual databases, for realization of a multimedia on-line press review.}, KEYWORDS = {Information Extraction, Information Retrieval}, URL = {https://publications.cnr.it/doc/206460}, } @TECHREPORT{SORIA_2011_TECHREPORT_SC_206389, AUTHOR = {Soria, C. and Calzolari, N.}, TITLE = {Project presentation-results}, YEAR = {2011}, ABSTRACT = {International cooperation and re-creation of a community are the most important drivers for a coherent evolution of the Language Resource (LR) area in the next years. 
FLaReNet has been a European forum to facilitate interaction among LR stakeholders and its structure took into account the fact that LRs present various dimensions and must be approached from many perspectives: technical, but also organisational, economic, legal, political. The Network addressed also multicultural and multilingual aspects, essential when facing access and use of digital content in today's Europe. FLaReNet consolidated existing knowledge, presenting it analytically and visibly, and contributed to structuring the area of LRs of the future by discussing new strategies to: convert existing and experimental technologies related to LRs into useful economic and societal benefits; integrate so far partial solutions into broader infrastructures; consolidate areas mature enough for recommendation of best practices; anticipate the needs of new types of LRs. The outcomes of FLaReNet has been of a directive nature, to help identify those priority areas of LRs of major interest for the public that need public funding to develop or improve. A blueprint of actions has constituted the input to policy development both at EU and national level for identifying new language policies that support linguistic diversity in Europe, in combination with strengthening the language product market, e.g. for new products and innovative services, especially for less technologically advanced languages.}, KEYWORDS = {Language resources, infrastructures, international cooperation}, PAGES = {44}, URL = {https://publications.cnr.it/doc/206389}, } @TECHREPORT{SORIA_2011_TECHREPORT_SM_206391, AUTHOR = {Soria, C. and Mariani, J.}, TITLE = {Report on Existing Projects and Initiatives}, YEAR = {2011}, ABSTRACT = {It is of utmost importance for a project such as T4ME to get a comprehensive and reliable overview of the projects and initiatives addressing similar topics. 
Mainly in order to establish relationships, build on previous achievement, and get a reliable and up-to-date view about the current state of the art. This report surveys ongoing and recent projects and initiatives at the national, EU and transnational level addressing Machine Translation, multilingual issues, language resources and technologies, or infrastructural issues at large. Focus is on Europe but relevant initiatives outside Europe have been reviewed as well.}, KEYWORDS = {language resources and technologies, infrastructures}, PAGES = {134}, URL = {http://www.meta-net.eu/public_documents/t4me/META-NET-D11.3-Final.pdf}, } @TECHREPORT{VOSSEN_2011_TECHREPORT_VBRASADHMBF_206329, AUTHOR = {Vossen, P. and Bosma, W. and Rigau, G. and Agirre, E. and Soroa, A. and Aliprandi, C. and De Jonge, J. and Hielkema, F. and Monachini, M. and Bartolini, R. and Frontini, F.}, TITLE = {KyotoCore: integrated system for knowledge mining from text}, YEAR = {2011}, ABSTRACT = {In this deliverable, we describe KyotoCore, an integrated system for applying text mining. We describe the software architecture of KyotoCore, the single modules and the process flows. Finally, we describe a use case where we apply the complete process to an English database on estuaries.}, KEYWORDS = {Knowledge and text mining software}, PAGES = {56}, URL = {https://publications.cnr.it/doc/206329}, } @MISC{DELGROSSO_2011_MISC_D_390786, AUTHOR = {Del Grosso, A. M.}, TITLE = {Moduli e componenti di Pinakes Text}, YEAR = {2011}, ABSTRACT = {Il contributo illustra i moduli dell'applicativo Pinakes Text implementati per il progetto ERC 2009 Advanced Grant n. 249431. Titolo: Greek into Arabic.
Philosophical concepts and linguistic bridges.}, KEYWORDS = {computational philology, digital philology, software engineering, greek into arabic, erc}, URL = {https://publications.cnr.it/doc/390786}, } @MISC{PIRRELLI_2011_MISC_P_288014, AUTHOR = {Pirrelli, V.}, TITLE = {ESF Research Networking Programme: "The European Network on Word Structure. Cross-disciplinary approaches to understanding word structure in the languages of Europe-(NetWordS)}, YEAR = {2011}, URL = {https://publications.cnr.it/doc/288014}, } @ARTICLE{BONIN_2010_ARTICLE_BDVM_278419, AUTHOR = {Bonin, F. and Dell'Orletta, F. and Venturi, G. and Montemagni, S.}, TITLE = {Singling out Legal Knowledge from World Knowledge}, YEAR = {2010}, PAGES = {217-229}, URL = {https://publications.cnr.it/doc/278419}, PUBLISHER = {Edizioni Scientifiche Italiane (Firenze, Italia)}, ISSN = {0390-0975}, JOURNAL = {Informatica e diritto}, } @ARTICLE{CALZOLARI_2010_ARTICLE_C_64554, AUTHOR = {Calzolari, N.}, TITLE = {FLaReNet-META-NET-CLARIN. La presenza Italiana nei più importanti progetti strategici e infrastrutturali di TAL in Europa}, YEAR = {2010}, ABSTRACT = {-}, KEYWORDS = {Trattamento Automatico della Lingua. Progetti Europei}, URL = {https://publications.cnr.it/doc/64554}, VOLUME = {Media}, } @ARTICLE{CALZOLARI_2010_ARTICLE_CI_30890, AUTHOR = {Calzolari, N. and Ide, N.}, TITLE = {LREC 2008: Selected Papers}, YEAR = {2010}, ABSTRACT = {Volume 44 marks the fifth year of the journal Language Resources and Evaluation, which in 2005 replaced Computers and the Humanities in both name and focus. At that time, especially in the light of the enormous success of the Language Resources and Evaluation Conference (LREC) that began in 1998, it seemed clear that the field lacked a journal dedicated to articles focused on resource creation, annotation, and evaluation. LRE was established to fill that gap. 
Since then, the journal's success has been literally overwhelming: since May 2005, 361 manuscripts have been submitted to the journal, of which 102 were eventually accepted for publication either as full journal papers or short notes. Many articles went through one or more iterations of revision in response to reviewer comments and resubmission before final acceptance. As a result of the efforts of both reviewers and authors, LRE is now regarded as one of the most highly respected journals in the field.}, KEYWORDS = {Language Resources and Evaluation}, PAGES = {181-293}, URL = {https://publications.cnr.it/doc/30890}, VOLUME = {44}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @ARTICLE{CALZOLARI_2010_ARTICLE_CSD_112952, AUTHOR = {Calzolari, N. and Soria, C. and Del Gratta, R.}, TITLE = {The LREC 2010 Map of Language Resources and Tools}, YEAR = {2010}, KEYWORDS = {Linguistic Tools. Language Resources}, URL = {https://publications.cnr.it/doc/112952}, } @ARTICLE{CASTELFRANCHI_2010_ARTICLE_CPT_168367, AUTHOR = {Castelfranchi, C. and Pezzulo, G. and Tummolini, L.}, TITLE = {Behavioral Implicit Communication (BIC): Communicating with smart environments via our practical behaviors and its traces}, YEAR = {2010}, ABSTRACT = {A crucial part of the intelligence that smart environments should display is a specific form of social intelligence: the ability to read human’s behavior and its traces in terms of the underlying intentions and assumptions. Such ability is crucial to enable human users to tacitly coordinate and negotiate with smart and proactive digital environments. In this paper, we argue that the necessary tool for this is behavioral and stigmergic implicit (i.e. non-conventional) communication. 
We present the basic theory of such a fundamental interactive means: the theory of Behavioral Implicit Communication (BIC).}, KEYWORDS = {Tacit communication, stigmergy, coordination, mindreading, ambient intelligence, mind-reading, communication}, PAGES = {1-12}, URL = {https://publications.cnr.it/doc/168367}, VOLUME = {2}, } @ARTICLE{FERRO_2010_ARTICLE_FOPP_64549, AUTHOR = {Ferro, M. and Ognibene, D. and Pezzulo, G. and Pirrelli, V.}, TITLE = {Reading as active sensing: a computational model of gaze planning in word recognition}, YEAR = {2010}, ABSTRACT = {We offer a computational model of gaze planning during reading that consists of two main components: a lexical representation network, acquiring lexical representations from input texts (a subset of the Italian CHILDES database), and a gaze planner, designed to recognize written words by mapping strings of characters onto lexical representations. The model implements an active sensing strategy that selects which characters of the input string are to be fixated, depending on the predictions dynamically made by the lexical representation network. We analyze the developmental trajectory of the system in performing the word recognition task as a function of both increasing lexical competence, and correspondingly increasing lexical prediction ability. We conclude by discussing how our approach can be scaled up in the context of an active sensing strategy applied to a robotic setting.}, KEYWORDS = {Reading, Language Learning, Mental Lexicon}, PAGES = {1-16}, URL = {https://publications.cnr.it/doc/64549}, VOLUME = {4}, PUBLISHER = {Frontiers Research Foundation (Lausanne, Svizzera)}, ISSN = {1662-5218}, JOURNAL = {Frontiers in neurorobotics}, } @ARTICLE{FERRO_2010_ARTICLE_FPP_64553, AUTHOR = {Ferro, M. and Pezzulo, G. 
and Pirrelli, V.}, TITLE = {Morphology, Memory and the Mental Lexicon}, YEAR = {2010}, ABSTRACT = {Recent experimental evidence on morphological learning and processing has prompted a less deterministic and modular view of the interaction between stored word knowledge and on-line processing. Storing a word in the mental lexicon does not simply entail keeping a faithful memory image of that word in the most compact way. It also requires encoding and manipulating such image through topological structures that are optimally adapted to word production and comprehension. Temporal Self-Organizing Maps (THSOMs) are a novel model of artificial neural network that keeps time serial information through predictive activation chains of receptors encoding both spatial and temporal information of input stimuli. The impact of this model on issues of lexical organization and morphological processing is investigated in detail through a series of simulations shedding light on the dynamics between short-term memory (activation), long-term memory (learning) and morphological organization of stored word forms (topology).}, KEYWORDS = {Morphology, Word Processing, Word Learning, Mental Lexicon}, PAGES = {203-242}, URL = {https://publications.cnr.it/doc/64553}, VOLUME = {2}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{FORNACIARI_2010_ARTICLE_FCF_64550, AUTHOR = {Fornaciari, A. and Cignoni, L. and Fornaciari, G.}, TITLE = {STUDENTS' PARTICIPATION IN AN ARCHAEOANTHROPOLOGY COURSE USING A CONTENT AND LANGUAGE INTEGRATED LEARNING (CLIL) METHODOLOGY}, YEAR = {2010}, ABSTRACT = {In this paper we present the results of an ongoing archaeoanthropology project which was started in the year 2007 at Benabbio, a village located near Lucca, in northern Tuscany, Italy. 
Aim of the project is to exhume individuals buried in the medieval cemetery near the church of the Castle of Benabbio (also in course of excavation), and of people who died in the village during the cholera epidemic of the year 1855. Burials and artefacts, ranging between 12th and 19th centuries, have been recorded on a computer database to provide information about the cultural and physical aspects of this ancient human group. The discovery of some well preserved medieval houses led to the decision to examine the settlement in its entirety, exploring the spatial and chronological development of the site. Archaeological exploration of this vast area will involve not only the cemetery of the castle but also the houses and the manor, investigating the origins of the settlement and the early stages of encastlement, by which the local lord could dominate the surrounding countryside and control his neighbours. It will be possible to reconstruct the different phases of organization of the settlement and the life-style and diseases of the inhabitants of the castle. Furthermore, this project is an important testbed for the development of field techniques such as balloon photography, high-resolution georadar survey, GIS, as well as special laboratory techniques for palaeopathology such as computerized tomography (CT), electron microscopy (EM), stable isotope palaeonutrition and ancient DNA (aDNA), that can be used to diagnose ancient diseases. This paper has a dual purpose: on the one hand, it makes the first results of the excavations available to the widest possible audience; on the other hand, it describes the application of a CLIL methodology which uses a foreign language to study a specific discipline. In this case, English is used as supporting language to carry out a number of tasks on}, KEYWORDS = {archaeology}, PAGES = {2684-2692}, URL = {https://publications.cnr.it/doc/64550}, VOLUME = {1}, } @ARTICLE{FRANCESCONI_2010_ARTICLE_FMPT_30888, AUTHOR = {Francesconi, E. 
and Montemagni, S. and Peters, W. and Tiscornia, D.}, TITLE = {Integrating a Bottom-Up and Top-Down Methodology for Building Semantic Resources for the Multilingual Legal Domain}, YEAR = {2010}, ABSTRACT = {This article presents a methodology for multilingual legal knowledge acquisition and modelling. It encompasses two complementary strategies. On the one hand, there is the top-down definition of the conceptual structure of the legal domain under consideration on the basis of expert judgment. This structure is language-independent, modeled as an ontology, and can be aligned with other ontologies that capture similar or complementary knowledge, in order to provide a wider conceptual embedding. Another top-down approach is the exploitation of the explicit structure of legal texts, which enables the targeted identification of text spans that play an ontological role and their subsequent inclusion in the knowledge model. On the other hand, the linguistically motivated, text-based bottom-up population and incremental refinement of this conceptual structure using (semi-)automatic NLP techniques, maximizes the completeness and domain-specificity of the resulting knowledge. The proposed methodology is concerned with the relation between these two differently derived types of knowledge, and defines a framework for interfacing lexical and ontological knowledge, the result of which offers various perspectives on multilingual legal knowledge. Two case-studies combining bottom-up and top-down methodologies for knowledge modelling and learning are presented as illustrations of the methodology.}, KEYWORDS = {Knowledge Modelling, Knowledge Acquisition, Natural Language Processing, Ontology Learning}, PAGES = {95-121}, URL = {https://publications.cnr.it/doc/30888}, VOLUME = {6036}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, JOURNAL = {Lecture notes in computer science}, } @ARTICLE{MARINELLI_2010_ARTICLE_MMTM_64547, AUTHOR = {Marinelli, R. and Mazzocchi, F.
and Tiberi, M. and Motta, M.}, TITLE = {Il modello semantico di EuroWordNet come strumento per la strutturazione della relazione associativa nei thesauri}, YEAR = {2010}, ABSTRACT = {Thesauri are tools which semantically organize a domain of knowledge for operational purposes. Their relational semantics is concerned with methods that connect terms with related meanings and it is important to support information retrieval, enhancing the information recall performance and contributing to improve precision. In fact, the network of relations of a thesaurus has an important semantic function, providing a representation of the meaning of each thesaurus term and a map of the conceptual structure of a subject area. The traditional thesaurus format - as described in international standards - includes the hierarchical, associative and equivalence relationships. However, a rather widespread opinion is that this format should be refined, in order to cope with the current needs of information organization. This paper discusses the possibility of refining the associative relation into a number of sub-kinds by adopting the semantic model of EuroWordNet (EWN), as it was used, according to one of its national versions, ItalWordNet (IWN), taking into account the terminological database Mariterm, which contains terms belonging to the maritime domain. It is also stressed how RT designation and refinement appear to be domain dependent, in the sense that they are associated with the specific features of a knowledge field.}, KEYWORDS = {terminologia, thesauri, relazione associativa, risorse linguistiche, database semantico-lessicali}, PAGES = {249-263}, URL = {http://www.aib.it/aib/boll/2010/1003263.htm}, VOLUME = {50}, PUBLISHER = {Associazione italiana biblioteche (Roma, Italia)}, ISSN = {1121-1490}, JOURNAL = {Bollettino AIB}, } @ARTICLE{MARZI_2010_ARTICLE_MPS_64555, AUTHOR = {Marzi, C. and Pardelli, G. 
and Sassi, M.}, TITLE = {Grey literature and computational linguistics: From paper to net}, YEAR = {2010}, ABSTRACT = {The advent and exponential development of the World Wide Web has led to an increasing availability of unstructured knowledge and distributed information sources, meeting general public requirements that are hardly addressed by other more traditional information channels. This trend has concurrently raised a considerable interest in the application of Computational Linguistics (CL) methodologies to document access and retrieval, as they offer the unprecedented opportunity to make the subjective, user-centred information demands of Net citizens meet the ever changing and heterogeneous information flow of the web. Over the last five years, more and more Italian Universities have introduced CL courses into their Humanities curricula, making available on-line teaching materials, tutorials and language engineering software that appear to supply the lack of offer from traditional Italian publishing houses. In this paper, we consider in some detail the role played by this type of Grey Literature in bringing up a wider and increasingly more aware community of web users in Italy.}, KEYWORDS = {Grey Literature}, PAGES = {145-148}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-78149461778\&origin=inward}, VOLUME = {6}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{OGNIBENE_2010_ARTICLE_OPB_30887, AUTHOR = {Ognibene, D. and Pezzulo, G. and Baldassarre, G.}, TITLE = {Learning to Look in Different Environments: An Active-Vision Model which Learns and Readapts Visual Routines}, YEAR = {2010}, ABSTRACT = {One of the main claims of the active vision framework is that finding data on the basis of task requirements is more efficient than reconstructing the whole scene by performing a complete visual scan.
To be successful, this approach requires that agents learn visual routines to direct overt attention to locations with the information needed to accomplish the task. In ecological conditions, learning such visual routines is difficult due to the partial observability of the world, the changes in the environment, and the fact that learning signals might be indirect. This paper uses a reinforcement-learning actor-critic model to study how visual routines can be formed, and then adapted when the environment changes, in a system endowed with a controllable gaze and reaching capabilities. The tests of the model show that: (a) the autonomously developed visual routines are strongly dependent on the task and the statistical properties of the environment; (b) when the statistics of the environment change, the performance of the system remains rather stable thanks to the re-use of previously discovered visual routines while the visual exploration policy remains for long time sub-optimal. We conclude that the model has a robust behaviour but the acquisition of an optimal visual exploration policy is particularly hard given its complex dependence on statistical properties of the environment, showing another of the difficulties that adaptive active vision agents must face.}, KEYWORDS = {adaptive vision, neural networks}, PAGES = {199-210}, URL = {https://publications.cnr.it/doc/30887}, VOLUME = {6226}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, JOURNAL = {Lecture notes in computer science}, } @ARTICLE{PEZZULO_2010_ARTICLE_PBLB_30892, AUTHOR = {Pezzulo, G. and Barca, L. and Lamberti Bocconi, A. and Borghi, A. M.}, TITLE = {When Affordances Climb into your Mind: Advantages of Motor Simulation in a Memory Task Performed by Novice and Expert Rock Climbers}, YEAR = {2010}, ABSTRACT = {Does the sight of multiple climbing holds laid along a path activate motor simulation of climbing that path?
One way of testing whether multiple affordances and their displacement influence the formation of a motor simulation is to study acquired motor skills. We used a behavioral task in which expert and novice rock climbers were shown three routes: an easy route, a route impossible to climb but perceptually salient, and a difficult route. After a distraction task, they were then given a recall test in which they had to write down the sequence of holds composing each route. We found no difference between experts and novices on the easy and impossible routes, whereas on the difficult route, the performance of experts was better than that of novices. This suggests that seeing a climbing wall activates a motor, embodied simulation, which relies not on perceptual salience, but on motor competence. More importantly, our results show that the capability to form this simulation is modulated by individuals' motor repertoire and expertise, and that this strongly impacts recall.}, KEYWORDS = {Simulation, Affordance, Embodied cognition, Grounded cognition, Canonical neurons, Mirror neurons, Motor memory, Memory for actions, Motor chunks}, PAGES = {68-73}, URL = {https://publications.cnr.it/doc/30892}, VOLUME = {73}, DOI = {10.1016/j.bandc.2010.03.002}, PUBLISHER = {Academic Press (New York, Stati Uniti d'America)}, ISSN = {0278-2626}, JOURNAL = {Brain and cognition}, } @ARTICLE{PIRRELLI_2010_ARTICLE_P_157483, AUTHOR = {Pirrelli, V.}, TITLE = {Interdisciplinary Approaches to Understanding Word Processing and Storage}, YEAR = {2010}, ABSTRACT = {The present collection of papers originates from a successful application to the European Science Foundation Exploratory Workshop Programme for the "Words in Action" workshop. 
The workshop, convened in Pisa on the 12th and 13th of October 2009, brought together experts of various scientific domains and theoretical inclinations to advance the current awareness of theoretical, typological, psycholinguistic, computational and neuro-physiological issues in word processing and storage, with a view to promoting novel methods of research and assessment for grammar architecture and language physiology.}, KEYWORDS = {Morphology, Word Processing, Word Learning, Mental Lexicon}, PAGES = {91-95}, URL = {https://publications.cnr.it/doc/157483}, VOLUME = {IX}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{RUIMY_2010_ARTICLE_R_64548, AUTHOR = {Ruimy, N.}, TITLE = {{Simple\_PLUS}: a network of lexical semantic relations {Simple\_PLUS}: una red de relaciones léxico-semánticas}, YEAR = {2010}, ABSTRACT = {The present article deals with the Italian lexical-semantic database Simple_PLUS and focuses on its essential core, i.e. the network of lexical semantic relations. This lexical resource builds on Parole-Simple-Clips, a four-layered electronic lexicon of Italian, founded on the SIMPLE model. Simple_PLUS consists of 30,000 semantic entries, partly imported from the source lexicon and partly newly created, but all encoding a wide-ranging set of information provided by the underpinning model. In Simple_PLUS, this semantic representation has been enriched with significant relational information, in a largely automated, inexpensive process. More than 5,000 relationships between events and their participants and among co-participants in events, links which were not capturable previously through lack of suitable representational means, have been encoded with the appropriate descriptive vocabulary borrowed from the EuroWordNet lexical model. 
Such conceptual links, which efficiently enhance the predicative representation in the lexicon, provide crucial lexical knowledge for NLP systems and for the Semantic Web.}, KEYWORDS = {Lexicon, lexical semantic relations, SIMPLE model, EuroWordNet model}, PAGES = {99-106}, URL = {http://www.sepln.org/?page_id=348}, VOLUME = {44}, PUBLISHER = {Sociedad Española para el Procesamiento del Lenguaje Natural (Jaén, Spagna)}, ISSN = {1989-7553}, JOURNAL = {Procesamiento del lenguaje natural (Internet)}, } @BOOK{FERRO_2010_BOOK_F_283384, AUTHOR = {Ferro, M.}, TITLE = {High Efficiency Real-Time Sensor and Actuator Control and Data Processing: A Framework Solution for Control Systems in Biomimetic Autonomous Robots}, YEAR = {2010}, URL = {https://publications.cnr.it/doc/283384}, PUBLISHER = {VDM Verlag Dr. Müller (Saarbrücken, DEU)}, ISBN = {978-3-639-25356-6}, EDITOR = {Ferro, M.}, } @BOOK{FRANCESCONI_2010_BOOK_FMPT_170395, AUTHOR = {Francesconi, E. and Montemagni, S. and Peters, W. and Tiscornia, D.}, TITLE = {Semantic Processing of Legal Texts: Where the Language of Law Meets the Law of Language}, YEAR = {2010}, ABSTRACT = {The last few years have seen a growing body of research and practice addressing aspects such as automated legal reasoning and argumentation, semantic and cross-language legal information retrieval, document classification, legal drafting, legal knowledge discovery and extraction. This State-of-the-Art Survey contains invited contributions of leading researchers and groups eminently active in the field, which were complemented with selected papers from the Workshop on Semantic Processing of Legal Texts, held in Marrakech, Morocco, in 2008, within the framework of the Sixth International Conference on Language Resources and Evaluation (LREC 2008). 
These publications mirror the state-of-the-art in linguistic technologies, tools and resources focusing on the automatic extraction of relevant information from legal texts, and the structured organization of this extracted knowledge for legal knowledge representation and scholarly activity, with particular emphasis on the crucial role played by language resources and human language technologies. The contents are organized in three topical sections on information extraction; construction of knowledge resources; and semantic indexing, summarization and translation.}, KEYWORDS = {Legal Text Processing, Ontology Learning, Information Extraction}, URL = {https://publications.cnr.it/doc/170395}, } @BOOK{NAHLI_2010_BOOK_N_288556, AUTHOR = {Nahli, O.}, TITLE = {lingua araba, il sistema verbale}, YEAR = {2010}, ABSTRACT = {Questo volume offre una descrizione del verbo arabo, in una prospettiva prevalentemente didattica seguendo la struttura di pensiero e le categorie linguistiche proprie della tradizione grammaticale araba. Emerge e si chiarisce così la logica che domina l'articolato sistema verbale arabo e che funge da cardine nell'organizzazione della sintassi e di buona parte della morfologia nominale. Per di più, al termine del libro, è presente un capitolo dedicato alla "frase araba", ovvero alla "frase verbale" e alla "frase nominale" ed ai loro rispettivi elementi costitutivi.}, PAGES = {140}, URL = {https://publications.cnr.it/doc/288556}, ISBN = {9788867413003}, } @INCOLLECTION{BOZZI_2010_INCOLLECTION_BS_136474, AUTHOR = {Bozzi, A. and Sandrucci, V.}, TITLE = {Uno strumento al servizio dell'archiviazione, lo studio, l'edizione e l'interrogazione di documenti digitali}, YEAR = {2010}, KEYWORDS = {Computational Philology}, URL = {https://publications.cnr.it/doc/136474}, PUBLISHER = {L'Harmattan (Paris, FRA)}, } @INCOLLECTION{CALZOLARI_2010_INCOLLECTION_CS_30886, AUTHOR = {Calzolari, N. 
and Soria, C.}, TITLE = {Planning the Future of Language Resources: The Role of the FLaReNet Network}, YEAR = {2010}, ABSTRACT = {In this paper we analyse the role of Language Resources (LR) and Language Technologies (LT) in today Human Language Technology field and try to speculate on some of the priorities for the next years, from the particular perspective of the FLaReNet project, that has been asked to act as an observatory to assess current status of the field on Language Resources and Technology and to indicate priorities of action for the future.}, KEYWORDS = {Language Resources and Technology, strategic initiatives, priorities}, PAGES = {1-11}, URL = {https://rdcu.be/dfs8V}, VOLUME = {6008}, DOI = {10.1007/978-3-642-12116-6_1}, PUBLISHER = {Springer (Berlin, DEU)}, ISBN = {978-3-642-12116-6}, BOOKTITLE = {Computational Linguistics and Intelligent Text Processing. CICLing 2010}, EDITOR = {Gelbukh, A.}, } @INCOLLECTION{JEZEK_2010_INCOLLECTION_JF_136473, AUTHOR = {Jezek, E. and Frontini, F.}, TITLE = {From Pattern Dictionary to Patternbank}, YEAR = {2010}, KEYWORDS = {Ontology. Computational Semantics}, PAGES = {215-237}, URL = {https://publications.cnr.it/doc/136473}, BOOKTITLE = {A Way with Words: Recent Advances in Lexical Theory and Analysis}, EDITOR = {De Schryver, G.}, } @INCOLLECTION{MARINELLI_2010_INCOLLECTION_MSC_136468, AUTHOR = {Marinelli, R. and Spadoni, G. and Cucurullo, S.}, TITLE = {Visual information to improve a lexical-semantic terminological resource}, YEAR = {2010}, ABSTRACT = {The lexical semantic database MariTerm contains structured information about the specialized terminology of the maritime domain (maritime navigation and transport). 
This paper describes the main phases of a project which aims to enhance the terminological database by means of a set of images: a) the structure of the terminological database; b) the domain conceptual modelling; c) the database management tool which, among its various features, allows visualization on demand of the image which is associated with the term being sought, contributing to clarification of the meaning of the term and increasing its information and communication potential.}, KEYWORDS = {terminology, computational resources, terminological databases, visual information}, PAGES = {493-500}, URL = {https://publications.cnr.it/doc/136468}, VOLUME = {1}, ISBN = {978-84-8427-759-0}, BOOKTITLE = {Modos y formas de la comunicaciòn humana-Ways and modes of human communication}, EDITOR = {Rodriguez, R. C. and Sanz, M. J. P.}, } @INCOLLECTION{OLTRAMARI_2010_INCOLLECTION_OGHCLP_136475, AUTHOR = {Oltramari, A. and Gangemi, A. and Huang, C. and Calzolari, N. and Lenci, A. and Prévot, L.}, TITLE = {Synergizing ontologies and the lexicon: a roadmap}, YEAR = {2010}, KEYWORDS = {Ontology}, URL = {https://publications.cnr.it/doc/136475}, PUBLISHER = {Cambridge University Press (Cambridge, GBR)}, } @INCOLLECTION{PIRRELLI_2010_INCOLLECTION_PGB_136469, AUTHOR = {Pirrelli, V. and Guevara, E. and Baroni, M.}, TITLE = {Computational issues in compound processing}, YEAR = {2010}, ABSTRACT = {Understanding compounds is a challenging computational task, cutting across multiple levels of linguistic analysis and touching upon intricate issues of representation, grammar architecture and algorithmic processing. At the same time, compounds raise all these problems in the most direct and exemplar way. From this perspective, they are an ideal probe into core issues of language architecture, making us pause about the need for advanced processing models and multi-disciplinary ap- proaches to long-lasting linguistic cruces. 
The paper reviews some of the lessons that can be learned from reading twenty years of computa- tional literature on the topic and assesses them against the background of germane theoretical and cognitive issues.}, KEYWORDS = {Morphology, Compounding, Natural Language Processing, Mental Lexicon}, PAGES = {271-285}, URL = {https://publications.cnr.it/doc/136469}, PUBLISHER = {John Benjamins (Amsterdam, NLD)}, ISBN = {9789027248275}, BOOKTITLE = {Cross-disciplinary issues in compounding}, EDITOR = {Scalise, S. and Vogel, I.}, } @INCOLLECTION{PRVOT_2010_INCOLLECTION_PHCGLO_136467, AUTHOR = {Prévot, L. and Huang, C. and Calzolari, N. and Gangemi, A. and Lenci, A. and Oltramari, A.}, TITLE = {Ontology and the Lexicon: A Multidisciplinary Perspective}, YEAR = {2010}, ABSTRACT = {The relation between ontologies and language is currently at the forefront of natural language processing (NLP). Ontologies, as widely used models in semantic technologies, have much in common with the lexicon. A lexicon organizes words as a conventional inventory of concepts, while an ontology formalizes concepts and their logical relations. A shared lexicon is the prerequisite for knowledge-sharing through language, and a shared ontology is the prerequisite for knowledge-sharing through information technology. In building models of language, computational linguists must be able to accurately map the relations between words and the concepts that they can be linked to. This book focuses on the technology involved in enabling integration between lexical resources and semantic technologies. It will be of interest to researchers and graduate students in NLP, computational linguistics, and knowledge engineering, as well as in semantics, psycholinguistics, lexicology and morphology/syntax.}, KEYWORDS = {Ontology. 
Computational Lexicology}, PAGES = {3-24}, URL = {https://publications.cnr.it/doc/136467}, PUBLISHER = {Cambridge University Press (Cambridge, GBR)}, ISBN = {978-0-521-88659-8}, BOOKTITLE = {Ontology and the Lexicon: A Natural Language Processing Perspective}, EDITOR = {Huang, C. and Calzolari, N. and Gangemi, A. and Lenci, A. and Oltramari, A. and Prévot, L.}, } @INCOLLECTION{RONZANO_2010_INCOLLECTION_RMMTC_169862, AUTHOR = {Ronzano, F. and Monachini, M. and Marchetti, A. and Tesconi, M. and Calzolari, N.}, TITLE = {Bootstrapping and Collaboratively Enriching the Italian Domain WordNet through the WiKyoto Knowledge Editor}, YEAR = {2010}, ABSTRACT = {Enhancing the development of multilingual resources is of utmost importance for use in computer applications. The need of ever growing resources for effective multilingual content processing has given impulse to a radical change in the perspective of language resource (LR) creation, structuring, exploitation and maintenance. The Web has played a key role in this process: indeed the possibility to access growing amounts of structured and unstructured data as well as the ease of creating and sharing contents between distributed communities of users have strongly affected the methodologies and techniques to bootstrap, enrich and access LRs. From static knowledge bases usually created and maintained by groups of experts and tailored to the specific exploitation contexts, LRs have turned into dynamic repositories of linguistic knowledge. Their content is usually easily accessible over the Web and often exploited aggregated and optimized on-the-fly by on-line information mining services. In this context, the adoption of standardized data formats to facilitate interoperability and data exchange is essential. Moreover, the creation and maintenance of these resources has taken great advantage from the possibility to harvest Web data in order to bootstrap or enrich them. 
Several new frameworks have been proposed to support access, search, integration and interoperability of "new generation" LRs. Wide distributed communities of Web users are more and more directly or indirectly involved in keeping language resources updated or in extending them. After a brief description of modern LRs, we focus our attention on two essential issues involving them: the need for standard formats that support interoperability in a distributed Web context and the possibility for the Web communities to collaboratively maintain and enrich these resources. In particular, we present the Italian WordNet (IWN) and its exploitation in the context of the KYOTO Project, as a real-world scenario where standardization, interlinking, enrichment as well as collaborative editing are put into practice.}, KEYWORDS = {NLP, collaborative editing, wordnet, knowledge representation, wiki}, PAGES = {181-208}, URL = {http://www.racai.ro/Multilinguality%20and%20Interoperability/TOC.html}, PUBLISHER = {Romanian Academy Publishing House (Bucharest, ROU)}, ISBN = {978-973-27-1972-5}, BOOKTITLE = {Multilinguality and Interoperability in Language Processing with Emphasis on Romanian}, EDITOR = {Tufis, D. and Forascu, I.}, } @INCOLLECTION{RUIMY_2010_INCOLLECTION_RBCN_136470, AUTHOR = {Ruimy, N. and Bouillon, P. and Cartoni, B. and Namer, F.}, TITLE = {Construction (semi-)automatique d'un lexique sémantique du français: inférences interlinguistiques et morphologie}, YEAR = {2010}, ABSTRACT = {Nous décrivons deux méthodes complémentaires pour la dérivation semi-automatique d'un lexique du français, suivant le modèle du Lexique Génératif. La première méthode exploite la similitude entre les langues. Les informations lexicales françaises sont dérivées à partir d'un dictionnaire sémantique électronique de l'italien. À cet effet, nous combinons 2 stratégies :pour les mots construits, nous exploitons la cognacité de certains suffixes. 
Après avoir recherché dans un dictionnaire bilingue la traduction française du mot italien avec le suffixe français correspondant (ex. : costruzione ? construction), nous générons l'entrée lexicale française en y transférant les informations sémantiques de l'entrée italienne ; . pour les mots non construits polysémiques (ex. : frazione [fraction, hameau]), nous utilisons les indicateurs de sens fournis dans les dictionnaires bilingues (pour frazione: (mat.); (centro abitato)) de manière à identifier, dans le lexique électronique italien, l'entrée lexicale appropriée qui donnera naissance à l'entrée du sens français équivalent. La seconde méthode fait appel aux principes de la morphologie constructionnelle du français et permet potentiellement de coder des mots construits dont les équivalents italiens ne figurent pas dans le lexique source. Ces mots nouveaux, extraits de corpus, sont soumis à l'analyseur dérivationnel DériF dont le résultat fournit une grande partie de l'information lexicale permettant la construction de nouvelles entrées françaises}, KEYWORDS = {Computational Lexicography, Computational Morphology}, PAGES = {71-88}, URL = {https://publications.cnr.it/doc/136470}, PUBLISHER = {Presses de l'Université du Québec (Québec, CAN)}, ISBN = {978-2-7605-2569-6}, BOOKTITLE = {Multilinguisme et traitement des langues naturelles}, EDITOR = {Biskri, I. and Jebali, A.}, } @INCOLLECTION{VENTURI_2010_INCOLLECTION_V_30893, AUTHOR = {Venturi, G.}, TITLE = {Legal Language and Legal Knowledge Management Applications}, YEAR = {2010}, ABSTRACT = {This work is an investigation into the peculiarities of legal language with respect to ordinary language. Based on the idea that a shallow parsing approach can help to provide enough detailed linguistic information, this work presents the results obtained by shallow parsing (i.e. chunking) corpora of Italian and English legal texts and comparing them with corpora of ordinary language. 
In particular, this paper puts the emphasis of how understanding the syntactic and lexical characteristics of this specialised language has practical importance in the development of domain-specific Knowledge Management applications.}, KEYWORDS = {Parsing Legal Texts, Natural Language Processing, Legal Language, Knowledge Management Applications}, PAGES = {3-26}, URL = {https://publications.cnr.it/doc/30893}, VOLUME = {6036}, PUBLISHER = {Springer-Verlag (Berlin Heidelberg, DEU)}, ISBN = {3-642-12836-X}, BOOKTITLE = {Semantic Processing of Legal Texts. Where the Language of Law Meets the Law of Language}, EDITOR = {Francesconi, E. and Montemagni, S. and Peters, W. and Tiscornia, D.}, } @EDITORIAL{CALZOLARI_2010_EDITORIAL_CBMS_136417, AUTHOR = {Calzolari, N. and Baroni, P. and Monachini, M. and Soria, C.}, TITLE = {Proceedings of the 2nd European Language Resources and Technologies Forum: Language Resources of the future-the future of Language Resources}, YEAR = {2010}, ABSTRACT = {Proceedings of the second FLaReNet forum on the European Language Resources and Technologies, held in Barcelona, at the Institut d'Estudis Catalans, on 11-12 February 2010.}, KEYWORDS = {Language Resources, Language Technologies, Future}, PAGES = {120}, URL = {http://www.flarenet.eu/sites/default/files/FLaReNet_Forum_2010_Proceedings.pdf}, } @EDITORIAL{CALZOLARI_2010_EDITORIAL_CCMMOPRT_136478, AUTHOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, TITLE = {Seventh International Conference on Language Resources and Evaluation}, YEAR = {2010}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/136478}, } @EDITORIAL{FRANCESCONI_2010_EDITORIAL_FMPT_186091, AUTHOR = {Francesconi, E. and Montemagni, S. and Peters, W. 
and Tiscornia, D.}, TITLE = {Semantic Processing of Legal Texts: Where the Language of Law Meets the Law of Language}, YEAR = {2010}, ABSTRACT = {The last few years have seen a growing body of research and practice addressing aspects such as automated legal reasoning and argumentation, semantic and cross-language legal information retrieval, document classification, legal drafting, legal knowledge discovery and extraction. This State-of-the-Art Survey contains invited contributions of leading researchers and groups eminently active in the field, which were complemented with selected papers from the Workshop on Semantic Processing of Legal Texts, held in Marrakech, Morocco, in 2008, within the framework of the Sixth International Conference on Language Resources and Evaluation (LREC 2008). These publications mirror the state-of-the-art in linguistic technologies, tools and resources focusing on the automatic extraction of relevant information from legal texts, and the structured organization of this extracted knowledge for legal knowledge representation and scholarly activity, with particular emphasis on the crucial role played by language resources and human language technologies. The contents are organized in three topical sections on information extraction; construction of knowledge resources; and semantic indexing, summarization and translation.}, PAGES = {249}, URL = {https://publications.cnr.it/doc/186091}, VOLUME = {6036}, ISBN = {978-3-642-12836-3}, } @EDITORIAL{FRANCESCONI_2010_EDITORIAL_FMPW_136477, AUTHOR = {Francesconi, E. and Montemagni, S. and Peters, W. and Wyner, A.}, TITLE = {Proceedings of the LREC 2010 Workshop on SEMANTIC PROCESSING OF LEGAL TEXTS (SPLeT-2010)}, YEAR = {2010}, KEYWORDS = {Legal Knowledge Extraction, Natural Language Processing}, URL = {https://publications.cnr.it/doc/136477}, } @EDITORIAL{FRANCESCONI_2010_EDITORIAL_FMRT_136476, AUTHOR = {Francesconi, E. and Montemagni, S. and Rossi, P. 
and Tiscornia, D.}, TITLE = {Proceedings of the 4th Workshop on Legal Ontologies and Artificial Intelligence Techniques (LOAIT 2010)}, YEAR = {2010}, KEYWORDS = {Legal Ontologies, Ontology Learning, Legal Knowledge Extraction, Legal Knowledge Modelling}, URL = {https://publications.cnr.it/doc/136476}, } @EDITORIAL{HUANG_2010_EDITORIAL_HCGLO_157484, AUTHOR = {Huang, C. and Calzolari, N. and Gangemi, A. and Lenci, A. and Oltramari, A.}, TITLE = {Ontology and the lexicon: a natural language processing perspective}, YEAR = {2010}, KEYWORDS = {Ontology}, PAGES = {339}, URL = {http://www.cambridge.org/br/academic/subjects/languages-linguistics/computational-linguistics/ontology-and-lexicon-natural-language-processing-perspective}, PUBLISHER = {Cambridge University Press (Cambridge, GBR)}, ISBN = {9780521886598}, } @EDITORIAL{HUANG_2010_EDITORIAL_HCCGLOP_288563, AUTHOR = {Huang, C. and Calzolari, N. and Gangemi, A. and Lenci, A. and Oltramari, A. and Prévot, L.}, TITLE = {Ontology and the Lexicon: A Natural Language Processing Perspective}, YEAR = {2010}, ABSTRACT = {The relation between ontologies and language is at the forefront of both natural language processing (NLP) and knowledge engineering. Ontologies, as widely used models in semantic technologies, have much in common with the lexicon. A lexicon organizes words as a conventional inventory of concepts, while an ontology formalizes concepts and their logical relations. A shared lexicon is the prerequisite for knowledge-sharing through language, and a shared ontology is the prerequisite for knowledge-sharing through information technology. In building models of language, computational linguists must be able to map accurately the relations between words and the concepts that they can be linked to. This book focuses on the integration of lexical resources and semantic technologies. 
It will be of interest to researchers and graduate students in NLP, computational linguistics and knowledge engineering, as well as in semantics, psycholinguistics, lexicology and morphology/syntax. CHU-REN HUANG is Chair Professor in the Department of Chinese and Bilingual Studies at the Hong Kong Polytechnic University, and Research Fellow at the Institute of Linguistics, Academia Sinica in Taiwan. NICOLETTA CALZOLARI is Director of Research in Istituto di Linguistica Computazionale at the CNR in Italy. ALDO GANGEMI is senior researcher at the Institute of Cognitive Science and Technology of CNR (Italy), and coordinator of the Semantic Technology Lab. ALESSANDRO LENCI is a researcher in the Department of Linguistics at the University of Pisa. ALESSANDRO OLTRAMARI is a Research Fellow in the Laboratory for Applied Ontology, Institute of Cognitive Science and Technology at the Italian National Research Council. LAURENT PRÉVOT is an Associate Professor at the Université de Provence.}, KEYWORDS = {Knowledge engineering, Natural Language Processing. Ontology}, PAGES = {1-335}, URL = {https://publications.cnr.it/doc/288563}, PUBLISHER = {Cambridge university press (Cambridge, GBR)}, ISBN = {978-0-521-88659-8}, } @EDITORIAL{PIRRELLI_2010_EDITORIAL_P_273429, AUTHOR = {Pirrelli, V.}, TITLE = {Interdisciplinary Approaches to Understanding Word Processing and Storage}, YEAR = {2010}, ABSTRACT = {The present collection of papers originates from a successful application to the European Science Foundation Exploratory Workshop Programme for the "Words in Action" workshop. 
The workshop, convened in Pisa on the 12th and 13th of October 2009, brought together experts of various scientific domains and theoretical inclinations to advance the current awareness of theoretical, typological, psycholinguistic, computational and neuro-physiological issues in word processing and storage, with a view to promoting novel methods of research and assessment for grammar architecture and language physiology.}, PAGES = {91-240}, URL = {https://publications.cnr.it/doc/273429}, PUBLISHER = {Societa Editrice il Mulino (Bologna, ITA)}, } @INPROCEEDINGS{AGIRRE_2010_INPROCEEDINGS_ALFHTMVVS_172865, AUTHOR = {Agirre, E. and López de Lacalle, O. and Fellbaum, C. and Hsieh, S. and Tesconi, M. and Monachini, M. and Vossen, P. and Segers, R.}, TITLE = {SemEval-2010 task 17: All-words word sense disambiguation on a specific domain}, YEAR = {2010}, ABSTRACT = {Domain portability and adaptation of NLP components and Word Sense Disambiguation systems present new challenges. The difficulties found by supervised systems to adapt might change the way we assess the strengths and weaknesses of supervised and knowledge-based WSD systems. Unfortunately, all existing evaluation datasets for specific domains are lexical-sample corpora. This task presented all-words datasets on the environment domain for WSD in four languages (Chinese, Dutch, English, Italian). 11 teams participated, with supervised and knowledge-based systems, mainly in the English dataset. The results show that in all languages the participants where able to beat the most frequent sense heuristic as estimated from general corpora. The most successful approaches used some sort of supervision in the form of hand-tagged examples from the domain.}, KEYWORDS = {I. 2. 
7 Natural Language Processing, Word Sense Disambiguation systems, Semantic Annotation, Word-sense disambiguation}, PAGES = {75-80}, URL = {https://publications.cnr.it/doc/172865}, ISBN = {978-1-932432-70-1}, CONFERENCE_NAME = {ACL 2010-SemEval 2010: 5th International Workshop on Semantic Evaluation}, CONFERENCE_PLACE = {Uppsala, Sweden}, CONFERENCE_DATE = {15-16 Luglio 2010}, EDITOR = {Erk, K. and Strapparava, C.}, } @INPROCEEDINGS{ATTARDI_2010_INPROCEEDINGS_ADDLMS_84775, AUTHOR = {Attardi, G. and Dei Rossi, S. and Di Pietro, G. and Lenci, A. and Montemagni, S. and Simi, M.}, TITLE = {A Resource and Tool for Super-sense Tagging of Italian Texts}, YEAR = {2010}, KEYWORDS = {Corpus (creation, annotation, etc.), Tools, Systems, Applications, Statistical and machine learning methods}, URL = {https://publications.cnr.it/doc/84775}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{ATTIA_2010_INPROCEEDINGS_ATTMV_84787, AUTHOR = {Attia, M. and Toral, A. and Tounsi, L. and Monachini, M. and Van Genabith, J.}, TITLE = {An Automatically Built Named Entity Lexicon for Arabic}, YEAR = {2010}, ABSTRACT = {We have successfully adapted and extended the automatic Multilingual, Interoperable Named Entity Lexicon approach to Arabic, using Arabic WordNet (AWN) and Arabic Wikipedia (AWK). First, we extract AWN's instantiable nouns and identify the corresponding categories and hyponym subcategories in AWK. Then, we exploit Wikipedia inter-lingual links to locate correspondences between articles in ten different languages in order to identify Named Entities (NEs). We apply keyword search on AWK abstracts to provide for Arabic articles that do not have a correspondence in any of the other languages. In addition, we perform a post-processing step to fetch further NEs from AWK not reachable through AWN. 
Finally, we investigate diacritization using matching with geonames databases, MADA-TOKAN tools and different heuristics for restoring vowel marks of Arabic NEs. Using this methodology, we have extracted approximately 45,000 Arabic NEs and built, to the best of our knowledge, the largest Named Entity repository available.}, KEYWORDS = {Acquisition, Lexicon, database, Named Entity recognition}, URL = {https://publications.cnr.it/doc/84787}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{BARCA_2010_INPROCEEDINGS_BPC_84779, AUTHOR = {Barca, L. and Pezzulo, G. and Castelli, E.}, TITLE = {Egocentric and allocentric spatial references in children with Cerebral Palsy}, YEAR = {2010}, KEYWORDS = {cerebral palsy, spatial, egocentric, allocentric}, URL = {https://publications.cnr.it/doc/84779}, CONFERENCE_NAME = {Cogsci 2010-annual meeting of the Cognitive Science Society}, CONFERENCE_PLACE = {Portland, Oregon}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{BONIN_2010_INPROCEEDINGS_BDMV_84796, AUTHOR = {Bonin, F. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {A Contrastive Approach to Multi-word Extraction from Domain-specific Corpora}, YEAR = {2010}, ABSTRACT = {In this paper we present a novel approach to multi-word terminology extraction combining a well-known automatic term recognition approach, the C-NC value method, with a contrastive ranking technique, aimed at refining obtained results either by filtering noise due to common words or by discerning between semantically different types of terms within heterogeneous terminologies. 
The proposed methodology has been tested in two case studies carried out in the History of Art and Legal domains with promising results.}, KEYWORDS = {Terminology Extraction, Domain-specific Corpora, Multi-word Expression}, PAGES = {3222-3229}, URL = {https://publications.cnr.it/doc/84796}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {19-21 maggio 2010}, } @INPROCEEDINGS{BONIN_2010_INPROCEEDINGS_BDVM_84802, AUTHOR = {Bonin, F. and Dell'Orletta, F. and Venturi, G. and Montemagni, S.}, TITLE = {Contrastive filtering of domain specific multi-word terms from different types of corpora}, YEAR = {2010}, ABSTRACT = {In this paper we tackle the challenging task of Multi-word term (MWT) extraction from different types of specialized corpora. Contrastive filtering of previously extracted MWTs results in a considerable increment of acquired domain-specific terms.}, KEYWORDS = {multi-word terms extraction, corpora}, PAGES = {76-79}, URL = {https://publications.cnr.it/doc/84802}, ISBN = {978-7-900268-00-6}, CONFERENCE_NAME = {The 23rd International Conference on Computational Linguistics (COLING 2010). Multiword Expressions: from Theory to Applications (MWE 2010)}, CONFERENCE_PLACE = {Beijing, China}, CONFERENCE_DATE = {28 agosto 2010}, } @INPROCEEDINGS{BOSCO_2010_INPROCEEDINGS_BMMDL_84799, AUTHOR = {Bosco, C. and Montemagni, S. and Mazzei, A. and Dell'Orletta, F. and Lenci, A.}, TITLE = {Evalita'09 Parsing Task: comparing dependency parsers and treebanks}, YEAR = {2010}, KEYWORDS = {dependency parsing, dependency treebank}, URL = {https://publications.cnr.it/doc/84799}, CONFERENCE_NAME = {Evaluation of NLP and Speech Tools for Italian. EVALITA 2009}, CONFERENCE_PLACE = {Reggio Emilia, Italy}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{BOSCO_2010_INPROCEEDINGS_BMMLDLLASLHNN_84789, AUTHOR = {Bosco, C. and Montemagni, S. and Mazzei, A. and Lombardo, V. 
and Dell'Orletta, F. and Lenci, A. and Lesmo, L. and Attardi, G. and Simi, M. and Lavelli, A. and Hall, J. and Nilsson, J. and Nivre, J.}, TITLE = {Comparing the Influence of Different Treebank Annotations on Dependency Parsing}, YEAR = {2010}, KEYWORDS = {Parsing, Corpus (creation, annotation, etc.), Evaluation methodologies}, URL = {https://publications.cnr.it/doc/84789}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{BOZZI_2010_INPROCEEDINGS_B_112959, AUTHOR = {Bozzi, A.}, TITLE = {L'archiviazione, lo studio, l'edizione e l'interrogazione di documenti digitali: Pinakes Text}, YEAR = {2010}, ABSTRACT = {aaa}, KEYWORDS = {Filologia computazionale}, PAGES = {254}, URL = {https://publications.cnr.it/doc/112959}, VOLUME = {3. 2010}, PUBLISHER = {L'Erma di Bretschneider (Roma, Italia)}, ISSN = {1825-8980}, ISBN = {9788864940205}, CONFERENCE_NAME = {Metodologie Informatiche per le Scienze Umane}, CONFERENCE_PLACE = {Reggio Calabria}, CONFERENCE_DATE = {05/12/2008}, BOOKTITLE = {Polis (Roma)}, } @INPROCEEDINGS{BUNT_2010_INPROCEEDINGS_BACCFHPPRSTL_84794, AUTHOR = {Bunt, H. and Alexandersson, J. and Carletta, J. and Choe, J. and Fang, A. C. and Hasida, K. and Petukhova, V. and Popescu Belis, A. and Romary, L. and Soria, C. and Traum, D. and Lee, K.}, TITLE = {Towards an ISO Standard for Dialogue Act Annotation}, YEAR = {2010}, ABSTRACT = {This paper describes an ISO project developing an international standard for annotating dialogue with semantic information, in particular concerning the communicative functions of the utterances, the kind of content they address, and the dependency relations to what was said and done earlier in the dialogue. 
The project, registered as ISO 24617-2 "Semantic annotation framework, Part 2: Dialogue acts", is currently at DIS stage.}, KEYWORDS = {Dialogue, Corpus (creation, annotation, etc.), Semantics}, PAGES = {2548-2555}, URL = {https://publications.cnr.it/doc/84794}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {2010}, BOOKTITLE = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{CALZOLARI_2010_INPROCEEDINGS_C_84767, AUTHOR = {Calzolari, N.}, TITLE = {Introduction of the LREC 2010 Conference Chair}, YEAR = {2010}, KEYWORDS = {Language Resources and Evaluation}, URL = {https://publications.cnr.it/doc/84767}, CONFERENCE_NAME = {7th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Malta}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{CALZOLARI_2010_INPROCEEDINGS_CS_84792, AUTHOR = {Calzolari, N. and Soria, C.}, TITLE = {Preparing the field for an Open Resource Infrastructure: the role of the FLaReNet Network of Excellence}, YEAR = {2010}, ABSTRACT = {In order to overcome the fragmentation that affects the field of Language Resources and Technologies, an Open and Distributed Resource Infrastructure is the necessary step for building on each other achievements, integrating resources and technologies and avoiding dispersed or conflicting efforts. 
Since this endeavour represents a true cultural turnpoint in the LRs field, it needs a careful preparation, both in terms of acceptance by the community and thoughtful investigation of the various technical, organisational and practical aspects implied. To achieve this, we need to act as a community able to join forces on a set of shared priorities and we need to act at a worldwide level. FLaReNet - Fostering Language Resources Network - is a Thematic Network funded under the EU eContent program that aims at developing the needed common vision and fostering a European and International strategy for consolidating the sector, thus enhancing competitiveness at EU level and worldwide. In this paper we present the activities undertaken by FLaReNet in order to prepare and support the establishment of such an Infrastructure, which is becoming now a reality within the new MetaNet initiative.}, KEYWORDS = {LR Infrastructures and Architectures, LR national/international projects, organizational/policy issues}, PAGES = {1295-1300}, URL = {https://publications.cnr.it/doc/84792}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {19-21/05/2010}, BOOKTITLE = {Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10)}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{CALZOLARI_2010_INPROCEEDINGS_CSDGQRCMP_84809, AUTHOR = {Calzolari, N. and Soria, C. and Del Gratta, R. and Goggi, S. and Quochi, V. and Russo, I. and Choukri, K. and Mariani, J. 
and Piperidis, S.}, TITLE = {The LREC Map of Language Resources and Technologies}, YEAR = {2010}, ABSTRACT = {In this paper we present the LREC Map of Language Resources (data and tools), an innovative feature introduced in conjunction with the LREC 2010 Conference. The purpose of the Map is to shed light on the vast amount of resources that represent the background of the research presented at LREC, in the attempt to fill in a gap in the community knowledge about the resources that are used or created worldwide. It also aims at a change of culture in the field, actively engaging each researcher in the documentation task about resources. The Map has been developed on the basis of the information provided by LREC authors during the submission of papers to the LREC 2010 conference and the LREC workshops, and contains information about almost 2000 resources. The paper illustrates the motivation behind this initiative, its main characteristics, its relevance and future impact in the field, the metadata used to describe the resources, and finally presents some of the most relevant findings.}, KEYWORDS = {LR national/international projects, organizational/policy issues}, PAGES = {949-956}, URL = {http://www.lrec-conf.org/proceedings/lrec2010/index.html}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {LREC 2010 Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {17-23 May 2010}, BOOKTITLE = {LREC'10-Seventh International Conference on Language Resources and Evaluation. Proceedings}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{CASELLI_2010_INPROCEEDINGS_CP_84766, AUTHOR = {Caselli, T. 
and Prodanof, I.}, TITLE = {Annotating Event Anaphora: A Case Study}, YEAR = {2010}, KEYWORDS = {Anaphora, Coreference, Corpus (creation, annotation, etc.), Semantics}, URL = {https://publications.cnr.it/doc/84766}, CONFERENCE_NAME = {Seventh conference on International Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{CIERI_2010_INPROCEEDINGS_CCCLLPIP_84788, AUTHOR = {Cieri, C. and Choukri, K. and Calzolari, N. and Langendoen, D. T. and Leveling, J. and Palmer, M. and Ide, N. and Pustejovsky, J.}, TITLE = {A Road Map for Interoperable Language Resource Metadata}, YEAR = {2010}, ABSTRACT = {LRs remain expensive to create and thus rare relative to demand across languages and technology types. The accidental re-creation of an LR that already exists is a nearly unforgiveable waste of scarce resources that is unfortunately not so easy to avoid. The number of catalogs the HLT researcher must search, with their different formats, make it possible to overlook an existing resource. This paper sketches the sources of this problem and outlines a proposal to rectify along with a new vision of LR cataloging that will facilitate the documentation and exploitation of a much wider range of LRs than previously considered.}, KEYWORDS = {LR national/international projects, organizational/policy issues, LR Infrastructures and Architectures}, PAGES = {2506-2509}, URL = {https://publications.cnr.it/doc/84788}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {17-23 May 2010}, } @INPROCEEDINGS{DELGRATTA_2010_INPROCEEDINGS_DDBCEMQSTC_84782, AUTHOR = {Del Gratta, R. and D'Onofrio, L. and Bartolini, R. and Caselli, T. and Enea, A. and Monachini, M. and Quochi, V. 
and Soria, C. and Toral, A. and Calzolari, N.}, TITLE = {A Web-based Architecture for Interoperability of Lexical Resources}, YEAR = {2010}, ABSTRACT = {In this paper we present a Web Service Architecture for managing high level interoperability of Language Resources (LRs) by means of a Service Oriented Architecture (SOA) and the use of ISO standards, such as ISO LMF. We propose a layered architecture which separates the management of legacy resources (data collection) from data aggregation (workflow) and data access (user requests). We provide a case study to demonstrate how the proposed architecture is capable of managing data exchange among different lexical services in a coherent way and show how the use of a lexical standard becomes of primary importance when a protocol of interoperability is defined.}, KEYWORDS = {Interoperability, Web services, Lexical resources}, PAGES = {53-62}, URL = {http://weblab.iit.cnr.it/kyoto/www2.let.vu.nl/twiki/pub/Kyoto/Publications/icgl2010_DOnofrioetal.pdf}, PUBLISHER = {City university of Hong Kong press (Hong Kong, CHN)}, ISBN = {978-962-442-323-5}, CONFERENCE_NAME = {2nd International Conference on Global Interoperability for Language Resources}, CONFERENCE_PLACE = {Hong Kong}, CONFERENCE_DATE = {18-20 January 2010}, BOOKTITLE = {2nd International Conference on Global Interoperability for Language Resources, ICGL 2010}, EDITOR = {Fang, A. C. and Ide, N. and Webster, J.}, } @INPROCEEDINGS{FORNACIARI_2010_INPROCEEDINGS_FCF_184482, AUTHOR = {Fornaciari, A. and Cignoni, L. and Fornaciari, G.}, TITLE = {STUDENTS' PARTICIPATION IN AN ARCHAEOANTHROPOLOGY COURSE USING A CONTENT AND LANGUAGE INTEGRATED LEARNING (CLIL) METHODOLOGY}, YEAR = {2010}, ABSTRACT = {In this paper we present the results of an ongoing archaeoanthropology project which was started in the year 2007 at Benabbio, a village located near Lucca, in northern Tuscany, Italy. 
Aim of the project is to exhume individuals buried in the medieval cemetery near the church of the Castle of Benabbio (also in course of excavation), and of people who died in the village during the cholera epidemic of the year 1855. Burials and artefacts, ranging between 12th and 19th centuries, have been recorded on a computer database to provide information about the cultural and physical aspects of this ancient human group. The discovery of some well preserved medieval houses led to the decision to examine the settlement in its entirety, exploring the spatial and chronological development of the site. Archaeological exploration of this vast area will involve not only the cemetery of the castle but also the houses and the manor, investigating the origins of the settlement and the early stages of encastlement, by which the local lord could dominate the surrounding countryside and control his neighbours. It will be possible to reconstruct the different phases of organization of the settlement and the life-style and diseases of the inhabitants of the castle. Furthermore, this project is an important testbed for the development of field techniques such as balloon photography, high-resolution georadar survey, GIS, as well as special laboratory techniques for palaeopathology such as computerized tomography (CT), electron microscopy (EM), stable isotope palaeonutrition and ancient DNA (aDNA), that can be used to diagnose ancient diseases. This paper has a dual purpose: on the one hand, it makes the first results of the excavations available to the widest possible audience; on the other hand, it describes the application of a CLIL methodology which uses a foreign language to study a specific discipline. In this case, English is used as supporting language to carry out a number of tasks on the archaeological site, within an environment of interaction where the peers, the archaeologist, the content and language teachers are working in collaboration. 
The language is embedded within the activities, in particular two separate groups of students will be working on the two different sites (the cemetery and the residential complex), communicating in English with students graduating in the same discipline in Great Britain and the United States. The two groups will be asked to keep a running diary of the daily activities, and to document this information, using the blogging features of Office Word, a familiar device offering a wide range of writing tools that can help create an ongoing online record of their experiences. The project, organized and funded by the Division of Palaeopathology of Pisa University, has been approved by the Faculty of Humanities of the University of Pisa.}, PAGES = {2684-2692}, URL = {https://publications.cnr.it/doc/184482}, ISBN = {978-84-613-5536-5}, CONFERENCE_NAME = {International Technology, Education and Development Conference}, CONFERENCE_PLACE = {Valencia, Spagna}, CONFERENCE_DATE = {8-10 marzo 2010}, BOOKTITLE = {Proceedings of International Technology, Education and Development Conference}, EDITOR = {Chova, L. G. and Belenguer, D. M. and Torres, I. C.}, } @INPROCEEDINGS{GIGLIOTTA_2010_INPROCEEDINGS_GPN_30891, AUTHOR = {Gigliotta, O. and Pezzulo, G. and Nolfi, S.}, TITLE = {Emergence of an internal model in evolving robots subjected to sensory deprivation}, YEAR = {2010}, ABSTRACT = {In this study we show how simulated robots evolved to display navigation skills can spontaneously develop an internal model and rely on it to complete their task when sensory stimulation is temporarily unavailable. The analysis of some of the best evolved agents indicates that their internal model operates by anticipating functional properties of the next sensory state rather than the exact state that sensors would have assumed. 
The characteristics of the states that are anticipated and of the sensory-motor rules that determine how the agents react to the experienced states, however, ensure that the agents produce very similar behaviour during normal and blind phases in which sensory stimulation is available or is self-generated by the agent itself, respectively. The characteristics of the agents' internal models also ensure an effective transition during the phases in which agents' internal dynamics is decoupled and re-coupled with the sensory-motor flow.}, KEYWORDS = {internal model, neural networks, evolutionary robotics}, PAGES = {575-586}, URL = {https://publications.cnr.it/doc/30891}, VOLUME = {6226}, DOI = {10.1007/978-3-642-15193-4_54}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, ISBN = {978-3-642-15193-4}, CONFERENCE_NAME = {11th International Conference on Simulation of Adaptive Behavior, SAB 2010}, CONFERENCE_PLACE = {Paris}, CONFERENCE_DATE = {August 25-28, 2010}, BOOKTITLE = {From Animals to Animats 11}, } @INPROCEEDINGS{GIOVANNETTI_2010_INPROCEEDINGS_G_84798, AUTHOR = {Giovannetti, E.}, TITLE = {An unsupervised approach for semantic relation interpretation}, YEAR = {2010}, ABSTRACT = {In this work we propose a hybrid unsupervised approach for semantic relation extraction from Italian and English texts. The system takes as input pairs of "distributionally similar" terms, possibly involved in a semantic relation. To validate and label the anonymous relations holding between the terms in input, the candidate pairs of terms are looked for on the Web in the context of reliable lexico-syntactic patterns. 
This paper focuses on the definition of the patterns, on the measures used to assess the reliability of the suggested specific semantic relation and on the evaluation of the implemented system.}, KEYWORDS = {Information Extraction, Information Retrieval, Knowledge Representation, Ontologies}, PAGES = {3811-3816}, URL = {https://publications.cnr.it/doc/84798}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {19-21 maggio 2010}, BOOKTITLE = {Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10)}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{HAWAYEK_2010_INPROCEEDINGS_HDC_84797, AUTHOR = {Hawayek, A. and Del Gratta, R. and Cappelli, G.}, TITLE = {A bilingual dictionary Mexican Sign Language-Spanish/Spanish-Mexican Sign Language}, YEAR = {2010}, ABSTRACT = {We present a three-part bilingual specialized dictionary Mexican Sign Language-Spanish / Spanish-Mexican Sign Language. This dictionary will be the outcome of a three-years agreement between the Italian "Consiglio Nazionale delle Ricerche" and the Mexican Conacyt. Although many other sign language dictionaries have been provided to deaf communities, there are no Mexican Sign Language dictionaries in Mexico, yet. We want to stress on the specialized feature of the proposed dictionary: the bilingual dictionary will contain frequently used general Spanish forms along with scholastic course specific specialized words whose meanings warrant comprehension of school curricula. 
We emphasize that this aspect of the bilingual dictionary can have a deep social impact, since we will furnish to deaf people the possibility to get competence in official language, which is necessary to ensure access to school curriculum and to become full-fledged citizens. From a technical point of view, the dictionary consists of a relational database, where we have saved the sign parameters and a graphical user interface especially designed to allow deaf children to retrieve signs using the relevant parameters and,thus, the meaning of the sign in Spanish.}, KEYWORDS = {Sign Language Recognition/Generation, Lexicon, Lexical database, Acquisition}, PAGES = {3055-3062}, URL = {http://www.lrec-conf.org/proceedings/lrec2010/index.html}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {17-23/05/2010}, } @INPROCEEDINGS{JEZEK_2010_INPROCEEDINGS_JQ_84783, AUTHOR = {Jezek, E. and Quochi, V.}, TITLE = {Capturing Coercions in Texts: a First Annotation Exercise}, YEAR = {2010}, ABSTRACT = {In this paper we report the first results of an annotation exercise of argument coercion phenomena performed on Italian texts. Our corpus consists of ca 4000 sentences from the PAROLE sottoinsieme corpus (Bindi et al. 2000) annotated with Selection and Coercion relations among verb-noun pairs formatted in XML according to the Generative Lexicon Mark-up Language (GLML) format (Pustejovsky et al., 2008). For the purposes of coercion annotation, we selected 26 Italian verbs that impose semantic typing on their arguments in either Subject, Direct Object or Complement position. Every sentence of the corpus is annotated with the source type for the noun arguments by two annotators plus a judge. An overall agreement of 0.87 kappa indicates that the annotation methodology is reliable. 
A qualitative analysis of the results allows us to outline some suggestions for improvement of the task: 1) a different account of complex types for nouns has to be devised and 2) a more comprehensive account of coercion mechanisms requires annotation of the deeper meaning dimensions that are targeted in coercion operations, such as those captured by Qualia relations.}, KEYWORDS = {Corpus (creation, annotation, etc.), Knowledge Discovery/Representation, Semantics}, PAGES = {1464-1471}, URL = {http://www.lrec-conf.org/proceedings/lrec2010/summaries/713.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {17-23 Maggio 2010}, BOOKTITLE = {Proceedings of the Seventh International Conference on Language Resources and Evaluation-LREC'10}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{MARINELLI_2010_INPROCEEDINGS_M_84804, AUTHOR = {Marinelli, R.}, TITLE = {Lexical Resources and Ontological Classifications for the Recognition of Proper Names Sense Extension}, YEAR = {2010}, ABSTRACT = {Particular uses of PNs with sense extension are focussed on and inspected taking into account the presence of PNs in lexical semantic databases and electronic corpora. Methodology to select and include PNs in semantic databases is described; the use of PNs in corpora of Italian Language is examined and evaluated, analyzing the behaviour of a set of PNs in different periods of time. Computational resources can facilitate our study in this field in an effective way by helping codify, translate and handle particular cases of polysemy, but also guiding in metaphorical and metonymic sense recognition, supported by the ontological classification of the lexical semantic entities. 
The relationship between the "abstract" and the "concrete", which is at the basis of the Conceptual Metaphor perspective, can be considered strictly related to the variation of the ontological values found in our analysis of the PNs and their belonging classes which are codified in the ItalWordNet database.}, KEYWORDS = {Lexicon, Lexical Database, Corpus creation and annotation, Ontologies}, PAGES = {518-522}, URL = {http://www.lrec-conf.org/proceedings/lrec2010/index.html}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation LREC2010}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {17-23 May 2010}, BOOKTITLE = {LREC'10-Seventh International Conference on Language Resources and Evaluation (Valletta, Malta, 17-23 May 2010)}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{MARINELLI_2010_INPROCEEDINGS_M_169643, AUTHOR = {Marinelli, R.}, TITLE = {Ontological variation and sense extension in proper names}, YEAR = {2010}, ABSTRACT = {The lexical coverage of the semantic database ItalWordNet has been extended by adding to the first nucleus of nouns and verbs, a set of adjectives and adverbs and also a set of Proper Names (PNs). Particular cases of polysemy were found in codifying PNs. Sense variation in PNs was verified in linguistic resources and new semantic relations were created in order to represent the sense extension phenomenon in the database. 
The relationship between the PNs present in IWN and the ontological classification of their belonging classes was highlighted: correspondence between sense variations and ontological variations was examined and the reference to the ontological structure of IWN was of help in recognizing many cases of PNs sense extension.}, KEYWORDS = {lexical semantic databases, proper names, cognitive linguistics, metaphor, metonymy}, PAGES = {687-698}, URL = {http://dialnet.unirioja.es/servlet/articulo?codigo=3411098}, ISBN = {978-84-8158-479-0}, CONFERENCE_NAME = {XXVIII AESLA Conference "Analizar datos Describir variacion-Analysing data Describing variation", Vigo, University of Vigo. (Vigo, Spain, 15 th-17 th April 2010)}, CONFERENCE_PLACE = {Vigo, Spain}, CONFERENCE_DATE = {15 th-17 th April 2010}, BOOKTITLE = {XXVIII AESLA Conference, "Analizar datos Describir variacion-Analysing data Describing variation", Vigo, University of Vigo. (Vigo, Spain, 15 th-17 th April 2010). Proceedings, vol. 1}, EDITOR = {Alonso, J. L. B. and Alvarez, D. G. and Torrado, U. K. and Insua, A. E. M. and Guerra, J. P. and Martinez, E. R. and Vasquez, R. R.}, } @INPROCEEDINGS{MARINELLI_2010_INPROCEEDINGS_MRSC_84770, AUTHOR = {Marinelli, R. and Roventini, A. and Spadoni, G. and Cucurullo, S.}, TITLE = {Lexical Semantic Resources in a Terminological Network}, YEAR = {2010}, ABSTRACT = {A research has been carried on and is still in progress aimed at the construction of three specialized lexicons organized as databases of relational type. The three databases contain terms belonging to the specialized knowledge fields of maritime terminology (technical-nautical and maritime transport domain), taxation law, and labour law with union labour rules, respectively. The EuroWordNet/ItalWordNet model was firstly used to structure the terminological database of maritime domain. The methodology experimented for its construction was applied to construct the next databases. 
It consists in i) the management of corpora of specialized languages and ii) the use of generic databases to identify and extract a set of candidate terms to be codified in the terminological databases. The three specialized resources are described highlighting the various kinds of lexical semantic relations linking each term to the others within the single terminological database and to the generic resources WordNet and ItalWordNet. The construction of these specialized lexicons was carried on in the framework of different projects; but they can be seen as a first nucleus of an organized network of generic and specialized lexicons with the purpose of making the meaning of each term clearer from a cognitive point of view.}, KEYWORDS = {Lexicon, Semantics, Lexical database, Ontologies}, PAGES = {2288-2291}, URL = {https://publications.cnr.it/doc/84770}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {La Valletta, Malta}, CONFERENCE_DATE = {17-23 May 2010}, BOOKTITLE = {LREC'10-Seventh International Conference on Language Resources and Evaluation (La Valletta, Malta, 17-23 May 2010). Proceedings}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{MARINELLI_2010_INPROCEEDINGS_MSC_84795, AUTHOR = {Marinelli, R. and Spadoni, G. and Cucurullo, S.}, TITLE = {Adding information to a terminological database by means of image files}, YEAR = {2010}, ABSTRACT = {A lexical semantic database containing terms belonging to the specialized lexicon of the maritime navigation and maritime transport was built according to WordNet/EuroWordNet model. Our paper presents a project planning the enrichment of the terminological database by means of a set of images. 
A short description is given about a) the structure of the terminological database and the domain conceptual modelling; b) the various features of the database management tool, and, among all, the possibility of visualizing, on demand, the image which is associated with the term being sought, contributing to clarify and refine the meaning of the term, increasing its information and communication effectiveness.}, KEYWORDS = {terminology, lexical databases, computational resources, images}, PAGES = {347-353}, URL = {http://www.cfilt.iitb.ac.in/gwc2010/pdfs/30_adding_image_info_to_wordnet__Marinelli.pdf}, PUBLISHER = {Narosa Publishing House (New Delhi, IND)}, ISBN = {978-81-8487-083-1}, CONFERENCE_NAME = {5th Global WordNet Conference}, CONFERENCE_PLACE = {Mumbai}, CONFERENCE_DATE = {January 31-February 4 2010}, BOOKTITLE = {Proceedings of the 5th Global WordNet Conference, Mumbai India, 2010}, EDITOR = {Bhattacharyya, P. and Fellbaum, C. and Vossen, P.}, } @INPROCEEDINGS{MARZI_2010_INPROCEEDINGS_MPS_84790, AUTHOR = {Marzi, C. and Pardelli, G. and Sassi, M.}, TITLE = {Grey Literature and Computational Linguistics: From Paper to Net}, YEAR = {2010}, ABSTRACT = {The advent and exponential development of the World Wide Web has led to an increasing availability of unstructured knowledge and distributed information sources, meeting general public requirements that are hardly addressed by other more traditional information channels. This trend has concurrently raised a considerable interest in the application of Computational Linguistics (CL) methodologies to document access and retrieval, as they offer the unprecedented opportunity to make the subjective, user-centred information demands of Net citizens meet the ever changing and heterogeneous information flow of the web. 
Over the last five years, more and more Italian Universities have introduced CL courses into their Humanities curricula, making available on-line teaching materials, tutorials and language engineering software that appear to supply the lack of offer from traditional Italian publishing houses. In this paper, we consider in some detail the role played by this type of Grey Literature in bringing up a wider and increasingly more aware community of web users in Italy.}, KEYWORDS = {Computational Linguistics, Grey, Web-based information}, PAGES = {81-84}, URL = {https://publications.cnr.it/doc/84790}, VOLUME = {11}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISSN = {1386-2316}, ISBN = {978-90-77484-13-5}, CONFERENCE_NAME = {Eleventh International Conference on Grey Literature. The Grey Mosaic, Piecing it All Together}, CONFERENCE_PLACE = {Washington, DC}, CONFERENCE_DATE = {14-15 dicembre 2009}, BOOKTITLE = {The Grey Mosaic, Piecing it All Together}, EDITOR = {Farace, D. J. and Frantzen, J.}, } @INPROCEEDINGS{MONTEMAGNI_2010_INPROCEEDINGS_M_84772, AUTHOR = {Montemagni, S.}, TITLE = {Esplorazioni computazionali nello spazio della variazione lessicale in Toscana}, YEAR = {2010}, ABSTRACT = {Il passaggio dalla descrizione della distribuzione geografica di singole parole a un livello di descrizione più astratto volto a formulare generalizzazioni relative alla variazione diatopica è oggi reso possibile dal ricorso a tecnologie linguistico-computazionali affiancate da tecniche di analisi statistica multivariata. L'uso combinato di queste tecniche si è dimostrato particolarmente promettente nello studio della variazione linguistica (principalmente fonetica e lessicale) di diverse lingue e dialetti, tipologicamente anche molto distanti. Tali tecniche sono state anche proficuamente utilizzate per l'analisi del contatto tra varietà linguistiche e una norma di riferimento. 
L'articolo si colloca all'interno di questo filone di ricerca, riportando i risultati di esplorazioni computazionali nello spazio della variazione lessicale in Toscana. Tali esplorazioni intendono ripercorrere i passi di Gabriella Giacomelli, ideatrice e direttrice dell'impresa dell'Atlante Lessicale Toscano (ALT) e profonda conoscitrice della realtà dialettale toscana, nel suo studio sulle aree lessicali toscane (Giacomelli 1975). Questa rivisitazione dello studio sulle aree lessicali toscane di Giacomelli si è avvalsa, più di tre decenni dopo, di due importanti elementi di novità, ovvero: i) sul versante dei dati, si è basata sull'intero corpus dei materiali dialettali dell'ALT disponibili nel sito di ALT-Web (http://serverdbt.ilc.cnr.it/altweb); ii) sul versante degli strumenti di analisi, è stata condotta attraverso l'uso combinato di tecnologie linguistiche e tecniche di analisi statistica multivariata che rendono possibile un'analisi aggregata di corpora di materiali dialettali anche di vaste dimensioni. Lo studio si focalizza su due dei tre aspetti indicati come fondamentali da Giacomelli per l'analisi delle aree lessicali toscane, ovvero quello dei "rapporti interni, tra aree subregionali" e quello dei "rapporti con la lingua".}, KEYWORDS = {Computational Dialectology, Lexical Variation}, PAGES = {609-634}, URL = {https://publications.cnr.it/doc/84772}, PUBLISHER = {Centro Editoriale e Librario (Arcavacata di Rende, ITA)}, ISBN = {9788874581030}, CONFERENCE_NAME = {Convegno 'Parole. Il lessico come strumento per organizzare e trasmettere gli etnosaperi'}, CONFERENCE_PLACE = {Rende, Università della Calabria}, CONFERENCE_DATE = {2-4 luglio 2009}, BOOKTITLE = {Parole. Il lessico come strumento per organizzare e trasmettere gli etnosaperi}, EDITOR = {Prantera, N. and Mendicino, A. and Citraro, C.}, } @INPROCEEDINGS{PARETI_2010_INPROCEEDINGS_PP_84786, AUTHOR = {Pareti, S. 
and Prodanof, I.}, TITLE = {Annotating Attribution Relations: Towards an Italian Discourse Treebank}, YEAR = {2010}, KEYWORDS = {Information Extraction, Information Retrieval}, URL = {https://publications.cnr.it/doc/84786}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{PASSAROTTI_2010_INPROCEEDINGS_PD_84781, AUTHOR = {Passarotti, M. and Dell'Orletta, F.}, TITLE = {Improvements in Parsing the Index Thomisticus Treebank. Revision, Combination and a Feature Model for Medieval Latin}, YEAR = {2010}, KEYWORDS = {Parsing, Corpus (creation, annotation, etc.)}, URL = {https://publications.cnr.it/doc/84781}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, } @INPROCEEDINGS{PEZZULO_2010_INPROCEEDINGS_PBLB_84776, AUTHOR = {Pezzulo, G. and Barca, L. and Lamberti Bocconi, A. and Borghi, A. M.}, TITLE = {Motor Simulation in a Memory Task: Evidence from Rock Climbing}, YEAR = {2010}, KEYWORDS = {simulation, affordance, embodied cognition, grounded cognition, canonical neurons, mirror neurons, motor memory, memory for actions, motor chunks}, URL = {https://publications.cnr.it/doc/84776}, CONFERENCE_NAME = {Proceedings of the 2010 Annual Meeting of the Cognitive Science Society}, CONFERENCE_PLACE = {Portland, Oregon}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{PICCHI_2010_INPROCEEDINGS_PS_84806, AUTHOR = {Picchi, E. and Sassolini, E.}, TITLE = {"Text power": Tools for the cultural heritage}, YEAR = {2010}, ABSTRACT = {This article presents NLP techniques (text mining, text analysis) to create tools for the evaluation, analysis and classification of text materials available on the web. In particular we developed tools for the automatic extraction of mistic relevant information related to the cultural heritage domain and tools for linguistic resources creation. 
On this knowledge basis, we also developed a system for text browsing.}, KEYWORDS = {information extraction, named entity recognition, text browsing}, PAGES = {435-439}, URL = {http://www.cairocongress.com/}, VOLUME = {1}, ISBN = {978-88-96680-31-5}, CONFERENCE_NAME = {4-th Intl. Congr. Science and Technology for the Safeguard of Cultural Heritage in the Mediterranean Basin}, CONFERENCE_PLACE = {Il Cairo}, CONFERENCE_DATE = {6-7-8 dicembre 2009}, BOOKTITLE = {Proceedings in: CHC 2010-4-th Intl. Congr. Science and Technology for the Safeguard of Cultural Heritage in the Mediterranean Basin (Il Cairo, 6-7-8/12/2009)}, EDITOR = {Ferrari, A.}, } @INPROCEEDINGS{PUSTEJOVSKY_2010_INPROCEEDINGS_PRPJBQ_84771, AUTHOR = {Pustejovsky, J. and Rumshisky, A. and Plotnick, A. and Jezek, E. and Batiukova, O. and Quochi, V.}, TITLE = {SemEval-2010 Task 7: Argument Selection and Coercion}, YEAR = {2010}, ABSTRACT = {The paper describes the Argument Selection and Coercion task for the SemEval-2010 evaluation exercise, which involves characterizing the type of compositional operation that exists between a predicate and the arguments it selects. Specifically, the goal is to identify whether the type that a verb selects is satisfied directly by the argument, or whether the argument must change type to satisfy the verb typing.}, KEYWORDS = {semantic annotation, verb coercion}, URL = {http://www.aclweb.org/anthology/S10-1005}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-932432-70-1}, CONFERENCE_NAME = {Fifth International Workshop on Semantic Evaluation (SemEval 2010)}, CONFERENCE_PLACE = {Uppsala}, CONFERENCE_DATE = {15-16 Luglio 2010}, BOOKTITLE = {Proceedings of the 5th International Workshop on Semantic Evaluation}, EDITOR = {Erk, K. 
and Strapparava, C.}, } @INPROCEEDINGS{RUIMY_2010_INPROCEEDINGS_R_84784, AUTHOR = {Ruimy, N.}, TITLE = {Formalizzazione, strutturazione e standardizzazione in lessicografia computazionale: un esempio italiano}, YEAR = {2010}, ABSTRACT = {Descrivere una lingua naturale significa raccogliere un numero sterminato di informazioni. In effetti, per la loro ricchezza e complessità, le lingue naturali richiedono una descrizione linguistica a vari livelli. Per la lingua scritta, ad esempio, le informazioni morfologiche, sintattiche e semantiche sono essenziali. Ad ogni livello di conoscenza linguistica, esiste inoltre un'infinità di informazioni idiosincratiche che, sommate alle proprietà condivise da intere classi di parole, rendono la mole complessiva dei dati ingestibile senza un'adeguata strutturazione. Perché un utente umano possa utilizzare, anche in minima parte, questi dati, la descrizione linguistica deve essere coerente, ordinata e formulata in un metalinguaggio descrittivo esplicito. L'essere umano, tuttavia, possiede la capacità di fare inferenze e di interpretare contenuti non immediatamente afferrabili; se invece la gestione e l'uso di una descrizione linguistica sono demandati ad un elaboratore, i suddetti requisiti diventano imprescindibili. Nell'ambito delle Tecnologie del Linguaggio, lo sviluppo di applicazioni richiede un'infrastruttura di cui le risorse lessicali costituiscono l'elemento fondamentale. I sistemi di reperimento ed estrazione di informazioni da corpora, di question answering, di traduzione automatica, ecc. presuppongono, infatti, l'accesso a vasti depositi di conoscenza lessicale espressa in un linguaggio formale sufficientemente ricco in capacità espressive, sotto forma di una rappresentazione rigorosamente strutturata, univoca e computazionalmente trattabile. La formalizzazione non può tuttavia essere estesa a tutti gli aspetti della lingua; d'altra parte, non tutte le proprietà formalizzabili sono pertinenti ad ogni tipo di applicazione. 
La formalizzazione deve quindi avvenire in funzione di un obiettivo preciso. Trattamento del linguaggio generico o di specializzazione? Traduzione automatica o indicizzazione di testi? I fenomeni da analizzare e le informazioni rilevanti sono diversi a seconda del tipo di linguaggio e di applicazione. Il livello di granularità dell'informazione dipende anch'esso dall'obiettivo, oltre che da fattori di costo, ma è soprattutto determinato dalla capacità di calcolo del sistema: non ha senso introdurre nella descrizione linguistica distinzioni molto sottili ma non computabili. L'assoluta necessità di adottare un formalismo conforme a degli standard internazionali per la costruzione di risorse lessicali è ormai ampiamente riconosciuta nella comunità scientifica. L'uso di un modello formale e standardizzato di rappresentazione permette infatti una formulazione rigorosa e organicamente strutturata della conoscenza lessicale, consentendo quindi la riusabilità del lessico nonché la sua interoperabilità ed integrazione con altre risorse lessicali. Il lessico elettronico PAROLE-SIMPLE-CLIPS è la più vasta risorsa lessicale multilivello dell'italiano; è stato costruito in base agli standard internazionali definiti nel modello lessicale PAROLE-SIMPLE che ha permesso lo sviluppo di dodici lessici per altrettante lingue europee. Il lessico è articolato in quattro moduli indipendenti che corrispondono rispettivamente ai livelli fonologico, morfologico, sintattico e semantico di rappresentazione linguistica. La completa descrizione di un'unità lessicale consta pertanto di un minimo di quattro entrate interrelate, ognuna contenente un ricco insieme strutturato di informazioni inerenti al livello di descrizione che la ospita. I moduli sintattici e semantici, particolarmente ricchi ed innovativi, consentono una descrizione approfondita del comportamento delle unità lessicali che mette in risalto la stretta correlazione tra i due livelli. 
A livello sintattico, un'entrata descrive un comportamento sintattico di un'unità lessicale, in termini di proprietà inerenti e contestuali. Queste ultime sono espresse in un quadro di sottocategorizzazione nel quale sono formalizzate le proprietà sintattiche di ogni complemento. A livello semantico, il quadro teorico è quello del Lessico Generativo (Pustejovsky, 1995). Il lessico semantico è strutturato in base ad un'ontologia i cui tipi sono organizzati secondo principi ortogonali - mediante i quattro ruoli della Struttura Qualia (formale, costitutivo, telico ed agentivo) - al fine di catturare la multidimensionalità del significato. Ogni singolo senso è descritto in un'entrata semantica caratterizzata da un'ampia tipologia di informazioni di cui fanno parte il dominio d'uso, la classe azionale dei verbi, la formalizzazione della polisemia regolare e una fitta rete di relazioni semantiche basate sui ruoli qualia. La struttura argomentale dei termini predicativi è definita in termini di ruolo semantico e preferenze di selezioni sugli argomenti. Il collegamento fra i livelli semantico e sintattico avviene attraverso la proiezione della struttura argomentale sul quadro di sottocategorizzazione, mediante un raffinato sistema di coindicizzazione tra argomenti e complementi.}, KEYWORDS = {Lessicografia Computazionale, Formalizzazione, Strutturazione}, PAGES = {246-256}, URL = {https://publications.cnr.it/doc/84784}, VOLUME = {07}, PUBLISHER = {CUEC Editrice (Cagliari, ITA)}, ISBN = {978-88-8467-597-2}, CONFERENCE_NAME = {Forme e formalizzazioni Atti del XVI congresso nazionale}, CONFERENCE_PLACE = {Cagliari}, CONFERENCE_DATE = {September 10-12 2009}, BOOKTITLE = {Forme e formalizzazioni}, EDITOR = {Storari, G. P. 
and Gola, E.}, } @INPROCEEDINGS{RUSSO_2010_INPROCEEDINGS_R_84774, AUTHOR = {Russo, I.}, TITLE = {Discovering Polarity for Ambiguous and Objective Adjectives through Adverbial Modification}, YEAR = {2010}, KEYWORDS = {Emotion Recognition/Generation, Corpus (creation, annotation, etc.), Semantics}, URL = {https://publications.cnr.it/doc/84774}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{SASSI_2010_INPROCEEDINGS_SPBCG_171547, AUTHOR = {Sassi, M. and Pardelli, G. and Biagioni, S. and Carlesi, C. and Goggi, S.}, TITLE = {A Digital Archive of Research Papers in Computer Science}, YEAR = {2010}, ABSTRACT = {This paper presents the results of a terminological work conducted by the authors on a Digital Archives Net of the Italian National Research Council (CNR) in the field of Computer Science. In particular, the research tends to analyse the use of certain terms in Computer Science in order to verify their change over the time with the aim of retrieving from the net the very essence of documentation. Its main source is a reference corpus made up of 13,500 documents which collects the scientific productions of three CNR research Institutes. They are ISTI (Institute of Information Science and Technologies), IIT (Institute of Informatics and Telematics) and ILC (Institute of Computational Linguistics), all of them born from the "Centro Studi sulle Calcolatrici Elettroniche (CSCE)" and now belonging to the CNR Department of Information \& Communication Technologies and Cultural Identity. 
This study is divided in three sections: 1) an introductory one dedicated to the data extracted from the scientific documentation: the data have in common the use of some terms proper of the Computer Science lexicon although these terms belong to different branches (Linguistics, Informatics and Telematics); 2) the second section is devoted to the description of the contents managed by the PUMA (Publication Management System) system; 3) the third part contains a statistical representation of terms extracted from archive: some comparison tables between the occurrences of the most used terms in the scientific documentation produced by the three Institutes will be created and diagrams with percentages about the most frequently used terms will be displayed too. Lastly, indexes and concordances will allow to reflect on the use of certain terms in this field and give possible keys for having access to the extraction of knowledge in the digital era.}, KEYWORDS = {Digital libraries, Document Classification, Text categorisation, Text mining, Natural Language Processing, Text analysis}, PAGES = {1245-1248}, URL = {http://www.lrec-conf.org/proceedings/lrec2010/summaries/945.html}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {17-23 Maggio 2010}, BOOKTITLE = {Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10)}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Rosner, M. and Tapias, D.}, } @INPROCEEDINGS{SASSOLINI_2010_INPROCEEDINGS_SC_84768, AUTHOR = {Sassolini, E. 
and Cinini, A.}, TITLE = {Cultural Heritage: Knowledge Extraction from Web Documents}, YEAR = {2010}, ABSTRACT = {This article presents the use of NLP techniques (text mining, text analysis) to develop specific tools that allow to create linguistic resources related to the cultural heritage domain. The aim of our approach is to create tools for the building of an online "knowledge network", automatically extracted from text materials concerning this domain. A particular methodology was experimented by dividing the automatic acquisition of texts, and consequently, the creation of reference corpus in two phases. In the first phase, on-line documents have been extracted from lists of links provided by human experts. All documents extracted from the web by means of automatic spider have been stored in a repository of text materials. On the basis of these documents, automatic parsers create the reference corpus for the cultural heritage domain. Relevant information and semantic concepts are then extracted from this corpus. In a second phase, all these semantically relevant elements (such as proper names, names of institutions, names of places, and other relevant terms) have been used as basis for a new search strategy of text materials from heterogeneous sources. In this case also specialized crawlers (TP-crawler) have been used to work on a bulk of text materials available on line.}, KEYWORDS = {Information Extraction, Information Retrieval, Text mining, Named Entity recognition}, PAGES = {3363-3368}, URL = {https://publications.cnr.it/doc/84768}, ISBN = {978-2-9517408-6-0}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {17-23/05/2010}, } @INPROCEEDINGS{SAVAS_2010_INPROCEEDINGS_SHMSC_84807, AUTHOR = {Savas, B. and Hayashi, Y. and Monachini, M. and Soria, C. 
and Calzolari, N.}, TITLE = {An LMF-based Web Service for Accessing WordNet-type Semantic Lexicons}, YEAR = {2010}, ABSTRACT = {This paper describes a Web service for accessing WordNet-type semantic lexicons. The central idea behind the service design is: given a query, the primary functionality of lexicon access is to present a partial lexicon by extracting the relevant part of the target lexicon. Based on this idea, we implemented the system as a RESTful Web service whose input query is specified by the access URI and whose output is presented in a standardized XML data format. LMF, an ISO standard for modeling lexicons, plays the most prominent role: the access URI pattern basically reflects the lexicon structure as defined by LMF; the access results are rendered based on Wordnet-LMF, which is a version of LMF XML-serialization. The Web service currently provides accesses to Princeton WordNet, Japanese WordNet, as well as the EDR Electronic Dictionary as a trial. To accommodate the EDR dictionary within the same framework, we modeled it also as a WordNet-type semantic lexicon. This paper thus proposes modifications to LMF.}, KEYWORDS = {Standards for LRs, Lexicon, Lexical database, Web Services}, URL = {https://publications.cnr.it/doc/84807}, ISBN = {2-9517408-6-7}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {17-21/05/2010}, } @INPROCEEDINGS{SOROA_2010_INPROCEEDINGS_SALBVMLH_84769, AUTHOR = {Soroa, A. and Agirre, E. and López, D. L. O. and Bosma, W. and Vossen, P. and Monachini, M. and Lo, J. and Hsieh, S.}, TITLE = {Kyoto: An Integrated System for Specific Domain WSD}, YEAR = {2010}, ABSTRACT = {This document describes the preliminary release of the integrated Kyoto system for specific domain WSD. 
The system uses concept miners (Tybots) to extract domain-related terms and produces a domain-related thesaurus, followed by knowledge-based WSD based on wordnet graphs (UKB). The resulting system can be applied to any language with a lexical knowledge base, and is based on publicly available software and resources. Our participation in Semeval task #17 focused on producing running systems for all languages in the task, and we attained good results in all except Chinese. Due to the pressure of the time-constraints in the competition, the system is still under development, and we expect results to improve in the near future.}, KEYWORDS = {Semantic Annotation, Word-sense disambiguation}, PAGES = {417-420}, URL = {https://publications.cnr.it/doc/84769}, ISBN = {978-1-932432-70-1}, CONFERENCE_NAME = {SemEval2010-5th International Workshop on Semantic Evaluation}, CONFERENCE_PLACE = {Uppsala, Sweden}, CONFERENCE_DATE = {15-16 Luglio 2010}, EDITOR = {Erk, K. and Strapparava, C.}, } @INPROCEEDINGS{STRIKLIEVERS_2010_INPROCEEDINGS_S_84800, AUTHOR = {Strik Lievers, F.}, TITLE = {From lexicological to lexicographical issues: Italian verbs with predicative complement}, YEAR = {2010}, KEYWORDS = {lexicography, event structure, Italian}, URL = {https://publications.cnr.it/doc/84800}, CONFERENCE_NAME = {Euralex 2010}, CONFERENCE_PLACE = {Leeuwarden, The Netherlands}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{STRIKLIEVERS_2010_INPROCEEDINGS_SJ_84791, AUTHOR = {Strik Lievers, F. and Jezek, E.}, TITLE = {Verbi sintagmatici in italiano antico e moderno: un'analisi corpus-based}, YEAR = {2010}, KEYWORDS = {Phrasal verbs, Italian, lexicography, lexical semantics}, URL = {https://publications.cnr.it/doc/84791}, CONFERENCE_NAME = {XXV Congrès International de Linguistique et de Philologie Romanes}, CONFERENCE_PLACE = {Innsbruck}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{TORAL_2010_INPROCEEDINGS_TBMS_84773, AUTHOR = {Toral, A. and Bracale, S. and Monachini, M. 
and Soria, C.}, TITLE = {Rejuvenating the Italian WordNet: upgrading, standardising, extending}, YEAR = {2010}, ABSTRACT = {This paper reports on recent activities carried out within the KYOTO project aimed at enhancing the Italian WordNet Language Resource. On the one hand we study the formalisation of this lexicon according to the LMF ISO standard and explore its application into a real-world scenario by means of representing it in the WN-LMF dialect. On the other hand, we report on a semiautomatic procedure to upgrade the connections of the lexicon to WordNet, which obtains over 98% accuracy.}, KEYWORDS = {Lexical Resources, Standards for LRs}, URL = {http://www.globalwordnet-iitb2010.in/proceedings.php}, CONFERENCE_NAME = {5th Global Wordnet Conference}, CONFERENCE_PLACE = {Mumbai (India)}, CONFERENCE_DATE = {31/01-4/02-2010}, EDITOR = {Bhattacharyya, P. and Fellbaum, C. and Vossen, P.}, } @INPROCEEDINGS{TORAL_2010_INPROCEEDINGS_TMSCRBV_84801, AUTHOR = {Toral, A. and Monachini, M. and Soria, C. and Cuadros, M. and Rigau, G. and Bosma, W. and Vossen, P.}, TITLE = {Linking a domain thesaurus to WordNet and conversion to WordNet-LMF}, YEAR = {2010}, ABSTRACT = {We present a methodology to link domain thesauri to general-domain lexica. This is applied in the framework of the KYOTO project to link the Species2000 thesaurus to the synsets of the English WordNet. Moreover, we study the formalisation of this thesaurus according to the ISO LMF standard and its dialect WordNet-LMF. 
This conversion will allow Species2000 to communicate with the other resources available in the KYOTO architecture.}, KEYWORDS = {Lexical Resources, Thesaurus}, PAGES = {157-165}, URL = {https://publications.cnr.it/doc/84801}, ISBN = {978-962-442-323-5}, CONFERENCE_NAME = {ICGL 2010-The Second International Conference on Global Interoperability for Language Resources}, CONFERENCE_PLACE = {Hong Kong}, CONFERENCE_DATE = {18-20 Gennaio 2010}, BOOKTITLE = {ICGL2010-Proceedings of the Second International Conference on Global Interoperability for Language Resources-5th Joint ISO-ACL/SIGSEM Workshop on Interoperable Semantic Annotation}, EDITOR = {Fang, A. C. and Ide, N. and Webster, J.}, } @INPROCEEDINGS{VOSSEN_2010_INPROCEEDINGS_VRASMB_184375, AUTHOR = {Vossen, P. and Rigau, G. and Agirre, E. and Soroa, A. and Monachini, M. and Bartolini, R.}, TITLE = {KYOTO: an Open Platform for Mining Facts}, YEAR = {2010}, ABSTRACT = {This paper describes an open text-mining system that was developed for the Asian-European project KYOTO. The KYOTO system uses an open text representation format and a central ontology to enable extraction of knowledge and facts from large volumes of text in many different languages. We implemented a semantic tagging approach that performs off-line reasoning. Mining of facts and knowledge is achieved through a flexible pattern matching module that can work in much the same way for different languages, can handle efficiently large volumes of documents and is not restricted to a specific domain. We applied the system to an English database on estuaries}, URL = {https://publications.cnr.it/doc/184375}, ISBN = {978-7-900268-00-6}, CONFERENCE_NAME = {OntoLex 2010}, CONFERENCE_PLACE = {Beijing}, CONFERENCE_DATE = {2010}, BOOKTITLE = {ONTOLEX-COLING 2010}, } @INPROCEEDINGS{WITTENBURG_2010_INPROCEEDINGS_WBBBCHKLMPPPSTVVW_84805, AUTHOR = {Wittenburg, P. and Bel, N. and Borin, L. and Budin, G. and Calzolari, N. and Hajicova, E. and Koskenniemi, K. 
and Lemnitzer, L. and Maegaard, B. and Piasecki, M. and Pierrel, J. and Piperidis, S. and Skadina, I. and Tufis, D. and Van Veenendaal, R. and Váradi, T. and Wynne, M.}, TITLE = {Resource and Service Centres as the Backbone for a Sustainable Service Infrastructure}, YEAR = {2010}, KEYWORDS = {LR Infrastructures and Architectures, Standards for LRs, Tools, Systems, Applications}, URL = {https://publications.cnr.it/doc/84805}, CONFERENCE_NAME = {Seventh International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Valletta, Malta}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{BONIN_2010_INPROCEEDINGS_BDMV_112966, AUTHOR = {Bonin, F. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Lessico settoriale e lessico comune nell'estrazione di terminologia specialistica da corpora di dominio}, YEAR = {2010}, KEYWORDS = {Automatic Term Extraction}, URL = {https://publications.cnr.it/doc/112966}, CONFERENCE_NAME = {XLIV Congresso Internazionale di Studi della Società di Linguistica Italiana}, CONFERENCE_PLACE = {Viterbo, Università degli Studi della Tuscia}, } @INPROCEEDINGS{DELLORLETTA_2010_INPROCEEDINGS_DMVV_173723, AUTHOR = {Dell'Orletta, F. and Montemagni, S. and Vecchi, E. M. and Venturi, G.}, TITLE = {Tecnologie linguistico-computazionali per il monitoraggio delle competenze linguistiche di apprendenti l'italiano come L2}, YEAR = {2010}, KEYWORDS = {Natural Language Processing, Educational Linguistics, Language Learning}, URL = {https://publications.cnr.it/doc/173723}, CONFERENCE_NAME = {Congresso "IT. L2: italiano lingua seconda nell'università, nella scuola e sul territorio. 
Esperienze didattiche e ricerche" Università del Piemonte Orientale "Amedeo Avogadro", Facoltà di Lettere e Filosofia}, CONFERENCE_PLACE = {Vercelli}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{FRONTINI_2010_INPROCEEDINGS_F_112965, AUTHOR = {Frontini, F.}, TITLE = {Statistical profiling of Italian L2 texts: competence and native language}, YEAR = {2010}, KEYWORDS = {Text categorization}, URL = {https://publications.cnr.it/doc/112965}, CONFERENCE_NAME = {20th Annual Conference of the European Second Language Association}, CONFERENCE_PLACE = {Reggio Emilia}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{MARINELLI_2010_INPROCEEDINGS_M_281811, AUTHOR = {Marinelli, R.}, TITLE = {Costruzione di risorse terminologiche: criteri, risultati e prodotti}, YEAR = {2010}, URL = {https://publications.cnr.it/doc/281811}, CONFERENCE_NAME = {XX Convegno dell'Associazione Italiana per la Terminologia "Terminologie specialistiche e prodotti terminologici"}, CONFERENCE_PLACE = {Orvieto}, CONFERENCE_DATE = {28-29 maggio 2010}, } @INPROCEEDINGS{MARZI_2010_INPROCEEDINGS_MPS_186131, AUTHOR = {Marzi, C. and Pardelli, G. and Sassi, M.}, TITLE = {A Terminology Based Re-Definition of Grey Literature}, YEAR = {2010}, ABSTRACT = {The Luxembourg Convention on Grey Literature held in 1997 offered the following definition of Grey Literature (expanded in New York, 2004): "Information produced and distributed on all levels of government, academics, business and industry in electronic and print formats not controlled by commercial publishing, i.e. where publishing is not the primary activity of the producing body". Is this definition still valuable? Is it so far completely satisfactory? Or does it rather need important modifications? We suggest that an interesting re-definition of GL can be based upon careful examination of the longitudinal trend of 10 years of terminological creativity in the proceedings of the GL international Conference. 
Our empirical basis is the Corpus of GreyText Inhouse Archive, available on http://www.greynet.org/opensiglerepository.html consisting of titles, themes, keywords and full abstracts, for a total amount of more than sixty thousand word tokens. In the full version of our paper, we intend to focus on a set of automatically-acquired terms (both single-word and multi-word terms) obtained by subjecting our reference Corpus to a number of pre-processing steps of automated text analysis, such as concordances, frequency lists and lexical association scores (e.g. Mutual Information on word pairs). To anticipate some of our results, the following three terms, that appear to be shared by various disciplinary sub-fields, mark, in our view, important stages in the evolution of our current understanding of GL: digital, access and web. The attribute digital, an increasingly popular synonym of the now obsolete electronic, emphasises the growing importance of computer-based encoding as the standard medium of GL. The noun access (defining the process of accessing text documents) is seen in the company of adjectives like easy, full, grey and open to shape up important conceptual innovations in the way GL material is distributed: e.g. open access focuses on the free accessibility of digital contents. Coupled with information, document and repository (note, however, that repository is generally understood as a technical synonym of open archive), access points to a conception of world-wide available, structured cultural contents. Finally, reference to the web lays emphasis on the huge importance of the World Wide Web as the standard means of disseminating GL. All these aspects are not fully taken into account in the standard definition of GL reported above. 
Our inquiry is intended to pave the way to a bottom-up re-definition of GL, stemming from the terminological creativity and lexical innovation monitored over ten years of technical work in the field.}, KEYWORDS = {Terminology extraction, Grey Literature definition, GL Conference corpus}, PAGES = {24-28}, URL = {https://publications.cnr.it/doc/186131}, VOLUME = {12}, ISSN = {1385-2308}, ISBN = {978-90-77484-15-9}, CONFERENCE_NAME = {Twelfth International Conference on Grey Literature: Transparency in Grey Literature, Grey Tech Approaches to High Tech Issues}, CONFERENCE_PLACE = {Prague}, CONFERENCE_DATE = {6-7/12/2010}, BOOKTITLE = {Transparency in Grey Literature, Grey Tech Approaches to High Tech Issues}, EDITOR = {Farace, D. J. and Frantzen, J.}, } @INPROCEEDINGS{MONTEMAGNI_2010_INPROCEEDINGS_M_112955, AUTHOR = {Montemagni, S.}, TITLE = {Ontology Learning. An introduction}, YEAR = {2010}, KEYWORDS = {Legal Text Processing, Ontology Learning, NLP}, URL = {https://publications.cnr.it/doc/112955}, CONFERENCE_NAME = {Summer School LEX2010-Managing Legal Resources in the Semantic Web, Session "Ontology in the Legal Domain"}, CONFERENCE_PLACE = {Ravenna}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{MONTEMAGNI_2010_INPROCEEDINGS_M_112957, AUTHOR = {Montemagni, S.}, TITLE = {Tecnologie linguistico-computazionali per il monitoraggio della lingua italiana}, YEAR = {2010}, KEYWORDS = {Language Variation, Natural Language Processing}, URL = {https://publications.cnr.it/doc/112957}, CONFERENCE_NAME = {Giornata di Studio "Lo stato della lingua. 
Il CNR e l'italiano nel terzo millennio" organizzata dal Consiglio Nazionale delle Ricerche-Dipartimento Identità Culturale}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{MONTEMAGNI_2010_INPROCEEDINGS_M_112958, AUTHOR = {Montemagni, S.}, TITLE = {The BioLexicon: a Large-Scale Domain-Specific Lexical Resource for Biomedical Text Mining}, YEAR = {2010}, KEYWORDS = {Text Mining, Knowledge Extraction, Lexical Resources}, URL = {https://publications.cnr.it/doc/112958}, CONFERENCE_NAME = {LREC 2010 2nd Workshop on Building and evaluating resources for biomedical text mining}, CONFERENCE_PLACE = {Malta}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{MONTEMAGNI_2010_INPROCEEDINGS_M_112962, AUTHOR = {Montemagni, S.}, TITLE = {Design, Construction and Use of an Italian Dependency Treebank: Methodological Issues and Empirical Results}, YEAR = {2010}, KEYWORDS = {Syntactic Annotation, Treebanks}, URL = {https://publications.cnr.it/doc/112962}, CONFERENCE_NAME = {The Copenhagen Dependency Treebank Workshop on "Designing Treebanks"}, CONFERENCE_PLACE = {Copenhagen (DK)}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{MONTEMAGNI_2010_INPROCEEDINGS_MWDN_112967, AUTHOR = {Montemagni, S. and Wieling, M. and De Jonge, B. and Nerbonne, J.}, TITLE = {Modelli di variazione dialettale e analisi dei tratti linguistici sottostanti: un nuovo approccio dialettometrico}, YEAR = {2010}, KEYWORDS = {Computational dialectology}, URL = {https://publications.cnr.it/doc/112967}, CONFERENCE_NAME = {XI Congresso Silfi-Congresso della Società Internazionale di Linguistica e Filologia Italiana}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {2010}, } @INPROCEEDINGS{PICCHI_2010_INPROCEEDINGS_PSBG_120718, AUTHOR = {Picchi, E. and Sassi, M. and Biagioni, S. 
and Giannini, S.}, TITLE = {Extending the "Facets" concept by applying NLP tools to catalog records of scientific literature}, YEAR = {2010}, ABSTRACT = {The prototype of an "intelligent" navigation system, which has been implemented on the contents of PUMA (http://puma.isti.cnr.it), a digital library of scientific literature, is presented. The system has been implemented by integrating our core textual search engine (known as DBT) with the TextPower (TP) technology. TP is based on NLP techniques and linguistic resources and provides tools specialized for the evaluation, analysis, classification and browsing of scientific literature. TP extends the facet concept by extracting "field + content" pairs not only from structured fields but also from free text, eg. abstracts, using a linguistic-statistical approach to annotate relevant terminology, named entities, etc. The enriched text can be queried, analysed, and classified using a new version of the DBT System known as "DBT\&Facets". DBT\&Facets has been implemented on the full bibliographic records of the documents archived in the PUMA digital library of the Italian National Research Council (CNR). PUMA is a user-focused, service-oriented infrastructure which manages 30 CNR institutional repositories containing about 25,000 published or open access documents in a wide variety of disciplines. In an open domain like scientific documentation, our approach based on the criteria of "semantic similarity" is useful - and perhaps more objective than one based on hierarchical elements - as it makes it possible to link different types of information, also across domains if necessary. DBT\&Facets is an advanced search tool that permits the user to query and refine their results, and to identify particular relations between them. 
The aim of the project has been to structure a knowledge system of domain-specific information which assists the user by suggesting possible directions for their search.},
  KEYWORDS = {NLP tools, Digital libraries},
  PAGES = {82-87},
  URL = {https://publications.cnr.it/doc/120718},
  ISBN = {978-90-77484-15-9},
  CONFERENCE_NAME = {Twelfth International Conference on Grey Literature},
  CONFERENCE_PLACE = {Praga},
  CONFERENCE_DATE = {6-7 December 2010},
  EDITOR = {Farace, D. J. and Frantzen, J. and Greynet},
}

@INPROCEEDINGS{PICCHI_2010_INPROCEEDINGS_PS_112960,
  AUTHOR = {Picchi, E. and Sassolini, E.},
  TITLE = {La tecnologia TextPower per la navigazione intelligente},
  YEAR = {2010},
  ABSTRACT = {Compito dell'ILC è di creare una rete di conoscenza linguistica, terminologica e semantica, estratta dai documenti, fatta di concetti che sintetizzano il valore semantico del documento. Questa rete di conoscenza individuata automaticamente costituisce la base conoscitiva necessaria alla classificazione e alla navigazione "intelligente" e rappresenta la ricchezza dello strumento e del servizio che l'Osservatorio può offrire.},
  KEYWORDS = {Text power, navigazione intelligente},
  PAGES = {419-425},
  URL = {http://oraal.ittig.cnr.it/oraal/},
  VOLUME = {1},
  ISBN = {88-14-17365-6},
  CONFERENCE_NAME = {Convegno di inaugurazione dell'Osservatorio sulle Regole dell'Agricoltura e dell'Alimentazione},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {22-23 GENNAIO 2010},
  BOOKTITLE = {PER UNO STUDIO INTERDISCIPLINARE SU AGRICOLTURA E ALIMENTAZIONE Atti del Convegno di inaugurazione dell'Osservatorio},
  EDITOR = {Sirsi, M. G.
E.},
}

@INPROCEEDINGS{STRIKLIEVERS_2010_INPROCEEDINGS_S_112964,
  AUTHOR = {Strik Lievers, F.},
  TITLE = {On the event structure of Italian verbs with predicative complement},
  YEAR = {2010},
  KEYWORDS = {event structure, verbs, Italian, generative lexicon},
  URL = {https://publications.cnr.it/doc/112964},
  CONFERENCE_NAME = {Representation of Events},
  CONFERENCE_PLACE = {Paris},
  CONFERENCE_DATE = {2010},
}

@TECHREPORT{BARONI_2010_TECHREPORT_B_157475,
  AUTHOR = {Baroni, P.},
  TITLE = {FLaReNet Web Statistics: 7 December 2008-31 August 2010},
  YEAR = {2010},
  ABSTRACT = {Statistics relating to the access to the FLaReNet Web site from 7 December 2008 to 31 August 2010.},
  KEYWORDS = {Language Resources, Web Statistics},
  PAGES = {8},
  URL = {https://publications.cnr.it/doc/157475},
}

@TECHREPORT{BARONI_2010_TECHREPORT_B_157476,
  AUTHOR = {Baroni, P.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Pre-financing Request No. 1},
  YEAR = {2010},
  KEYWORDS = {Financial Statement},
  URL = {https://publications.cnr.it/doc/157476},
}

@TECHREPORT{BARONI_2010_TECHREPORT_B_157482,
  AUTHOR = {Baroni, P.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Pre-financing Request No. 2},
  YEAR = {2010},
  KEYWORDS = {Financial Statement},
  URL = {https://publications.cnr.it/doc/157482},
}

@TECHREPORT{CALZOLARI_2010_TECHREPORT_CSB_157481,
  AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Annual Report No. 2},
  YEAR = {2010},
  KEYWORDS = {Language Resources},
  URL = {https://publications.cnr.it/doc/157481},
}

@TECHREPORT{CALZOLARI_2010_TECHREPORT_CSBBBCMOP_157478,
  AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 3},
  YEAR = {2010},
  KEYWORDS = {Language Resources},
  URL = {https://publications.cnr.it/doc/157478},
}

@TECHREPORT{CALZOLARI_2010_TECHREPORT_CSBQBBCMOP_157488,
  AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P.
and Quochi, V. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.},
  TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 4},
  YEAR = {2010},
  KEYWORDS = {Language Resources},
  URL = {https://publications.cnr.it/doc/157488},
}

@TECHREPORT{CASELLI_2010_TECHREPORT_C_157487,
  AUTHOR = {Caselli, T.},
  TITLE = {It-TimeML: TimeML Annotation Scheme for Italian-Version 1.3.1},
  YEAR = {2010},
  KEYWORDS = {annotazione, timeml, temporal annotation},
  URL = {https://publications.cnr.it/doc/157487},
}

@TECHREPORT{DELGROSSO_2010_TECHREPORT_DG_354196,
  AUTHOR = {Del Grosso, A. M. and Giovannetti, E.},
  TITLE = {Sistema di gestione della banca dati lessicale},
  YEAR = {2010},
  ABSTRACT = {Il documento descrive l'analisi e lo studio preliminare svolto sul sistema di gestione della banca dati lessicale sviluppato nell'ambito dei progetti PAROLE, SIMPLE e CLIPS},
  KEYWORDS = {Lessico elettronico, piattaforma web},
  PAGES = {21},
  URL = {https://publications.cnr.it/doc/354196},
}

@TECHREPORT{MARINELLI_2010_TECHREPORT_MRC_157479,
  AUTHOR = {Marinelli, R. and Roventini, A. and Cucurullo, S.},
  TITLE = {Descrizione attività Progetto Servizi innovativi di Business Intelligence e p.m.i.: la costituzione di un Sistema di Aziende Estese (BISAE): il modulo ILC},
  YEAR = {2010},
  KEYWORDS = {Risorse linguistiche, terminologia, database semantico-lessicali, corpora},
  URL = {https://publications.cnr.it/doc/157479},
}

@TECHREPORT{MARZI_2010_TECHREPORT_MM_157480,
  AUTHOR = {Marzi, C. and Marchi, S.},
  TITLE = {Procedura Web per la generazione automatica dei bandi di concorso per Assegno di Ricerca in formato pdf},
  YEAR = {2010},
  ABSTRACT = {Lo sviluppo di una procedura web per la generazione automatica e gestione dei testi dei bandi di concorso per il conferimento di Assegni di Ricerca risponde alla necessità di uniformare i testi dei bandi di concorso al Disciplinare dell'Ente in continuo aggiornamento.
Ogni modifica apportata al regolamento viene immediatamente recepita e convertita in modifica al modello automatico di bando. La procedura "Bandi" consente, inoltre, ad ogni Gruppo di ricerca, Laboratorio, o Commessa, o anche singolo Ricercatore, di avviare la richiesta per un Assegno di Ricerca generando autonomamente una bozza di testo in formato pdf, da sottoporre agli utenti Validatori per approvazione, correzione e/o integrazione, e la conseguente generazione e stampa del testo definitivo in formato pdf.},
  KEYWORDS = {Tool, Procedura web creazione bandi},
  URL = {http://bandi.ilc.cnr.it/form/login.php},
}

@TECHREPORT{MONTEMAGNI_2010_TECHREPORT_M_157485,
  AUTHOR = {Montemagni, S.},
  TITLE = {Computational Models of Dialectal Variation and Underlying Linguistic Features},
  YEAR = {2010},
  KEYWORDS = {Computational Dialectology, Language Variation},
  URL = {https://publications.cnr.it/doc/157485},
}

@TECHREPORT{PIRRELLI_2010_TECHREPORT_PLMDGM_367784,
  AUTHOR = {Pirrelli, V. and Lenci, A. and Montemagni, S. and Dell'Orletta, F. and Giovannetti, E. and Marchi, S.},
  TITLE = {ConnectToLife (modulo semantico)-Rapporto tecnico finale},
  YEAR = {2010},
  ABSTRACT = {Il presente documento costituisce il rapporto tecnico finale del progetto Connect-To-Life (modulo semantico) relativo alle attività svolte dall'unità ILC-CNR.},
  KEYWORDS = {annotazione linguistica, estrazione di termini, clustering semantico, trattamento automatico della lingua, costruzione di ontologie},
  PAGES = {16},
  URL = {https://publications.cnr.it/doc/367784},
}

@TECHREPORT{VOSSEN_2010_TECHREPORT_VHARFMIBHJD_157490,
  AUTHOR = {Vossen, P. and Hielkema, F. and Aliprandi, C. and Rigau, G. and Fellbaum, C. and Monachini, M. and Isahara, H. and Bond, F. and Hsieh, S. and Jones Walters, L. and De Boom, K.},
  TITLE = {Exploitation and Dissemination Plan},
  YEAR = {2010},
  KEYWORDS = {Ontologie},
  URL = {https://publications.cnr.it/doc/157490},
}

@TECHREPORT{VOSSEN_2010_TECHREPORT_VSHHRAECLKM_157489,
  AUTHOR = {Vossen, P.
and Segers, R. and Hicks, A. and Herold, A. and Rigau, G. and Agirre, E. and Estarrona, A. and Cuadros, M. and Laparra, E. and Kanzaki, K. and Monachini, M.},
  TITLE = {Wordnets mapped to central ontology-revised},
  YEAR = {2010},
  KEYWORDS = {Ontologie},
  URL = {https://publications.cnr.it/doc/157489},
}

@MISC{CIGNONI_2010_MISC_CF_157486,
  AUTHOR = {Cignoni, L. and Fornaciari, G.},
  TITLE = {Fondamenti della Lingua Inglese},
  YEAR = {2010},
  KEYWORDS = {Grammatica inglese},
  PAGES = {1-85},
  URL = {http://www.paleopatologia.it/Documenti/GrammaticaInglese.pdf},
}

@MISC{FERRO_2010_MISC_FMP_157477,
  AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.},
  TITLE = {Word self-organization in time and space? Algorithms and evaluation},
  YEAR = {2010},
  ABSTRACT = {Words are time-bound signals and are amenable to temporal processing. The human brain has an innate ability to encode serial events into spatial patterns of neural activity (David Beiser \& James Houk, 1998). Temporal Hebbian SOMs (THSOMs) allow us to take the two assumptions seriously. They provide a novel computational framework accounting for many paradigm-based generalizations in a natural and insightful way. This claim is validated on inflectional data from German, English and Italian.},
  KEYWORDS = {Morphology, Word Processing and Learning, Mental Lexicon, L1, SOMs},
  URL = {https://publications.cnr.it/doc/157477},
}

@MISC{MARZI_2010_MISC_MM_176395,
  AUTHOR = {Marzi, C. and Marchi, S.},
  TITLE = {Procedura Web per la generazione automatica dei bandi di concorso per Assegno di Ricerca},
  YEAR = {2010},
  ABSTRACT = {Lo sviluppo di una procedura web per la generazione automatica e gestione dei testi dei bandi di concorso per il conferimento di Assegni di Ricerca risponde alla necessità di uniformare i testi dei bandi di concorso al Disciplinare dell'Ente in continuo aggiornamento. Ogni modifica apportata al regolamento viene immediatamente recepita e convertita in modifica al modello automatico di bando.
La procedura "Bandi" consente, inoltre, ad ogni Gruppo di ricerca, Laboratorio, o Commessa, o anche singolo Ricercatore, di avviare la richiesta per un Assegno di Ricerca generando autonomamente una bozza di testo in formato pdf, da sottoporre agli utenti Validatori per approvazione, correzione e/o integrazione, e la conseguente generazione e stampa del testo definitivo in formato pdf.},
  KEYWORDS = {Tool, Procedura web creazione bandi},
  URL = {http://bandi.ilc.cnr.it/form/login.php},
}

@MISC{RUIMY_2010_MISC_RP_157474,
  AUTHOR = {Ruimy, N. and Pardelli, G.},
  TITLE = {Un modello lessicale da customizzare per lo sviluppo di un thesaurus lessico elettronico della terminologia saussuriana},
  YEAR = {2010},
  ABSTRACT = {Progetto di ricerca PRIN 2008: Per un'edizione digitale dei manoscritti di Ferdinand de Saussure. Unità di Ricerca Istituto di Linguistica Computazionale Antonio Zampolli CNR -Pisa Linea di attività II.},
  KEYWORDS = {Ontology. Computational Semantics},
  PAGES = {1},
  URL = {https://publications.cnr.it/doc/157474},
}

@ARTICLE{CALZOLARI_2009_ARTICLE_C_64544,
  AUTHOR = {Calzolari, N.},
  TITLE = {FLaReNet: Shaping the Future of a Multilingual Digital Europe},
  YEAR = {2009},
  ABSTRACT = {The new EC eContentPlus Thematic Network (ECP-2007-LANG-617001) FLaReNet - Fostering Language Resources Network - aims at developing a common vision in the field of Language Resources (LRs) and Language Technologies (LTs) and fostering a European strategy for consolidating this area. The goal is to enhance competitiveness both in the EU and worldwide.},
  KEYWORDS = {Language Resources and Language Technologies},
  PAGES = {66-66},
  URL = {https://publications.cnr.it/doc/64544},
  VOLUME = {77},
}

@ARTICLE{CALZOLARI_2009_ARTICLE_CB_172402,
  AUTHOR = {Calzolari, N.
and Bel, N.},
  TITLE = {FLaReNet: una red para fomentar los recursos lingüísticos (Fostering Language Resources Network: FLaReNet)},
  YEAR = {2009},
  ABSTRACT = {FLaReNet is a thematic network whose objective is the preparation of strategies and recommendations for the promotion and development of language technologies and the associated language resources because of their importance for minimizing the impact of the linguistic diversity in a digital and multilingual Europe. The results of this joint process of reflection by researchers and professionals of all around the world will be the basis of European agreed policies for funding and promoting this sector.},
  KEYWORDS = {Language Resources, Language Technology, Language Resources and Technology},
  PAGES = {383-384},
  URL = {https://publications.cnr.it/doc/172402},
  VOLUME = {43},
}

@ARTICLE{FRANCOPOULO_2009_ARTICLE_FBGCMPS_30882,
  AUTHOR = {Francopoulo, G. and Bel, N. and George, M. and Calzolari, N. and Monachini, M. and Pet, M. and Soria, C.},
  TITLE = {Multilingual resources for NLP in the Lexical Markup Framework (LMF)},
  YEAR = {2009},
  ABSTRACT = {Optimizing the production, maintenance and extension of lexical resources is one of the crucial aspects impacting Natural Language Processing (NLP). A second aspect involves optimizing the process leading to their integration in applications. With this respect, we believe that a consensual specification on monolingual, bilingual and multilingual lexicons can be a useful aid for the various NLP actors.
Within ISO, one purpose of Lexical Markup Framework (LMF, ISO-24613) is to define a standard for lexicons that covers multilingual lexical data.},
  KEYWORDS = {LMF, Standardization, ISO-TC37},
  PAGES = {57-70},
  URL = {https://publications.cnr.it/doc/30882},
  VOLUME = {43},
  DOI = {10.1007/s10579-008-9077-5},
  PUBLISHER = {Springer (Dordrecht, Paesi Bassi)},
  ISSN = {1574-020X},
  JOURNAL = {Language resources and evaluation (Print)},
}

@ARTICLE{PEZZULO_2009_ARTICLE_P_30878,
  AUTHOR = {Pezzulo, G.},
  TITLE = {DiPRA: A Layered Agent Architecture which Integrates Practical Reasoning and Sensorimotor Schemas},
  YEAR = {2009},
  ABSTRACT = {We introduce the layered agent architecture DiPRA (Distributed Practical Reasoning Architecture), composed of an 'intentional' layer, which includes beliefs, plans and goals, and a 'sensorimotor' layer, which includes schemas for situated action. DiPRA's functioning is illustrated and evaluated in a simulated guards-and-thieves scenario. We also discuss the efficacy of the main features of DiPRA, such as the division of labour between off-line planning and on-line specification of action, the grounding of beliefs in sensorimotor interaction and anticipation, the use of bounded resources and knowledge, and the realization of deliberation and means-ends reasoning as intertwined processes.},
  KEYWORDS = {layered architecture practical reasoning grounding anticipation schema},
  PAGES = {297-326},
  URL = {https://publications.cnr.it/doc/30878},
  VOLUME = {21},
  DOI = {10.1080/09540090902954170},
  PUBLISHER = {Carfax Publishing (Abingdon (P. O. Box 25, Abingdon, Oxfordshire OX14 3UE), Regno Unito)},
  ISSN = {0954-0091},
  JOURNAL = {Connection science (Print)},
}

@ARTICLE{PEZZULO_2009_ARTICLE_PBSB_30880,
  AUTHOR = {Pezzulo, G. and Butz, M. and Sigaud, O.
and Baldassarre, G.},
  TITLE = {From Sensorimotor to Higher Level Cognitive Processes: an Introduction to Anticipatory Behavior Systems},
  YEAR = {2009},
  ABSTRACT = {This book continues the enhanced post-workshop proceedings series on "Anticipatory Behavior in Adaptive Learning System" (ABiALS), published as Springer LNAI 2684 and LNAI 4520 [3,5]. The proceedings offer a multidisciplinary perspective on anticipatory mechanisms in cognitive, social, learning, and behavioral processes, with contributions from key researchers in psychology and computer science. This introduction offers a conceptual terminology on anticipatory mechanisms and involved predictive capabilities. Moreover, it provides an overview of the book contributions, highlighting some of their peculiarities and complementarities},
  KEYWORDS = {Anticipation-anticipatory behavior-prediction-simulation-goal-directed behaviour},
  PAGES = {1-9},
  URL = {https://publications.cnr.it/doc/30880},
  VOLUME = {5499},
  PUBLISHER = {Springer (Berlin, Germania)},
  ISSN = {0302-9743},
  JOURNAL = {Lecture notes in computer science},
}

@ARTICLE{PEZZULO_2009_ARTICLE_PBSB_170393,
  AUTHOR = {Pezzulo, G. and Butz, M. and Sigaud, O. and Baldassarre, G.},
  TITLE = {Anticipatory Behavior in Adaptive Learning Systems: From Psychological Theories to Artificial Cognitive Systems},
  YEAR = {2009},
  ABSTRACT = {This book constitutes the thoroughly refereed post-workshop proceedings of the 4th International Workshop on Anticipatory Behavior in Adaptive Learning Systems, ABiALS 2008, held in Munich, Germany, in June 2008, in collaboration with the six-monthly Meeting of euCognition 'The Role of Anticipation in Cognition'. The 18 revised full papers presented were carefully selected during two rounds of reviewing and improvement for inclusion in the book.
The introductory chapter of this state-of-the-art survey not only provides an overview of the contributions included in this volume but also revisits the current available terminology on anticipatory behavior and relates it to the available system approaches. The papers are organized in topical sections on anticipation in psychology with focus on the ideomotor view, conceptualizations, anticipation and dynamical systems, computational modeling of psychological processes in the individual and social domains, behavioral and cognitive capabilities based on anticipation, and computational frameworks and algorithms...},
  KEYWORDS = {Adaptive Learning Systems},
  URL = {https://publications.cnr.it/doc/170393},
  VOLUME = {5499},
  PUBLISHER = {Springer (Berlin, Germania)},
  ISSN = {0302-9743},
  JOURNAL = {Lecture notes in computer science},
}

@ARTICLE{PEZZULO_2009_ARTICLE_PC_30881,
  AUTHOR = {Pezzulo, G. and Castelfranchi, C.},
  TITLE = {Intentional Action: from Anticipation to Goal-Directed Behavior},
  YEAR = {2009},
  ABSTRACT = {No abstract available},
  KEYWORDS = {cognitive modeling},
  PAGES = {437-440},
  URL = {https://publications.cnr.it/doc/30881},
  VOLUME = {73},
  PUBLISHER = {Springer (Heidelberg, Germania)},
  ISSN = {0340-0727},
  JOURNAL = {Psychological research (Print)},
}

@ARTICLE{PEZZULO_2009_ARTICLE_PC_30883,
  AUTHOR = {Pezzulo, G. and Castelfranchi, C.},
  TITLE = {Thinking as the Control of Imagination: a Conceptual Framework for Goal-Directed Systems},
  YEAR = {2009},
  ABSTRACT = {This paper offers a conceptual framework which (re)integrates goal-directed control, motivational processes, and executive functions, and suggests a developmental pathway from situated action to higher level cognition. We first illustrate a basic computational (control-theoretic) model of goal-directed action that makes use of internal modeling.
We then show that by adding the problem of selection among multiple action alternatives motivation enters the scene, and that the basic mechanisms of executive functions such as inhibition, the monitoring of progresses, and working memory, are required for this system to work. Further, we elaborate on the idea that the off-line re-enactment of anticipatory mechanisms used for action control gives rise to (embodied) mental simulations, and propose that thinking consists essentially in controlling mental simulations rather than directly controlling behavior and perceptions. We conclude by sketching an evolutionary perspective of this process, proposing that anticipation leveraged cognition, and by highlighting specific predictions of our model.},
  KEYWORDS = {cognitive modeling},
  PAGES = {559-577},
  URL = {https://publications.cnr.it/doc/30883},
  VOLUME = {73},
  PUBLISHER = {Springer (Heidelberg, Germania)},
  ISSN = {0340-0727},
  JOURNAL = {Psychological research (Print)},
}

@ARTICLE{QUOCHI_2009_ARTICLE_Q_288752,
  AUTHOR = {Quochi, V.},
  TITLE = {Usage scenarios and basic workflows},
  YEAR = {2009},
  PAGES = {5-5},
  URL = {http://www.clarin.eu/sites/default/files/CLARIN_Newsletter_no_6.pdf},
  VOLUME = {6},
  JOURNAL = {CLARIN Newsletter},
}

@ARTICLE{QUOCHI_2009_ARTICLE_QDSBMC_30876,
  AUTHOR = {Quochi, V. and Del Gratta, R. and Sassolini, E. and Bartolini, R. and Monachini, M. and Calzolari, N.},
  TITLE = {A Standard Lexical-Terminological Resource for the Bio Domain},
  YEAR = {2009},
  ABSTRACT = {The present paper describes a large-scale lexical resource for the biology domain designed both for human and for machine use. This lexicon aims at semantic interoperability and extendability, through the adoption of ISO-LMF standard for lexical representation and through a granular and distributed encoding of relevant information.
The first part of this contribution focuses on three aspects of the model that are of particular interest to the biology community: the treatment of term variants, the representation on bio events and the alignment with a domain ontology. The second part of the paper describes the physical implementation of the model: a relational database equipped with a set of automatic uploading procedures. Peculiarity of the BioLexicon is that it combines features of both terminologies and lexicons. A set of verbs relevant for the domain is also represented with full details on their syntactic and semantic argument structure.},
  KEYWORDS = {Lexical representation model, Lexical Database, Computational Lexicography, Special Domains, Standards},
  PAGES = {325-335},
  URL = {https://publications.cnr.it/doc/30876},
  VOLUME = {5603},
  DOI = {10.1007/978-3-642-04235-5_28},
  PUBLISHER = {Springer (Berlin, Germania)},
  ISSN = {0302-9743},
  JOURNAL = {Lecture notes in computer science},
}

@ARTICLE{SORIA_2009_ARTICLE_SMBCHHMT_170611,
  AUTHOR = {Soria, C. and Monachini, M. and Bertagna, F. and Calzolari, N. and Huang, C. and Hsieh, S. and Marchetti, A. and Tesconi, M.},
  TITLE = {Exploring Interoperability of Language Resources: the Case of Cross-lingual Semi-automatic Enrichment of Wordnets},
  YEAR = {2009},
  ABSTRACT = {In this paper we present an application fostering the integration and interoperability of computational lexicons, focusing on the particular case of mutual linking and cross-lingual enrichment of two wordnets, the ItalWordNet and Sinica BOW lexicons. This is intended as a case study investigating the needs and requirements of semi-automatic integration and interoperability of lexical resources, in the view of developing a prototype web application to support the GlobalWordNet Grid Initiative.},
  KEYWORDS = {H.3 INFORMATION STORAGE AND RETRIEVAL.
Linguistic processing, Distributed language resources, Interoperable lexical resources, Language services},
  PAGES = {87-96},
  URL = {https://publications.cnr.it/doc/170611},
  VOLUME = {43},
  DOI = {10.1007/s10579-009-9082-3},
  PUBLISHER = {Springer (Dordrecht, Paesi Bassi)},
  ISSN = {1574-020X},
  JOURNAL = {Language resources and evaluation (Print)},
}

@INCOLLECTION{AGNOLONI_2009_INCOLLECTION_ABFPMV_173012,
  AUTHOR = {Agnoloni, T. and Bacci, L. and Francesconi, E. and Peters, W. and Montemagni, S. and Venturi, G.},
  TITLE = {A two-level Knowledge approach to support multilingual legislative drafting},
  YEAR = {2009},
  KEYWORDS = {DALOS project, Ontological-linguistic},
  URL = {https://publications.cnr.it/doc/173012},
}

@INCOLLECTION{CIGNONI_2009_INCOLLECTION_CF_136466,
  AUTHOR = {Cignoni, L. and Fornaciari, G.},
  TITLE = {Teaching Funerary Archaeology through a Foreign Language: a Proposal for a Balanced Content and Language Integrated Learning (CLIL)-Based Course},
  YEAR = {2009},
  ABSTRACT = {Laura Cignoni and Gino Fornaciari's paper outlines a proposal for a CLIL-based University course in Funerary Archaeology, at the University of Pisa. In this case as well, the subject teacher - a university instructor with solid knowledge of the language - will be assisted by a native speaker who will enhance the qualitative dimension of the CLIL methodology being adopted.},
  KEYWORDS = {CLIL, Teaching, Funerary Archaeology, English},
  PAGES = {113-124},
  URL = {https://publications.cnr.it/doc/136466},
  PUBLISHER = {Guerra Edizioni (Perugia, ITA)},
  ISBN = {978-88-557-0271-3},
  BOOKTITLE = {CLIL Methodology in University Instruction: online and in the Classroom. An emerging framework},
  EDITOR = {Sisti, F.},
}

@INCOLLECTION{DELLORLETTA_2009_INCOLLECTION_DLMMP_184585,
  AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S.
and Pirrelli, V.},
  TITLE = {Text-2-Knowledge: una piattaforma linguistico-computazionale per l'estrazione di conoscenza da testi},
  YEAR = {2009},
  ABSTRACT = {The paper describes the automatic extraction of domain knowledge from Italian document collections and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.},
  KEYWORDS = {Term extraction, Ontology Learning},
  PAGES = {285-300},
  URL = {https://publications.cnr.it/doc/184585},
  PUBLISHER = {Bulzoni (Roma, ITA)},
  ISBN = {978-88-7870-469-5},
  EDITOR = {Ferrari, G. and Benatti, R. and Mosca, M.},
}

@INCOLLECTION{FERRO_2009_INCOLLECTION_FP_283387,
  AUTHOR = {Ferro, M. and Pioggia, G.},
  TITLE = {A biologically-based framework for distributed sensory fusion and data processing},
  YEAR = {2009},
  PAGES = {337-364},
  URL = {https://publications.cnr.it/doc/283387},
  DOI = {10.5772/6586},
  ISBN = {978-3-902613-52-3},
  BOOKTITLE = {Sensor and Data Fusion},
  EDITOR = {Milisavljevic, N.},
}

@INCOLLECTION{LENCI_2009_INCOLLECTION_LMP_186141,
  AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Annotazione sintattica di corpora: aspetti metodologici},
  YEAR = {2009},
  ABSTRACT = {Un assunto sempre più condiviso nell'ambito degli studi sull'acquisizione sia di L1 che di L2 è che l'evidenza empirica privilegiata debba essere rappresentata da corpora di produzioni scritte o orali degli apprendenti, estensivamente annotate a molteplici livelli di rappresentazione linguistica. Più in generale, corpora lemmatizzati e annotati a livello morfosintattico fanno ormai parte dello strumentario comune del linguista.
Accanto ad essi, si fa però strada l'esigenza di disporre di risorse testuali più sofisticate dal punto di vista delle modalità di esplorazione linguistica, come ad esempio corpora annotati a livello sintattico (le cosiddette treebank). Questi consentono infatti di osservare i processi di convergenza degli apprendenti verso la lingua "obiettivo" anche a livello di specifici tratti grammaticali astratti o di macro-strutture linguistiche. L'articolo propone uno schema di annotazione sintattica caratterizzato da un doppio livello di codifica. Si tratta di un approccio originale che differisce dalla maggior parte degli schemi di annotazione sintattica esistenti per due aspetti: 1. la separazione della dimensione relazionale da quella a costituenti, che sono trattati a livelli di annotazione indipendenti, ma al tempo stesso correlati, in modo tale che lo stesso testo è simultaneamente interrogabile ai due livelli; 2. la rappresentazione a costituenti fornisce una rappresentazione del testo come sequenza di proto-costituenti sintagmatici non ricorsivi. Questa strategia di annotazione permette una fattorizzazione di diversi aspetti e dimensioni della struttura sintattica che risulta promettente da un lato per l'annotazione di corpora di lingua "non-standard" come quelli contenenti produzioni di apprendenti di L1 o L2, sia come punto di partenza per successivi processi di estrazione di informazione linguistica dal testo. Dopo aver illustrato le motivazioni sottostanti allo schema proposto, ciascun livello di rappresentazione (chunking e dipendenze funzionali) viene illustrato in dettaglio, mostrandone anche la possibilità di combinazione sullo stesso testo.
L'articolo si chiude con la discussione di prospettive di uso di corpora annotati secondo lo schema di annotazione proposto.},
  KEYWORDS = {Corpora annotati, annotazione sintattica},
  PAGES = {25-46},
  URL = {https://publications.cnr.it/doc/186141},
  PUBLISHER = {Guerra Edizioni (Perugia, ITA)},
  ISBN = {978-88-557-0168-6},
  BOOKTITLE = {CORPORA DI ITALIANO L2: TECNOLOGIE, METODI, SPUNTI TEORICI},
  EDITOR = {Andorno, C. and Rastelli, S.},
}

@INCOLLECTION{LENCI_2009_INCOLLECTION_LMPV_136465,
  AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V. and Venturi, G.},
  TITLE = {Ontology learning from Italian legal texts},
  YEAR = {2009},
  ABSTRACT = {The paper reports on the methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. Evaluated results, however preliminary, show the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.},
  KEYWORDS = {Ontology Learning, document management, legal knowledge extraction},
  PAGES = {75-94},
  URL = {https://publications.cnr.it/doc/136465},
  VOLUME = {188},
  DOI = {10.3233/978-1-58603-942-4-75},
  ISBN = {978-1-58603-942-4},
  BOOKTITLE = {Law, Ontologies and the Semantic Web-Channelling the Legal Information Flood},
  EDITOR = {Breuker, J. and Casanovas, P. and Klein, M. C. A.
and Francesconi, E.},
}

@INCOLLECTION{MARINELLI_2009_INCOLLECTION_M_136463,
  AUTHOR = {Marinelli, R.},
  TITLE = {Ontological Structure and Digital Corpora for Metaphorical Sense Recognition},
  YEAR = {2009},
  ABSTRACT = {Based on the EuroWordNet/ItalWordNet model, a terminological database which contains terms belonging to the specialized lexicon of the maritime navigation and transport domain was created. Our lexicographic research aims to give prominence to the frequency of idiomatic expressions and metaphors used in everyday language that come from this cultural environment. This paper deals with a) idiomatic expressions and metaphors of the maritime domain which is considered as "source" domain, b) the use and the frequency of this type of idiomatic expressions and metaphors in a large corpus of Italian contemporary language, c) the semantic codification of metaphors and idiomatic expressions in the terminological database, d) the way the ontological structure of the database can help in metaphorical sense recognition.},
  KEYWORDS = {Ontology, Corpora, Terminology, Figurative language, Metaphorical sense recognition},
  PAGES = {1409-1419},
  URL = {https://publications.cnr.it/doc/136463},
  PUBLISHER = {AESLA, Universidad de Almeria (Almeria, ESP)},
  ISBN = {978-84-692-1479-4},
  BOOKTITLE = {Applied Linguistics Now: Understanding Language and Mind / La Lingüística Aplicada actual: Comprendiendo el lenguaje y la mente},
  EDITOR = {Callejas, C. M. B. and Sánchez, J. F. F. and Ibáñez, J. R. I. and Sánchez, M. E. G. and De Los Ríos, M. E. C. and Ramiro, S. S. and Martínez, M. S. C. and Honeyman, N. P. and Márquez, B. C.},
}

@INCOLLECTION{WITTENBURG_2009_INCOLLECTION_WAFC_157470,
  AUTHOR = {Wittenburg, P. and Arppe, A. and Forsstrom, P. and Calzolari, N.},
  TITLE = {NEERI 09 Report},
  YEAR = {2009},
  KEYWORDS = {Interoperability},
  URL = {https://publications.cnr.it/doc/157470},
}

@EDITORIAL{CALZOLARI_2009_EDITORIAL_CBBBCGMMOPQST_183877,
  AUTHOR = {Calzolari, N. and Baroni, P.
and Bel, N. and Budin, G. and Choukri, K. and Goggi, S. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C. and Toral, A.},
  TITLE = {Proceedings of the 1st European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe},
  YEAR = {2009},
  ABSTRACT = {Proceedings of the first FLaReNet Forum on the European Language Resources and Technologies, held in Vienna, at the Austrian Academy of Science, on 12-13 February 2009.},
  KEYWORDS = {Language Resources, Language Technologies, Multilingual, Digital},
  PAGES = {105},
  URL = {http://www.flarenet.eu/sites/default/files/Vienna09_Proceedings.pdf},
}

@EDITORIAL{CASELLAS_2009_EDITORIAL_CFHM_143540,
  AUTHOR = {Casellas, N. and Francesconi, E. and Hoekstra, R. and Montemagni, S.},
  TITLE = {Proceedings of the 3rd Workshop on Legal Ontologies and Artificial Intelligence Techniques (LOAIT '09) joint with the 2nd Workshop on Semantic Processing of Legal Texts},
  YEAR = {2009},
  URL = {https://publications.cnr.it/doc/143540},
  VOLUME = {2},
}

@INPROCEEDINGS{ATTARDI_2009_INPROCEEDINGS_AD_84753,
  AUTHOR = {Attardi, G. and Dell'Orletta, F.},
  TITLE = {Reverse Revision and Linear Tree Combination for Dependency Parsing},
  YEAR = {2009},
  KEYWORDS = {Dependency parsing, revision parsing, dependency parsing combination},
  URL = {https://publications.cnr.it/doc/84753},
  CONFERENCE_NAME = {North American Chapter of the Association for Computational Linguistics-Human Language Technologies},
  CONFERENCE_PLACE = {Boulder, Colorado},
}

@INPROCEEDINGS{ATTARDI_2009_INPROCEEDINGS_ADSDV_84761,
  AUTHOR = {Attardi, G. and Dell'Orletta, F. and Simi, M. and Dei Rossi, S. and Vecchi, E.
M.},
  TITLE = {The Tanl Named Entity Recognizer at Evalita 2009},
  YEAR = {2009},
  KEYWORDS = {Named Entity Recognizer},
  URL = {https://publications.cnr.it/doc/84761},
  CONFERENCE_NAME = {Evaluation of NLP and Speech Tools for Italian 2009},
  CONFERENCE_PLACE = {Reggio Emilia, Italy},
}

@INPROCEEDINGS{ATTARDI_2009_INPROCEEDINGS_ADST_84734,
  AUTHOR = {Attardi, G. and Dell'Orletta, F. and Simi, M. and Turian, J.},
  TITLE = {Accurate Dependency Parsing with a Stacked Multilayer Perceptron},
  YEAR = {2009},
  KEYWORDS = {Dependency Parsing, Parsing, Multilayer Perceptron},
  URL = {https://publications.cnr.it/doc/84734},
  CONFERENCE_NAME = {Evaluation of NLP and Speech Tools for Italian 2009},
  CONFERENCE_PLACE = {Reggio Emilia, Italy},
}

@INPROCEEDINGS{BOSMA_2009_INPROCEEDINGS_BVSRTMMA_173501,
  AUTHOR = {Bosma, W. and Vossen, P. and Soroa, A. and Rigau, G. and Tesconi, M. and Marchetti, A. and Monachini, M. and Aliprandi, C.},
  TITLE = {KAF: a generic semantic annotation format},
  YEAR = {2009},
  ABSTRACT = {We present KAF, the KYOTO Annotation Format. KAF is a layered and extendible linguistic annotation format that is specifically developed to arrive at semantic interoperability. KAF is used in seven languages in several applications throughout the KYOTO (Knowledge Yielding Ontologies for Transition-based Organization) project. The goal of these applications is to derive semantic data from linguistically processed text. Separate annotation layers are defined for each annotation process but these can be combined to arrive at a higher level of semantic representation. This paper gives an outline of KAF and a description of how it is applied in the KYOTO project.},
  KEYWORDS = {I. 2. 
7 Natural Language Processing, Natural language processing, Semantic annotation, Text mining, Semantic Annotation, Standard Formats},
  PAGES = {145--152},
  URL = {https://publications.cnr.it/doc/173501},
  CONFERENCE_NAME = {5th International Conference on Generative Approaches to the Lexicon},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {17-19 September 2009},
}

@INPROCEEDINGS{CALZOLARI_2009_INPROCEEDINGS_C_84744,
  AUTHOR = {Calzolari, N.},
  TITLE = {Introduction by the FLaReNet Coordinator},
  YEAR = {2009},
  KEYWORDS = {Language Resources, Language Technologies},
  URL = {https://publications.cnr.it/doc/84744},
  CONFERENCE_NAME = {Fostering Language Resources Network Forum 2009},
  CONFERENCE_PLACE = {Vienna},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{CALZOLARI_2009_INPROCEEDINGS_CPR_84748,
  AUTHOR = {Calzolari, N. and Pustejovsky, J. and Rumshisky, A.},
  TITLE = {Proceedings of the 5th International Conference on Generative Approaches to the Lexicon},
  YEAR = {2009},
  KEYWORDS = {Lexicon},
  URL = {https://publications.cnr.it/doc/84748},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{CALZOLARI_2009_INPROCEEDINGS_CS_84758,
  AUTHOR = {Calzolari, N. and Soria, C.},
  TITLE = {The FLaReNet Thematic Network: a Global Forum for Cooperation},
  YEAR = {2009},
  KEYWORDS = {Language Resources, Language Technologies},
  URL = {https://publications.cnr.it/doc/84758},
  CONFERENCE_NAME = {ACL-IJCNLP 2009-7th Workshop on Asian Language Resources},
  CONFERENCE_PLACE = {Suntec, Singapore},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{CASELLI_2009_INPROCEEDINGS_CDP_84755,
  AUTHOR = {Caselli, T. and Dell'Orletta, F. 
and Prodanof, I.},
  TITLE = {TETI: a TimeML Compliant TimEx Tagger for Italian},
  YEAR = {2009},
  KEYWORDS = {temporal expression, information extraction},
  URL = {https://publications.cnr.it/doc/84755},
  CONFERENCE_NAME = {International Multiconference on Computer Science and Information Technology},
  CONFERENCE_PLACE = {Mragowo, Polonia},
}

@INPROCEEDINGS{CASELLI_2009_INPROCEEDINGS_CDP_84756,
  AUTHOR = {Caselli, T. and Dell'Orletta, F. and Prodanof, I.},
  TITLE = {Temporal Relations with Signals: the Case of Italian Temporal Prepositions},
  YEAR = {2009},
  KEYWORDS = {temporal relations, taggers, information extraction},
  URL = {https://publications.cnr.it/doc/84756},
  CONFERENCE_NAME = {16th International Symposium on Temporal Representation and Reasoning},
  CONFERENCE_PLACE = {Brixen/Bressanone, Italia},
}

@INPROCEEDINGS{CIGNONI_2009_INPROCEEDINGS_CF_84737,
  AUTHOR = {Cignoni, L. and Fornaciari, G.},
  TITLE = {Combining different Technologies in a Funerary Archaeology content and language integrated Learning (CLIL) Course},
  YEAR = {2009},
  KEYWORDS = {Funerary archaeology, CLIL, Language teaching, Computer technology, Field archaeology},
  PAGES = {4284--4293},
  URL = {https://publications.cnr.it/doc/84737},
  ISBN = {978-84-612-9802-0},
  CONFERENCE_NAME = {International Conference on Education and New Learning Technologies},
  CONFERENCE_PLACE = {Barcelona, Spain},
  CONFERENCE_DATE = {6th-8th July 2009},
  BOOKTITLE = {International Conference on Education and New Learning Technologies},
  EDITOR = {Chova, L. G. and Belenguer, D. M. and Torres, I. C.},
}

@INPROCEEDINGS{CIGNONI_2009_INPROCEEDINGS_CPS_84740,
  AUTHOR = {Cignoni, L. and Pardelli, G. and Sassi, M.},
  TITLE = {Grey Literature for Natural Language Processing: a Terminological and Statistical Approach},
  YEAR = {2009},
  ABSTRACT = {This paper presents the results of a study on grey literature (GL) in the field of Natural Language Processing (NLP). 
Our data has been collected in a corpus of ca 13,000 records corresponding to the titles of papers presented at International Conferences from 1950 to June 2008. A statistical representation of the most significant terms relative to GL in NLP and other interrelated disciplines associates old and new words, highlighting the terminological changes that have taken place in the course of time. Aim of our study is to contribute to the creation of language resources for the extraction of GL coming from the Web in order to help prevent the disappearance of documents containing NLP words that have undergone rapid development over the last decades. This paper is organised as follows: after a general introduction to our work, section 2 provides a historical overview of NLP; sections 3 and 4 offer an account of the most relevant terms used by specialists in different periods, and indicative of the changes that have taken place; section 5 describes the methodology we have used and also contains information on our GL database and a graphical representation of the data. Finally, the conclusions stress the need to integrate pre-existing or obsolete words and expressions, creating NLP synonym relations.},
  KEYWORDS = {Computational Linguistics, Terminology, Grey Literature},
  PAGES = {93--100},
  URL = {https://publications.cnr.it/doc/84740},
  VOLUME = {10},
  PUBLISHER = {TextRelease (Amsterdam, NLD)},
  ISBN = {978-90-77484-11-1},
  CONFERENCE_NAME = {Tenth International Conference on Grey Literature: Designing the Grey Grid for Information Society},
  CONFERENCE_PLACE = {Amsterdam},
  CONFERENCE_DATE = {DEC 08-09, 2008},
  BOOKTITLE = {Designing the Grey Grid for Information Society},
  EDITOR = {Farace, D. J. 
and Frantzen, J.},
}

@INPROCEEDINGS{DELLORLETTA_2009_INPROCEEDINGS_D_84733,
  AUTHOR = {Dell'Orletta, F.},
  TITLE = {Ensemble system for Part-of-Speech tagging},
  YEAR = {2009},
  KEYWORDS = {Part-of-Speech tagging, Ensemble system},
  URL = {https://publications.cnr.it/doc/84733},
  CONFERENCE_NAME = {Evaluation of NLP and Speech Tools for Italian 2009},
  CONFERENCE_PLACE = {Reggio Emilia, Italy},
}

@INPROCEEDINGS{GIGLIOTTA_2009_INPROCEEDINGS_GPN_84741,
  AUTHOR = {Gigliotta, O. and Pezzulo, G. and Nolfi, S.},
  TITLE = {How internal modeling arises when 'the world is not enough': an evolutionary robotics study},
  YEAR = {2009},
  KEYWORDS = {cognitive robotics},
  URL = {https://publications.cnr.it/doc/84741},
  CONFERENCE_NAME = {Ninth International Conference on Epigenetic Robotics: Modeling Cognitive Development in Robotic Systems},
  CONFERENCE_PLACE = {Venice},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{IDE_2009_INPROCEEDINGS_IPCS_84760,
  AUTHOR = {Ide, N. and Pustejovsky, J. and Calzolari, N. and Soria, C.},
  TITLE = {The SILT and FLaReNet International Collaboration for Interoperability},
  YEAR = {2009},
  KEYWORDS = {Interoperability, Language Resources, Language Technologies},
  URL = {https://publications.cnr.it/doc/84760},
  CONFERENCE_NAME = {ACL-IJCNLP 2009-3rd Linguistic Annotation Workshop},
  CONFERENCE_PLACE = {Suntec, Singapore},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{JEZEK_2009_INPROCEEDINGS_JQC_84752,
  AUTHOR = {Jezek, E. and Quochi, V. 
and Calzolari, N.},
  TITLE = {Relevance of Qualia Relations in Coercive Contexts},
  YEAR = {2009},
  KEYWORDS = {annotation, annotation scheme, semantics, type shift},
  URL = {https://publications.cnr.it/doc/84752},
  CONFERENCE_NAME = {5th International Conference on Generative Approaches to the Lexicon},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{MARINELLI_2009_INPROCEEDINGS_M_84747,
  AUTHOR = {Marinelli, R.},
  TITLE = {La terminologia condivisa: uno strumento trasversale di lavoro},
  YEAR = {2009},
  KEYWORDS = {Terminologia, Data Base Semantico Lessicale},
  URL = {https://publications.cnr.it/doc/84747},
  CONFERENCE_NAME = {Arte di Ascoltare Laboratorio Interattivo},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{MARINELLI_2009_INPROCEEDINGS_M_172611,
  AUTHOR = {Marinelli, R.},
  TITLE = {Costruzione di un database terminologico di dominio fiscale: descrizione e metodologia},
  YEAR = {2009},
  KEYWORDS = {Terminologia, Database relazionale, Data Base Semantico Lessicale},
  URL = {https://publications.cnr.it/doc/172611},
  CONFERENCE_NAME = {Ottava giornata REI: “Le parole per dirlo: terminologia e normalizzazione linguistica”. Presentazione dei risultati dei gruppi di lavoro REI},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{MORGAVI_2009_INPROCEEDINGS_MMCMTB_167344,
  AUTHOR = {Morgavi, G. and Marconi, L. and Cutugno, P. and Morando, M. and Turrini, G. and Baroni, P.},
  TITLE = {WIKIMEMO: A Portal for Italian Language and Culture Heritage Conservation},
  YEAR = {2009},
  ABSTRACT = {Since 150 years Italian people migrated abroad. Today the community with people with Italian origin add up to 10 million of people, including immigrant descendants in different countries. For these community the link with their original root can be useful for maintaining the self identity. In this paper we present the architecture for the design of Wikimemo, a portal for Italian Language and Culture Heritage conservation. 
From one side it aims to supply instruments to retrieve the records of the immigration experiences allowing their story telling through voices, sounds, pictures, documents, objects, from the others it offers didactical instruments (like texts, frequency lexicon, form vocabulary, anagrams lists etc.) useful for ameliorating the language knowledge. The whole project is focused on the autobiographical writing, the importance of free sharing of experiences and memories, of linguistic and cultural resources, of tools supporting the Italian language learning. The problems connected with the importance of a friendly user interface and of the usage of security tools is underlined.},
  KEYWORDS = {Patrimonio Culturale, Italiano},
  PAGES = {298--302},
  URL = {http://www.santiago.cu/hosting/linguistica/descargar.php?d=634},
  PUBLISHER = {Centro de linguística aplicada, Ministerio de ciencia, tecnología y medio ambiente (Santiago de Cuba, CUB)},
  ISBN = {978-959-7174-14-1},
  CONFERENCE_NAME = {XI simposio Internacional de Comunicación Social Santiago de Cuba},
  CONFERENCE_PLACE = {Santiago de Cuba},
  CONFERENCE_DATE = {19-23 gennaio 2009},
  BOOKTITLE = {Actas, XI Simposio Internacional Comunicación Social},
  EDITOR = {Alvarez Silva, M. R. and Alvares Moreno, C. and Ruiz Miyares, L.},
}

@INPROCEEDINGS{MORGAVI_2009_INPROCEEDINGS_MMMC_79934,
  AUTHOR = {Morgavi, G. and Marconi, L. and Morando, M. and Cutugno, P.},
  TITLE = {From creative cognitive learning to adaptable artificial system design},
  YEAR = {2009},
  ABSTRACT = {Background: Over the last decade, a number of researchers have suggested a developmental perspective on AI and robotics. The ultimate shared goal among them seems to be the idea of bootstrapping high-level cognition through a process in which the agent interacts with a real physical environment over extended periods of time [2]. 
These studies generated epigenetic robotics, a new AI/ robotics field which includes the two-fold goal of understanding biological systems by the interdisciplinary integration between social/life and engineering sciences and, simultaneously, that of enabling robots and other artificial systems to autonomously develop skills for any particular environment (instead of programming them to solve particular goals for a specific environment). Interdisciplinary theory and empirical evidence are used to inform epigenetic robotic models, and these models can be used as theoretical tools to make experimental predictions in developmental psychology and other disciplines studying cognitive development in living systems. One of the fundamental methodological assumptions is that cognition is embodied, which means that it arises from bodily interactions with the real world[1]. The next logical step along the road towards truly autonomous robots that can dive in unpredictable environments is to investigate how one might design robots that are capable of `growing up' through experience. A living artifact grows up when its capabilities, abilities/knowledge, shift to a further level of complexity [3]. Following different psychological points of view, growing up implies: adaptation, change of functional meaning; increased complexity; enlargement of the internal knowledge map; abstraction and insight.},
  KEYWORDS = {creative processes, abstraction, growing up},
  PAGES = {257--260},
  URL = {http://www.isefc.rnu.tn/cem09/Downloads/Cem09_Abstracts_Book.pdf},
  ISBN = {978-9973-13-009-9},
  CONFERENCE_NAME = {CEM09 International Congress on Cognition, Emotion \& Motivation},
  CONFERENCE_PLACE = {Hammamet, Tunisia},
  CONFERENCE_DATE = {2-5 Novembre 2009},
  EDITOR = {Masmoudi, S. and Naceur, A.},
}

@INPROCEEDINGS{NAMER_2009_INPROCEEDINGS_NBJR_84750,
  AUTHOR = {Namer, F. and Bouillon, P. and Jacquey, E. 
and Ruimy, N.},
  TITLE = {Morphology-based Enhancement of a French SIMPLE Lexicon},
  YEAR = {2009},
  ABSTRACT = {In this paper, we propose a semi-automatic methodology for acquiring a French SIMPLE lexicon based on the morphological properties of complex words. This method combines the results of the French morphological analyzer DériF with information from general lexical resources and corpora, when available. It is evaluated on a set of neologisms extracted from Le Monde newspaper corpora},
  KEYWORDS = {Morphology, SIMPLE, French electronic lexicon, Italian electronic lexicon},
  URL = {https://publications.cnr.it/doc/84750},
  CONFERENCE_NAME = {5th International Conference on Generative Approaches to the Lexicon},
  CONFERENCE_PLACE = {Pisa, Italy},
  CONFERENCE_DATE = {17-19/09/2009},
}

@INPROCEEDINGS{OGNIBENE_2009_INPROCEEDINGS_OPB_84739,
  AUTHOR = {Ognibene, D. and Pezzulo, G. and Baldassarre, G.},
  TITLE = {How Are Representations Affected by Scene Statistics in an Adaptive Active Vision System?},
  YEAR = {2009},
  KEYWORDS = {cognitive robotics},
  URL = {https://publications.cnr.it/doc/84739},
  CONFERENCE_NAME = {Ninth International Conference on Epigenetic Robotics: Modeling Cognitive Development in Robotic Systems},
  CONFERENCE_PLACE = {Venice},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{PARDELLI_2009_INPROCEEDINGS_PSGO_84738,
  AUTHOR = {Pardelli, G. and Sassi, M. and Goggi, S. and Orsolini, P.},
  TITLE = {Computational Linguistics Terminology},
  YEAR = {2009},
  ABSTRACT = {The aim of this article is to provide a statistical representation of significant terms used in the field of Natural Language Processing from the 1960's till nowadays, in order to draft a survey on the most significant research trends in that period. By retrieving these keywords it should be possible to highlight the ebb and flow of some thematic topics. The NLP terminological sample derives from a database - created for this purpose using the DBT software (Textual Data Base, ILC patent). 
Scientific presentations at the above-mentioned conferences point out a frequent recurrence of expressions such as mécanisation des études lexicologique, les machines à cartes perforées et leurs application lexicologique which trace back to the origin of electronic processing of linguistic data and to some solutions of linguistic-literary problems, to lexicographic researches, to the scientific terminology, to automatic dictionaries, to homographs, synonyms and the possibility of producing indexes and concordances by means of an electronic processor: Terms such as meccanizzazione, mechanical translation, machine à traduire used by experts of the field in the 1950s and 1960s seem to well testify the change, the shift, the beginning and then the final consecration of a rapidly evolving field: Natural Language Processing.},
  KEYWORDS = {Computational Linguistics, Terminology},
  PAGES = {303--307},
  URL = {https://publications.cnr.it/doc/84738},
  PUBLISHER = {Centro de linguística aplicada, Ministerio de ciencia, tecnología y medio ambiente (Santiago de Cuba, CUB)},
  ISBN = {978-959-7174-14-1},
  CONFERENCE_NAME = {XI Simposio Internacional de Comunicación Social},
  CONFERENCE_PLACE = {Santiago de Cuba},
  CONFERENCE_DATE = {19-23 de enero de 2009},
  EDITOR = {Alvarez Silva, M. R. and Alvares Moreno, C. and Ruiz Miyares, L.},
}

@INPROCEEDINGS{RUIMY_2009_INPROCEEDINGS_R_84762,
  AUTHOR = {Ruimy, N.},
  TITLE = {Une Ressource Lexicale Exploitable},
  YEAR = {2009},
  ABSTRACT = {In this paper, we give an overview of a wide electronic lexicon of Italian that provides a great number of information concerning four levels of linguistic description. The syntactic and semantic modules are particularly rich and interesting ; they offer a very fine-grained representation of the behaviour of lexical units and highlight the close relationship existing between the two description levels. 
Thanks to the richness of its content and to the excellence of its model, this lexicon can be profitably exploited in various domains, both in NLP applications and for the derivation of similar lexicons in other languages.},
  KEYWORDS = {Lexique électronique, syntaxe, Lexique Génératif, sémantique, structure argumentale},
  PAGES = {290--300},
  URL = {https://publications.cnr.it/doc/84762},
  VOLUME = {4},
  PUBLISHER = {University of Bergen (Bergen, NOR)},
  ISSN = {1890-4580},
  CONFERENCE_NAME = {The 28th Conference on Lexis and Grammar},
  CONFERENCE_PLACE = {Bergen, Norway},
  CONFERENCE_DATE = {29/09-03/10/2009},
  BOOKTITLE = {The 28th Conference on Lexis and Grammar},
  EDITOR = {Skogseth Clausen, C. and Alvsåker Didriksen, A. and Müller Gjesdal, A. and Moss, B.},
}

@INPROCEEDINGS{SASSI_2009_INPROCEEDINGS_S_84746,
  AUTHOR = {Sassi, M.},
  TITLE = {La obra de Alejo Carpentier en versión digital: historial, descripción y propuestas},
  YEAR = {2009},
  KEYWORDS = {Banca dati Testuale, Alejo Carpentier},
  URL = {https://publications.cnr.it/doc/84746},
  CONFERENCE_NAME = {XI Simposio Internacional de Comunicación social},
  CONFERENCE_PLACE = {Santiago de Cuba},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{SASSI_2009_INPROCEEDINGS_SPG_84757,
  AUTHOR = {Sassi, M. and Pardelli, G. and Goggi, S.},
  TITLE = {Terminology Extraction from the web},
  YEAR = {2009},
  ABSTRACT = {This paper presents the results of a study on textual resources in the field of Human Language Technology (HLT). A statistical representation of the most significant terms in HLT and other interrelated disciplines associates old and new words, highlighting the terminological changes that have taken place in the course of time. Aim of our study is to contribute to the creation of language resources for the extraction of documentation coming from the Web in order to help preventing the disappearance of documents containing HLT words that have undergone rapid development over the last decades. 
This paper is organised as follows: after a general introduction to our work, section 2 provides a historical overview of HLT; sections 3 and 4 offer an account of the most relevant terms used by specialists in different periods, and those indicative of the changes that have taken place; section 5 describes the methodology we have used and also contains information on our database and a graphical representation of the data. Finally, the conclusions stress the need to integrate pre-existing or obsolete words and expressions, creating HLT synonym relations.},
  KEYWORDS = {Terminology, Computational Linguistics, Web-based information},
  PAGES = {417--420},
  URL = {https://publications.cnr.it/doc/84757},
  ISBN = {978-83-7177-746-2},
  CONFERENCE_NAME = {4th Language Technology Conference: Human Language Technology as a challenge for Computer Science and Linguistics},
  CONFERENCE_PLACE = {Poznan, PL},
  CONFERENCE_DATE = {November 6-8, 2009},
  EDITOR = {Vetulani, Z.},
}

@INPROCEEDINGS{SASSOLINI_2009_INPROCEEDINGS_SP_84749,
  AUTHOR = {Sassolini, E. and Picchi, E.},
  TITLE = {Text Power: tools for the Cultural Heritage},
  YEAR = {2009},
  KEYWORDS = {Text power, Text mining, Cultural Heritage},
  URL = {https://publications.cnr.it/doc/84749},
  CONFERENCE_NAME = {4},
  CONFERENCE_PLACE = {Cairo-Egypt},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{SASSOLINI_2009_INPROCEEDINGS_SPEG_84742,
  AUTHOR = {Sassolini, E. and Picchi, E. and Ensini, M. and Guerriero, L.},
  TITLE = {Il progetto SUBITO e l'analisi semantica come strumento utile all'innovazione biomedica},
  YEAR = {2009},
  KEYWORDS = {Biomedicina, Analisi Semantica, Progetto"Subito"},
  URL = {https://publications.cnr.it/doc/84742},
  CONFERENCE_NAME = {10° Congresso Nazionale @ITIM 2009/, Monserrato, Cagliari, Italy},
  CONFERENCE_PLACE = {Monserrato(CA)},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{SORIA_2009_INPROCEEDINGS_SMV_84764,
  AUTHOR = {Soria, C. and Monachini, M. 
and Vossen, P.},
  TITLE = {Wordnet-LMF: Fleshing out a Standardized Format for Wordnet Interoperability},
  YEAR = {2009},
  ABSTRACT = {In this paper we present Wordnet-LMF, a dialect of ISO Lexical Markup Framework that instantiates LMF for representing wordnets. Wordnet-LMF was developed in the framework of the EU KYOTO project for the specific purpose of endowing a set of wordnets with a standardized interoperability format allowing the interchange of lexicosemantic information encoded in each of them. The aim of this format is twofold a) to give a preliminary assessment of LMF, by large-scale application to real lexical resources; b) to endow WordNet with a format representation that will allow easier integration among resources sharing the same structure (i.e other wordnets) and, more importantly, across resources with different theoretical and implementation approaches.},
  KEYWORDS = {Standards, Lexical Markup Framework, Lexical resources, Wordnets, Intercultural collaboration},
  PAGES = {139--146},
  URL = {https://publications.cnr.it/doc/84764},
  PUBLISHER = {ACM, Association for computing machinery (New York, USA)},
  ISBN = {978-1-60558-198-9},
  CONFERENCE_NAME = {International Workshop on Intercultural Collaboration},
  CONFERENCE_PLACE = {Palo Alto, California, USA},
  CONFERENCE_DATE = {20-21 Febbraio 2009},
  BOOKTITLE = {Proceedings of the International Workshop on Intercultural Collaboration},
}

@INPROCEEDINGS{TOKUNAGA_2009_INPROCEEDINGS_TKCMSSCTXHHK_84751,
  AUTHOR = {Tokunaga, T. and Kaplan, D. and Calzolari, N. and Monachini, M. and Soria, C. and Sornlertlamvanich, V. and Charoenporn, T. and Tesconi, M. and Xia, Y. and Huang, C. and Hsieh, S. and Kiyoaki, S.},
  TITLE = {Query Expansion using LMF-Compliant Lexical Resources},
  YEAR = {2009},
  ABSTRACT = {This paper reports prototype multilingual query expansion system relying on LMF compliant lexical resources. 
The system is one of the deliverables of a three-year project aiming at establishing an international standard for language resources which is applicable to Asian languages. Our important contributions to ISO 24613, standard Lexical Markup Framework (LMF) include its robustness to deal with Asian languages, and its applicability to cross-lingual query tasks, as illustrated by the prototype introduced in this paper.},
  KEYWORDS = {Lexical resources, Lexical Markup Framework (LMF), Standards},
  PAGES = {145--152},
  URL = {https://publications.cnr.it/doc/84751},
  ISBN = {978-1-932432-56-5},
  CONFERENCE_NAME = {ACL-IJCNLP 2009-7th Workshop on Asian Language Resources},
  CONFERENCE_PLACE = {Singapore},
  CONFERENCE_DATE = {6-7 Agosto 2009},
}

@INPROCEEDINGS{TORAL_2009_INPROCEEDINGS_TMSR_84754,
  AUTHOR = {Toral, A. and Monachini, M. and Soroa, A. and Rigau, G.},
  TITLE = {Studying the role of Qualia Relations for Word Sense Disambiguation},
  YEAR = {2009},
  KEYWORDS = {Generative Lexicon, Semantic Information Extraction, Word Sense Disambiguation},
  URL = {https://publications.cnr.it/doc/84754},
  CONFERENCE_NAME = {5th International Conference on Generative Approaches to the Lexicon},
  CONFERENCE_PLACE = {Pisa},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{VENTURI_2009_INPROCEEDINGS_VLMVSTA_173712,
  AUTHOR = {Venturi, G. and Lenci, A. and Montemagni, S. and Vecchi, E. M. and Sagri, M. T. and Tiscornia, D. and Agnoloni, T.},
  TITLE = {Towards a FrameNet Resource for the Legal Domain},
  YEAR = {2009},
  KEYWORDS = {Frame Semantics, Legal Ontologies, Knowledge Representation, Corpus Annotation},
  URL = {https://publications.cnr.it/doc/173712},
  CONFERENCE_NAME = {3rd Workshop on Legal Ontologies and Artificial Intelligence Techniques joint with 2nd Workshop on Semantic Processing of Legal Texts},
  CONFERENCE_PLACE = {Barcelona, Spain},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{VENTURI_2009_INPROCEEDINGS_VMMSTMA_84736,
  AUTHOR = {Venturi, G. and Montemagni, S. and Marchi, S. and Sasaki, Y. 
and Thompson, P. and McNaught, J. and Ananiadou, S.},
  TITLE = {Bootstrapping a Verb Lexicon for Biomedical Information Extraction},
  YEAR = {2009},
  ABSTRACT = {The extraction of information from texts requires resources that contain both syntactic and semantic properties of lexical units. As the use of language in specialized domains, such as biology, can be very different to the general domain, there is a need for domain-specific resources to ensure that the information extracted is as accurate as possible. We are building a large-scale lexical resource for the biology domain, providing information about predicate-argument structure that has been bootstrapped from a biomedical corpus on the subject of E. Coli. The lexicon is currently focussed on verbs, and includes both automatically-extracted syntactic subcategorization frames, as well as semantic event frames that are based on annotation by domain experts. In addition, the lexicon contains manually-added explicit links between semantic and syntactic slots in corresponding frames. To our knowledge, this lexicon currently represents a unique resource within in the biomedical domain.},
  KEYWORDS = {domain-specific lexical resources, Biological Language Processing, syntax-semantic linking},
  PAGES = {137--148},
  URL = {https://publications.cnr.it/doc/84736},
  DOI = {10.1007/978-3-642-00382-0_11},
  PUBLISHER = {Springer-Verlag (Berlin Heidelberg, DEU)},
  ISBN = {9783642003813},
  CONFERENCE_NAME = {10th International Conference on Intelligent Text Processing and Computational Linguistics},
  CONFERENCE_PLACE = {Mexico City, Mexico},
  CONFERENCE_DATE = {1-7/03/2009},
}

@INPROCEEDINGS{ZAMPONI_2009_INPROCEEDINGS_ZMCM_84745,
  AUTHOR = {Zamponi, R. and Marconi, L. and Cutugno, P. 
and Morgavi, G.},
  TITLE = {La lengua fang de la oralidad al diccionario: cuestiones y problemas atados a la realización de un diccionario bilingüe fang-español y español-fang},
  YEAR = {2009},
  ABSTRACT = {El fang es una lengua bantú de Africa centro-occidental a tradición oral y a fuerte fragmentación dialectal, sin un estandard y una norma ortográfica de referencia, que mustra hoy día, en Guinea Ecuatorial, país en que reside más allá de la mitad de los suyos ca. 450.000 hablantes, señales de desintegración bajo el empuje del español (lengua oficial y hegemónica). En las notas que siguen se delinean algunas cuestiones y problemas emergidos en la compilación de un diccionario bilingue fang-español y español-fang atados a el estatus sociolinguístico del fang, a la ausencia de estudios gramaticales detallados y de diccionarios modernos de esta lengua y, en fin, a la tradución del material léxico recogido por nosotros.},
  KEYWORDS = {Lingua Fang},
  PAGES = {553--556},
  URL = {http://www.santiago.cu/hosting/linguistica/descargar.php?d=791},
  PUBLISHER = {Centro de linguística aplicada, Ministerio de ciencia, tecnología y medio ambiente (Santiago de Cuba, CUB)},
  ISBN = {978-959-7174-14-1},
  CONFERENCE_NAME = {XI simposio Internacional de Comunicación Social Santiago de Cuba},
  CONFERENCE_PLACE = {Santiago de Cuba},
  CONFERENCE_DATE = {19-23 gennaio 2009},
  BOOKTITLE = {Actas, XI Simposio Internacional Comunicación Social},
  EDITOR = {Alvarez Silva, M. R. and Alvares Moreno, C. 
and Ruiz Miyares, L.},
}

@INPROCEEDINGS{BOZZI_2009_INPROCEEDINGS_B_112945,
  AUTHOR = {Bozzi, A.},
  TITLE = {Edizione critica elettronica di fonti manoscritte antiche digitali},
  YEAR = {2009},
  KEYWORDS = {Computational Philology},
  URL = {https://publications.cnr.it/doc/112945},
  CONFERENCE_NAME = {Archivi e Biblioteche: dalla memoria del passato al web},
  CONFERENCE_PLACE = {Cagliari},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{ENEA_2009_INPROCEEDINGS_E_112944,
  AUTHOR = {Enea, A.},
  TITLE = {Comunicazione orale},
  YEAR = {2009},
  KEYWORDS = {Archivistica, Beni Culturali},
  URL = {https://publications.cnr.it/doc/112944},
  CONFERENCE_NAME = {Un archivio on line. Presentazione dell'inventario della sezione storica dell'Archivio dell'Istituto degli Innocenti di Firenze (1218-1996)},
  CONFERENCE_PLACE = {Firenze},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{MARINELLI_2009_INPROCEEDINGS_MSC_112949,
  AUTHOR = {Marinelli, R. and Spadoni, G. and Cucurullo, S.},
  TITLE = {Visual information to improve a lexical-semantic terminological resource},
  YEAR = {2009},
  ABSTRACT = {The lexical semantic database MariTerm contains structured information about the specialized terminology of the maritime domain (maritime navigation and transport). 
This paper describes the main phases of a project which aims to enhance the terminological database by means of a set of images: a) the structure of the terminological database; b) the domain conceptual modelling; c) the database management tool which, among its various features, allows visualization on demand of the image which is associated with the term being sought, contributing to clarification of the meaning of the term and increasing its information and communication potential},
  KEYWORDS = {Lexical Semantic Data Bases, Visual Information, Terminology},
  PAGES = {159--160},
  URL = {https://publications.cnr.it/doc/112949},
  VOLUME = {1},
  CONFERENCE_NAME = {XXVII AESLA Conference "Modos y formas de la comunicacion humana-Ways and modes of human communication"},
  CONFERENCE_PLACE = {Ciudad Real},
  CONFERENCE_DATE = {2009},
}

@INPROCEEDINGS{MARZI_2009_INPROCEEDINGS_MPS_112950,
  AUTHOR = {Marzi, C. and Pardelli, G. and Sassi, M.},
  TITLE = {Grey Literature and Computational Linguistics: From Paper to Net},
  YEAR = {2009},
  ABSTRACT = {The advent and exponential development of the World Wide Web has led to an increasing availability of unstructured knowledge and distributed information sources, meeting general public requirements that are hardly addressed by other more traditional information channels. This trend has concurrently raised a considerable interest in the application of Computational Linguistics (CL) methodologies to document access and retrieval, as they offer the unprecedented opportunity to make the subjective, user-centred information demands of Net citizens meet the ever changing and heterogeneous information flow of the web. Over the last five years, more and more Italian Universities have introduced CL courses into their Humanities curricula, making available on-line teaching materials, tutorials and language engineering software that appear to supply the lack of offer from traditional Italian publishing houses. 
In this paper, we consider in some detail the role played by this type of Grey Literature in bringing up a wider and increasingly more aware community of web users in Italy. Theme: Impact of Grey Literature on Net Citizens}, KEYWORDS = {Computational Linguistics, Grey Literature, Web-based information}, PAGES = {81-84}, URL = {https://publications.cnr.it/doc/112950}, VOLUME = {11}, ISBN = {978-90-77484-14-2}, CONFERENCE_NAME = {Eleventh International Conference on Grey Literature "The Grey Mosaic, Piecing it All Together"}, CONFERENCE_PLACE = {Washington, DC}, CONFERENCE_DATE = {14-15 December 2009}, BOOKTITLE = {Eleventh International Conference on Grey Literature "The Grey Mosaic, Piecing it All Together"}, EDITOR = {Farace, D. J. and Frantzen, J.}, } @INPROCEEDINGS{MONACHINI_2009_INPROCEEDINGS_M_112943, AUTHOR = {Monachini, M.}, TITLE = {A 15-year journey in Standards for Lexical Resources}, YEAR = {2009}, KEYWORDS = {Standards, Lexicon}, URL = {https://publications.cnr.it/doc/112943}, CONFERENCE_NAME = {New horizons for Linguistic Resources in a Global Context}, CONFERENCE_PLACE = {Barcelona, Spain}, CONFERENCE_DATE = {2009}, } @INPROCEEDINGS{MONACHINI_2009_INPROCEEDINGS_M_112947, AUTHOR = {Monachini, M.}, TITLE = {The WordNet-LMF Representation Format}, YEAR = {2009}, KEYWORDS = {Standards, Lexicon}, URL = {https://publications.cnr.it/doc/112947}, CONFERENCE_NAME = {The First KYOTO Workshop Environmental Knowledge Transition and Exchange}, CONFERENCE_PLACE = {Amsterdam}, CONFERENCE_DATE = {2009}, } @INPROCEEDINGS{MORGAVI_2009_INPROCEEDINGS_MMMC_184927, AUTHOR = {Morgavi, G. and Marconi, L. and Morando, M. and Cutugno, P.}, TITLE = {From creative cognitive learning to adaptable artificial system design}, YEAR = {2009}, ABSTRACT = {Background: Over the last decade, a number of researchers have suggested a developmental perspective on AI and robotics. 
The ultimate shared goal among them seems to be the idea of bootstrapping high-level cognition through a process in which the agent interacts with a real physical environment over extended periods of time [2]. These studies generated epigenetic robotics, a new AI/ robotics field which includes the two-fold goal of understanding biological systems by the interdisciplinary integration between social/life and engineering sciences and, simultaneously, that of enabling robots and other artificial systems to autonomously develop skills for any particular environment (instead of programming them to solve particular goals for a specific environment). Interdisciplinary theory and empirical evidence are used to inform epigenetic robotic models, and these models can be used as theoretical tools to make experimental predictions in developmental psychology and other disciplines studying cognitive development in living systems. One of the fundamental methodological assumptions is that cognition is embodied, which means that it arises from bodily interactions with the real world[1]. The next logical step along the road towards truly autonomous robots that can dive in unpredictable environments is to investigate how one might design robots that are capable of `growing up' through experience. A living artifact grows up when its capabilities, abilities/knowledge, shift to a further level of complexity [3]. 
Following different psychological points of view, growing up implies: adaptation, change of functional meaning; increased complexity; enlargement of the internal knowledge map; abstraction and insight.}, KEYWORDS = {creative processes, abstraction, growing up}, PAGES = {257-260}, URL = {http://www.isefc.rnu.tn/cem09/Downloads/Cem09_Abstracts_Book.pdf}, ISBN = {978-9973-13-009-9}, CONFERENCE_NAME = {CEM09 International Congress on Cognition, Emotion \& Motivation}, CONFERENCE_PLACE = {Hammamet, Tunisia}, CONFERENCE_DATE = {2-5 Novembre 2009}, BOOKTITLE = {Cognition Emotion Motivation Percept-Concept-Decision: Application to Learning Activities}, EDITOR = {Masmoudi, S. and Naceur, A.}, } @INPROCEEDINGS{PIRRELLI_2009_INPROCEEDINGS_P_288113, AUTHOR = {Pirrelli, V.}, TITLE = {Comprendere un documento con il computer}, YEAR = {2009}, ABSTRACT = {In this talk, I shall deal with the complex process of computer reading and understanding of text documents as the result of the interleaving of a number of levels of processing, both linguistic and extra-linguistic, such as parsing, classifying, learning and knowing. In real tasks, there exists no parsing without classifying, no classifying without learning, no learning without knowing and eventually no knowing without "doing things with words". Only through robust integration and co-operation of less than optimal components and inter-disciplinary cross-fertilization we can hope to develop general and comprehensive solutions which are more than the sums of their parts.}, KEYWORDS = {Gestione documentale, annotazione linguistica, indicizzazione}, URL = {https://publications.cnr.it/doc/288113}, CONFERENCE_NAME = {Documentazione, terminologia e Scienze delle Informazione}, CONFERENCE_PLACE = {CNR, Roma}, CONFERENCE_DATE = {12 giugno 2009}, } @INPROCEEDINGS{RUIMY_2009_INPROCEEDINGS_RBP_112946, AUTHOR = {Ruimy, N. and Bozzi, A. 
and Pardelli, G.}, TITLE = {Modèle lexical pour un thésaurus-lexique électronique de la terminologie saussurienne}, YEAR = {2009}, ABSTRACT = {Le modèle lexical SIMPLE que nous nous proposons d'adopter pour la création d'un thésaurus-lexique de la terminologie linguistique saussurienne a été créé dans le cadre du projet européen éponyme et a permis le développement de lexiques sémantiques monolingues harmonisés pour douze langues de l'Union Européenne. Dès sa création, il s'est imposé comme standard de facto et a ainsi inspiré de manière déterminante le standard international ISO pour les lexiques du Traitement Automatique des Langues. Dans le panorama de la Lexicographie Computationnelle, ce modèle lexical se distingue par certains aspects particulièrement novateurs. L'approche théorique adoptée pour la représentation de l'information sémantique est basée sur les principes fondamentaux de la théorie du Lexique Génératif (J. Pustejovsky, 1995 ; 2001 ). L'architecture flexible du modèle ainsi que la méthodologie de construction du lexique permettent d'encoder dans les entrées lexicales une ample gamme d'informations hautement structurées couvrant tous les aspects de la sémantique lexicale des mots ; ceci de manière cohérente, et au degré de granularité souhaité. La structuration conceptuelle du lexique est confiée à une ontologie linguistique mettant en relief le caractère multidimensionnel du sens lexical. L'ontologie SIMPLE est formée de types sémantiques indépendants des langues et a été conçue en prévision d'expansions ou de spécialisations visant à satisfaire aux exigences de domaines d'intérêt particuliers. Outre la classification ontologique et un certain nombre de traits sémantiques, les unités lexicales sont caractérisées par un réseau de relations sémantiques qui sont en majeure partie le fruit d'une relecture de la structure des qualia que Pustejovsky emprunte à Moravcsik, 1975 . 
Ces relations permettent de modéliser l'information concernant les différentes composantes sémantiques qui concourent à définir la structure interne d'une unité lexicale. Elles explicitent ses liens hyperonymiques et méronymiques mais aussi, sur l'axe syntagmatique, les liens aux différents prédicats qui contribuent à éclairer son sens, et en particulier l'origine et la fonction de l'entité dénotée. D'autres relations, telles que la synonymie, la polysémie logique et la dérivation morphologique sont également définies. Les lexèmes prédicatifs, et en particulier les verbes, sont de surcroît caractérisés par le scénario sémantique dans lequel ils sont impliqués. Celui-ci est décrit en termes de classe aspectuelle et de structure argumentale du prédicat, avec indication du rôle sémantique et des restrictions de sélection de chacun des arguments.}, KEYWORDS = {Computational Lexicon, Thesaurus}, PAGES = {1-17}, URL = {https://publications.cnr.it/doc/112946}, CONFERENCE_NAME = {Séminaire international "Publier les manuscrits de Ferdinand de Saussure"}, CONFERENCE_PLACE = {Arcavacata, Università della Calabria}, CONFERENCE_DATE = {1-3 ottobre 2009}, } @INPROCEEDINGS{SPINOSA_2009_INPROCEEDINGS_SGCMVM_130118, AUTHOR = {Spinosa, P. and Giardiello, G. and Cherubini, M. and Marchi, S. and Venturi, G. and Montemagni, S.}, TITLE = {NLP–based Metadata Extraction for Legal Text Consolidation}, YEAR = {2009}, KEYWORDS = {Natural Language Processing, textual amendments, XML representation, metadata extraction, consolidation of legal text}, URL = {https://publications.cnr.it/doc/130118}, CONFERENCE_NAME = {Twelfth International Conference on Artificial Intelligence and Law (ICAIL 2009)}, CONFERENCE_PLACE = {Barcelona}, CONFERENCE_DATE = {June 8-12, 2009}, } @INPROCEEDINGS{VENTURI_2009_INPROCEEDINGS_VMMSTMA_112956, AUTHOR = {Venturi, G. and Montemagni, S. and Marchi, S. and Sasaki, Y. and Thompson, P. and McNaught, J. 
and Ananiadou, S.}, TITLE = {Bootstrapping a Verb Lexicon for Biomedical Information Extraction}, YEAR = {2009}, ABSTRACT = {The extraction of information from texts requires resources that contain both syntactic and semantic properties of lexical units. As the use of language in specialized domains, such as biology, can be very different to the general domain, there is a need for domain-specific resources to ensure that the information extracted is as accurate as possible. We are building a large-scale lexical resource for the biology domain, providing information about predicate-argument structure that has been bootstrapped from a biomedical corpus on the subject of E. Coli. The lexicon is currently focussed on verbs, and includes both automatically-extracted syntactic subcategorization frames, as well as semantic event frames that are based on annotation by domain experts. In addition, the lexicon contains manually-added explicit links between semantic and syntactic slots in corresponding frames. To our knowledge, this lexicon currently represents a unique resource within the biomedical domain.}, KEYWORDS = {domain-specific lexical resources, lexical acquisition, syntax-semantics linking, Information Extraction, Biological Language Processing}, PAGES = {137-148}, URL = {https://publications.cnr.it/doc/112956}, VOLUME = {5449}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, ISBN = {978-3-642-00381-3}, CONFERENCE_NAME = {International Conference on Intelligent Text Processing and Computational Linguistics (CICLing 2009)}, CONFERENCE_PLACE = {Mexico City, Mexico}, CONFERENCE_DATE = {March 1-7, 2009}, BOOKTITLE = {Proceedings of the 10th International Conference on Intelligent Text Processing and Computational Linguistics (CICLing 2009)}, EDITOR = {Gelbukh, A.}, } @TECHREPORT{ALIPRANDI_2009_TECHREPORT_ANMRTSMVBAADRS_262195, AUTHOR = {Aliprandi, C. and Neri, F. and Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C. and Monachini, M. 
and Vossen, P. and Bosma, W. and Agirre, E. and Artola, X. and De Ilarraza, A. D. and Rigau, G. and Soroa, A.}, TITLE = {Database Models and Data Formats DELIVERABLE NR. 1/WP NR. 2}, YEAR = {2009}, ABSTRACT = {This deliverable describes data structure and XML formats that have been investigated and defined for data representation of linguistic and semantic resources underlying the KYOTO system.}, URL = {https://publications.cnr.it/doc/262195}, } @TECHREPORT{ALIPRANDI_2009_TECHREPORT_ANMRTSMVBAADRS_157459, AUTHOR = {Aliprandi, C. and Neri, F. and Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C. and Monachini, M. and Vossen, P. and Bosma, W. and Agirre, E. and Artola, X. and Diaz, D. I. A. and Rigau, G. and Soroa, A.}, TITLE = {Database Models and Data Formats}, YEAR = {2009}, KEYWORDS = {XML data format, TMF, SEMAF, OWL/KIF, FACTAF}, URL = {https://publications.cnr.it/doc/157459}, } @TECHREPORT{BARONI_2009_TECHREPORT_B_316559, AUTHOR = {Baroni, P.}, TITLE = {FLaReNet Web Statistics: 7 December 2008-31 August 2009}, YEAR = {2009}, ABSTRACT = {Statistics relating to the access to the FLaReNet Web site from 7 December 2008 to 31 August 2009.}, KEYWORDS = {Language Resources, Web Statistics}, PAGES = {6}, URL = {https://publications.cnr.it/doc/316559}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CBGMQST_157465, AUTHOR = {Calzolari, N. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Dissemination Plan}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157465}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CBGMQST_157468, AUTHOR = {Calzolari, N. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 
1}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157468}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CMSBGQT_157467, AUTHOR = {Calzolari, N. and Monachini, M. and Soria, C. and Baroni, P. and Goggi, S. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Progress Report No. 2}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157467}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBCGMQTBBCMOP_157462, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Caselli, T. and Goggi, S. and Monachini, M. and Quochi, V. and Toral, A. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Action Plan}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157462}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBGMQT_157466, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Evaluation Plan for the functioning of the Network}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157466}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBMQ_157463, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Monachini, M. and Quochi, V.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Annual Report No. 1}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157463}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBMQT_157469, AUTHOR = {Calzolari, N. and Soria, C. and Baroni, P. and Monachini, M. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Project Presentation}, YEAR = {2009}, KEYWORDS = {Language Resources}, URL = {https://publications.cnr.it/doc/157469}, } @TECHREPORT{CALZOLARI_2009_TECHREPORT_CSBBCCMMOPQT_157464, AUTHOR = {Calzolari, N. and Soria, C. and Bel, N. and Budin, G. and Caselli, T. 
and Choukri, K. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Toral, A.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Blueprint of actions and infrastructures No. 1}, YEAR = {2009}, KEYWORDS = {Language Resources, Infrastructures, Recommendations}, URL = {https://publications.cnr.it/doc/157464}, } @TECHREPORT{ENEA_2009_TECHREPORT_ES_157458, AUTHOR = {Enea, A. and Sandri, L.}, TITLE = {L'Archivio dell'Ospedale degli Innocenti di Firenze e il suo inventario on line}, YEAR = {2009}, ABSTRACT = {Sono descritti i passaggi intervenuti per la realizzazione di un sito internet dedicato alla consultazione dell'inventario dell'Archivio dell'antico Ospedale degli Innocenti di Firenze.}, KEYWORDS = {Archivistica, Beni Culturali}, URL = {https://publications.cnr.it/doc/157458}, } @TECHREPORT{HEROLD_2009_TECHREPORT_HHSVRALMTS_157472, AUTHOR = {Herold, A. and Hicks, A. and Segers, R. and Vossen, P. and Rigau, G. and Agirre, E. and Laparra, E. and Monachini, M. and Toral, A. and Soria, C.}, TITLE = {WordNets mapped to Central Ontology}, YEAR = {2009}, KEYWORDS = {Ontology}, URL = {https://publications.cnr.it/doc/157472}, } @TECHREPORT{PICCHI_2009_TECHREPORT_PSCE_157473, AUTHOR = {Picchi, E. and Sassolini, E. and Cucurullo, S. and Ensini, M.}, TITLE = {WP3.1 Tecniche di analisi semantica per l’estrazione di ontologie bio-medicali}, YEAR = {2009}, KEYWORDS = {Ontologie, Semantica computazionale, Terminologia medica}, URL = {https://publications.cnr.it/doc/157473}, } @TECHREPORT{PIRRELLI_2009_TECHREPORT_PM_176379, AUTHOR = {Pirrelli, V. and Marzi, C.}, TITLE = {Words In Action: Interdisciplinary Approaches to Understanding Word Processing and Storage}, YEAR = {2009}, ABSTRACT = {Almost all levels of language knowledge and processing (from phonology, to syntax and semantics) are known to be affected by knowledge of word structure at varying degrees. 
A better understanding of the human strategies involved in learning and processing word structure thus lies at the heart of our comprehension of the basic mechanisms serving both language and cognition and is key to addressing some fundamental challenges for the study of the physiology of grammar. On the 12th and 13th of October 2009, in the Research Area of the Italian National Research Council (CNR) in Pisa, 26 scholars from Europe, Canada and the United States were convened to take part in the European Science Foundation Exploratory Workshop "Words in Action: Interdisciplinary Approaches To Understanding Word Processing And Storage". The workshop brought together experts of various scientific domains and different theoretical inclinations to advance the current awareness of theoretical, historical, psycholinguistic, computational and neurophysiological issues in morphological processing and learning, with a view to assessing levels of research convergence and exploring the potential for synergy and strategic co-operation.}, KEYWORDS = {Morphology, Word Processing, Word Learning, Mental Lexicon}, URL = {https://publications.cnr.it/doc/176379}, } @TECHREPORT{SASSI_2009_TECHREPORT_SC_157456, AUTHOR = {Sassi, M. and Cinini, A.}, TITLE = {Dieci anni di informazione sanitaria. Analisi di tre quotidiani a tiratura nazionale}, YEAR = {2009}, ABSTRACT = {L'Istituto di Linguistica Computazionale (ILC-CNR) ha realizzato In collaborazione con L'Osservatorio della Comunicazione Sanitaria (OCS) una Banca Dati Testuale composta dagli articoli che trattano Informazione Sanitaria, pubblicati a partire dall'anno 1999 su tre quotidiani a diffusione nazionale: Corriere della Sera (C), Repubblica (R) e Stampa (S).}, KEYWORDS = {Analisi Informazione sanitaria, Banca dati Testuale}, PAGES = {1-31}, URL = {https://publications.cnr.it/doc/157456}, } @MISC{CALZOLARI_2009_MISC_CBBBCGMMOPQST_157471, AUTHOR = {Calzolari, N. and Baroni, P. and Bel, N. and Budin, G. and Choukri, K. 
and Goggi, S. and Mariani, J. and Monachini, M. and Odijk, J. and Piperidis, S. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, KEYWORDS = {Language Resources, Language Technologies}, URL = {https://publications.cnr.it/doc/157471}, } @MISC{CALZOLARI_2009_MISC_CBBCMOPBGMQST_157457, AUTHOR = {Calzolari, N. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {Extended Report of: The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, KEYWORDS = {Language Resources, Language Technologies}, URL = {https://publications.cnr.it/doc/157457}, } @MISC{CALZOLARI_2009_MISC_CBBCMOPBGMQST_157460, AUTHOR = {Calzolari, N. and Bel, N. and Budin, G. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S. and Baroni, P. and Goggi, S. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {Short Report of The European Language Resources and Technologies Forum: Shaping the Future of the Multilingual Digital Europe}, YEAR = {2009}, KEYWORDS = {Language Resources, Language Technologies}, URL = {https://publications.cnr.it/doc/157460}, } @MISC{CASELLAS_2009_MISC_CFHM_157461, AUTHOR = {Casellas, N. and Francesconi, E. and Hoekstra, R. 
and Montemagni, S.}, TITLE = {3rd Workshop on Legal Ontologies and Artificial Intelligence Techniques joint with 2nd Workshop on Semantic Processing of Legal text}, YEAR = {2009}, KEYWORDS = {Legal Ontologies, Computational Semantics}, URL = {https://publications.cnr.it/doc/157461}, } @MISC{ENEA_2009_MISC_E_220437, AUTHOR = {Enea, A.}, TITLE = {Gli ARCHIVI CARTOLINE degli Istituti per la storia della Resistenza e della società contemporanea in Italia}, YEAR = {2009}, ABSTRACT = {Gli ARCHIVI CARTOLINE degli Istituti per la storia della Resistenza e della società contemporanea in Italia consentono una consultazione delle descrizioni delle cartoline del fondo Giulio Fiocchi conservato presso l'Istituto Nazionale "F. Parri" di Milano. Il fondo è organizzato in serie e sottoserie e la descrizione è a livello del singolo documento. Le immagini di tutti i documenti sono riprodotte e consultabili.}, KEYWORDS = {archivistica, beni culturali}, URL = {http://www.reteparri.it/risorse-on-line/servizi-archivistici}, } @MISC{ENEA_2009_MISC_E_220438, AUTHOR = {Enea, A.}, TITLE = {Il METAOPAC ARCHIVISTICO degli Istituti per la storia della Resistenza e della società contemporanea in Italia}, YEAR = {2009}, ABSTRACT = {Il METAOPAC ARCHIVISTICO degli Istituti per la storia della Resistenza e della società contemporanea in Italia consente una ricerca simultanea alle banche dati Guida, Foto e Carto.}, KEYWORDS = {archivistica, beni culturali}, URL = {http://www.reteparri.it/risorse-on-line/servizi-archivistici}, } @MISC{ENEA_2009_MISC_ES_220445, AUTHOR = {Enea, A. and Sandri, L.}, TITLE = {L'Inventario della sezione storica dell'Archivio dell'Ospedale degli Innocenti di Firenze}, YEAR = {2009}, KEYWORDS = {Archivistica, Beni Culturali}, URL = {http://www.istitutodeglinnocenti.it/?q=content/inventario}, } @ARTICLE{BAGLIONI_2008_ARTICLE_BGMRS_68449, AUTHOR = {Baglioni, M. and Giovannetti, E. and Masserotti, M. V. and Renso, C. 
and Spinsanti, L.}, TITLE = {Ontology-supported Querying of Geographical Databases}, YEAR = {2008}, ABSTRACT = {Querying geographical information systems has been recognized as a difficult task for non-expert users. Furthermore, user queries are often characterized by semantic aspects not directly managed by traditional spatial databases or GIS. Examples of such semantic geospatial queries are the use of implicit spatial relations between objects, or the reference of domain concepts not explicitly represented in data. To handle such queries, we envisage a system that translates natural language queries into spatial SQL statements on a database, thus improving standard GIS with new semantic capabilities. Within this general objective, the contribution of this article is to introduce a methodology to handle semantic geospatial queries issued over a spatial database. This approach captures semantics from an ontology built upon the spatial database and enriched by domain concepts and properties specifically defined to represent the localization of objects. Some examples of the use of the methodology in the urban domain are presented.}, KEYWORDS = {H.2.8 Database Applications, Ontology extraction, Spatial databases}, PAGES = {31-44}, URL = {https://publications.cnr.it/doc/68449}, VOLUME = {12 (suppl. 1)}, PUBLISHER = {Blackwell (Oxford, Regno Unito)}, ISSN = {1467-9671}, JOURNAL = {Transactions in GIS (Online)}, } @ARTICLE{CALZOLARI_2008_ARTICLE_C_30875, AUTHOR = {Calzolari, N.}, TITLE = {Initiatives, Tendencies and Driving Forces for a 'Lexical Web' as Part of a 'Language Infrastructure'}, YEAR = {2008}, ABSTRACT = {While highlighting the infrastructural role of Language Resources (LR) I sketch my perception of the current situation in the LR field. I underline some of the priority areas of concern today with respect to implementing an open Language Infrastructure, and specifically what we could call a "Lexical Web". 
My objective is to show that it is imperative that there is an underlying global strategy behind the set of initiatives which are/can be launched in Europe and world-wide, and that a global vision and cooperation among different communities is necessary to achieve more coherent and useful results. I end up mentioning two new European initiatives that go on this direction and promise to be influential in shaping the future of the LR area.}, KEYWORDS = {Language Resources, Language Infrastructure, Computational Lexicons, Standards, Language Technology}, PAGES = {90-105}, URL = {http://www.springerlink.com/content/54357211172v7653/}, VOLUME = {4938}, DOI = {10.1007/978-3-540-78159-2_10}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, JOURNAL = {Lecture notes in computer science}, } @ARTICLE{CALZOLARI_2008_ARTICLE_C_64542, AUTHOR = {Calzolari, N.}, TITLE = {Language Infrastructures: what happens outside Europe?}, YEAR = {2008}, ABSTRACT = {The setup of CLARIN in Europe was the result of a long series of initiatives and attempts from many of us, starting already at the beginning on the 6th Framework Programme. That time is finally ripe for such an infrastructure is shown also by other initiatives outside Europe that share objectives and ideas with CLARIN. I mention here just a few.}, KEYWORDS = {Language Technology}, PAGES = {1-1}, URL = {https://publications.cnr.it/doc/64542}, VOLUME = {3}, } @ARTICLE{CALZOLARI_2008_ARTICLE_CC_288393, AUTHOR = {Calzolari, N. and Choukri, K.}, TITLE = {LREC2008-The Language Resources and Evaluation Conference}, YEAR = {2008}, ABSTRACT = {LREC, launched in 1998 in Granada as a visionary idea of Antonio Zampolli and other colleagues, celebrated its tenth birthday in the wonderful city of Marrakech. 
The conference had the honour of receiving the Royal Patronage of His Majesty Mohammed VI, King of Morocco, highlighting the importance assigned to multilingual and multicultural issues.}, KEYWORDS = {Language Resources}, PAGES = {56-56}, URL = {http://ercim-news.ercim.eu/images/stories/EN74/EN74-web.pdf}, VOLUME = {74 July 2008}, PUBLISHER = {ERCIM (Le Chesnay)}, ISSN = {0926-4981}, JOURNAL = {ERCIM news}, } @ARTICLE{CIGNONI_2008_ARTICLE_C_64540, AUTHOR = {Cignoni, L.}, TITLE = {'Yummy Yummy! Per giocare con l'inglese', Inglese Educazione alimentare}, YEAR = {2008}, ABSTRACT = {No abstract available}, KEYWORDS = {Insegnamento lingua inglese, bambini, educazione alimentare, scuola dell'infanzia}, PAGES = {1-32}, URL = {https://publications.cnr.it/doc/64540}, PUBLISHER = {Giunti Gruppo Editoriale (Firenze, Italia)}, ISSN = {1590-3206}, JOURNAL = {Scuola dell'infanzia}, } @ARTICLE{DELLORLETTA_2008_ARTICLE_DLMMPV_64541, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio}, YEAR = {2008}, ABSTRACT = {The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. 
Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.}, KEYWORDS = {Natural Language Processing, Machine Learning, Knowledge extraction from texts, Ontology learning, Legal ontologies}, PAGES = {197-218}, URL = {https://publications.cnr.it/doc/64541}, VOLUME = {26}, PUBLISHER = {Aida (Roma, Italia)}, ISSN = {1594-2201}, JOURNAL = {Aida Informazioni (Online)}, } @ARTICLE{MONTEMAGNI_2008_ARTICLE_M_64543, AUTHOR = {Montemagni, S.}, TITLE = {The space of Tuscan dialectal variation. A correlation study}, YEAR = {2008}, ABSTRACT = {The paper illustrates the results of a correlation study focusing on linguistic variation in an Italian region, Tuscany. By exploiting a multi-level representation scheme of dialectal data, the study analyses attested patterns of phonetic and morpho-lexical variation with the aim of testing the degree of correlation between a) phonetic and morpho-lexical variation, and b) linguistic variation and geographic distance. The correlation analysis was performed by combining two complementary approaches proposed in dialectometric literature, namely by computing both global and place-specific correlation measures and by inspecting their spatial distribution. 
Achieved results demonstrate that phonetic and morpho-lexical variations in Tuscany seem to follow a different pattern than encountered in previous studies.}, KEYWORDS = {Computational dialectology, Dialectometry}, PAGES = {135-152}, URL = {http://www.euppublishing.com/doi/abs/10.3366/E1753854809000354}, VOLUME = {2}, DOI = {10.3366/E1753854809000354}, PUBLISHER = {Edinburgh University Press for the Association for History and Computing (Edinburgh, Regno Unito)}, ISSN = {1753-8548}, JOURNAL = {International journal of humanities and arts computing (Print)}, } @INCOLLECTION{MARINELLI_2008_INCOLLECTION_M_136461, AUTHOR = {Marinelli, R.}, TITLE = {Idiomatic Expressions and Metaphors from the Maritime Domain}, YEAR = {2008}, ABSTRACT = {According to EuroWordNet/ItalWordNet model, a terminological database has been created which contains terms belonging to the specialized lexicon of the technical-nautical and maritime transport domain. We want to give prominence to the frequency of idiomatic expressions and metaphors coming from this cultural environment used in everyday language. Our lexicographic research aims at: i) analyzing the relationship between the maritime domain as "source domain" and the concepts described in the "target domain"; ii) structuring the semantic code of idiomatic expressions in the terminological database as well as the link with the equivalent or closest expressions in English; iii) checking the use and the frequency of this type of idiomatic expressions and metaphors in a large corpus of Italian contemporary language.}, KEYWORDS = {Terminology, Idiomatic Expressions, Metaphors, Lexical databases}, PAGES = {209-220}, URL = {https://publications.cnr.it/doc/136461}, VOLUME = {41}, PUBLISHER = {Peter Lang Internationaler Verlag der Wissenschaften (Francoforte, DEU)}, ISBN = {978-3-631-57419-5}, BOOKTITLE = {Lenguaje figurado y motivacion. Una perspectiva desde la fraseologia}, EDITOR = {De La Granja, M. 
A.}, } @INCOLLECTION{MARINELLI_2008_INCOLLECTION_M_177509, AUTHOR = {Marinelli, R.}, TITLE = {Analisi di metafore e espressioni idiomatiche per mezzo di risorse computazionali e corpora elettronici}, YEAR = {2008}, ABSTRACT = {Sulla base del modello di database semantico lessicale EWN e IWN è stato creato un database terminologico che contiene circa 3000 termini appartenenti al lessico specializzato del dominio marittimo.}, KEYWORDS = {Metafore, corpora, database semantici, ontologia}, PAGES = {469-488}, URL = {https://publications.cnr.it/doc/177509}, VOLUME = {XI/1-2}, PUBLISHER = {Viella (Roma, ITA)}, ISBN = {978-88-8334-431-2}, BOOKTITLE = {Critica del testo. L'Europa dei proverbi}, EDITOR = {Punzi, A. and Tomassetti, I.}, } @INCOLLECTION{MAZZOCCHI_2008_INCOLLECTION_MMT_136462, AUTHOR = {Mazzocchi, F. and Marinelli, R. and Tiberi, M.}, TITLE = {Refining the Thesaural Associative Relationship by Applying the EuroWordNet Semantic Model}, YEAR = {2008}, KEYWORDS = {Thesaurus}, URL = {https://publications.cnr.it/doc/136462}, } @INCOLLECTION{MONTEMAGNI_2008_INCOLLECTION_M_136460, AUTHOR = {Montemagni, S.}, TITLE = {Analisi linguistico-computazionali del corpus dialettale dell'Atlante Lessicale Toscano. Primi risultati sul rapporto toscano-italiano}, YEAR = {2008}, KEYWORDS = {Corpus dialettale}, URL = {https://publications.cnr.it/doc/136460}, PUBLISHER = {Pacini (Pisa, ITA)}, } @EDITORIAL{GOGGI_2008_EDITORIAL_GZ_146079, AUTHOR = {Goggi, S. and Zamorani, N.}, TITLE = {Language Resources and Evaluation}, YEAR = {2008}, URL = {https://publications.cnr.it/doc/146079}, } @INPROCEEDINGS{ANANIADOU_2008_INPROCEEDINGS_AMNS_84713, AUTHOR = {Ananiadou, S. and Monachini, M. and Nenadic, G. 
and Su, J.}, TITLE = {Foreword}, YEAR = {2008}, ABSTRACT = {Key resources of interest in biomedical text mining are lexical and knowledge repositories (controlled vocabularies, terminologies, thesauri, ontologies) and annotated corpora, including both task-specific resources and repositories reengineered from biomedical or general language resources. Of particular interest is the process of building annotated resources, including designing guidelines and annotation schemas (aiming at both syntactic and semantic interoperability) and relying on language engineering standards. Challenging aspects are updates and evolution management of resources, as well as their documentation, dissemination and evaluation.}, KEYWORDS = {Biomedical text mining, Corpora annotated}, URL = {https://publications.cnr.it/doc/84713}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation-Workshop}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26-05/1-06-2008}, } @INPROCEEDINGS{BROEDER_2008_INPROCEEDINGS_BDHPRCW_84715, AUTHOR = {Broeder, D. and Declerck, T. and Hinrichs, E. and Piperidis, S. and Romary, L. and Calzolari, N. 
and Wittenburg, P.}, TITLE = {Foundation of a Component-based Flexible Registry for Language Resources and Technology}, YEAR = {2008}, KEYWORDS = {Metadata, Digital libraries, LR Infrastructures and Architectures}, URL = {https://publications.cnr.it/doc/84715}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{CALZOLARI_2008_INPROCEEDINGS_C_84702, AUTHOR = {Calzolari, N.}, TITLE = {Approaches towards a Lexical Web: the role of Interoperability}, YEAR = {2008}, KEYWORDS = {Language Resources, Lexicons, Ontology}, URL = {https://publications.cnr.it/doc/84702}, CONFERENCE_NAME = {The First International Conference on Global Interoperability for Language Resources}, CONFERENCE_PLACE = {Hong Kong}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{CASELLI_2008_INPROCEEDINGS_CIB_84699, AUTHOR = {Caselli, T. and Ide, N. and Bartolini, R.}, TITLE = {A Bilingual Corpus of Inter-linked Events}, YEAR = {2008}, ABSTRACT = {This paper describes the creation of a bilingual corpus of inter-linked events for Italian and English. Linkage is accomplished through the Inter-Lingual Index (ILI) that links ItalWordNet with WordNet. The availability of this resource, on the one hand, enables contrastive analysis of the linguistic phenomena surrounding events in both languages, and on the other hand, can be used to perform multilingual temporal analysis of texts. 
In addition to describing the methodology for construction of the inter-linked corpus and the analysis of the data collected, we demonstrate that the ILI could potentially be used to bootstrap the creation of comparable corpora by exporting layers of annotation for words that have the same sense.}, KEYWORDS = {Corpus (creation, annotation, etc.), Semantics, Validation of LRs}, PAGES = {2424-2429}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/610_paper.pdf}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26 May-1 June 2008}, BOOKTITLE = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Tapias, D.}, } @INPROCEEDINGS{CIGNONI_2008_INPROCEEDINGS_C_84711, AUTHOR = {Cignoni, L.}, TITLE = {Filling the gap: a three-language philological dictionary based on contexts from authoritative sources}, YEAR = {2008}, ABSTRACT = {This paper describes the methodology adopted for the creation of a multilingual--English-Italian-French--philological dictionary, designed to meet as far as possible the requirements of users in the field of philology, who need to use specific terms in a language other than their own. The project is addressed to graduate and post-graduate students, tutors and scholars, translators and interpreters for whom a glossary of specialised terms relative to a given universe of discourse is essential. The dictionary defines a variety of terms associated with philology and extends to other closely connected disciplines such as textual criticism, codicology, palaeography, epigraphy, papyrology, genetic criticism, etc. 
The three-language glossary is arranged in conventional form--each lexical entry listed alphabetically--and the English terms are followed by their equivalents in Italian and French. The Greek or Latin words from which many philological words have derived and frequently used to refer to a particular concept or phenomenon are also included. The project involves recording not just the term but also a brief contextualized definition in each language, accurately quoting the source, certified and scientifically reliable, from which the information was drawn. Alongside these definitions, a number of other contextualizations appear, also derived from authoritative sources, and different types of illustrations--e.g. manuscripts, stems, images of people and places--relative to the terminology are provided. The textual data and images will be included into an application (Alpha version) of the PINAKES project, released in March 2007, which is able to deal with different types of information--text, scientific objects, tables or graphics. This ongoing dictionary project, at present covering a total of around 1000 words, is constantly enriched with new entries, definitions and contextualizations in the different languages.}, KEYWORDS = {Computational Philology Terminology Dictionaries Multilingual}, PAGES = {947-955}, URL = {https://publications.cnr.it/doc/84711}, PUBLISHER = {IULA-Institut Universitari de Lingüística Aplicada (Barcelona, ESP)}, ISBN = {978-84-96742-67-3}, CONFERENCE_NAME = {XIII Euralex International Congress}, CONFERENCE_PLACE = {Barcelona, Spain}, CONFERENCE_DATE = {15-19 July 2008}, BOOKTITLE = {XIII Euralex International Congress}, EDITOR = {Bernal, E. and Decesaris, J.}, } @INPROCEEDINGS{DELGRATTA_2008_INPROCEEDINGS_DBCMSC_84729, AUTHOR = {Del Gratta, R. and Bartolini, R. and Caselli, T. and Monachini, M. and Soria, C. 
and Calzolari, N.}, TITLE = {UFRA: a UIMA-based Approach to Federated Language Resource Architecture}, YEAR = {2008}, ABSTRACT = {In this paper we address the issue of developing an interoperable infrastructure for language resources and technologies. In our approach, called UFRA, we extend the Federate Database Architecture System adding typical functionalities coming from UIMA. In this way, we capitalize the advantages of a federated architecture, such as autonomy, heterogeneity and distribution of components, monitored by a central authority responsible for checking both the integration of components and user rights on performing different tasks. We use the UIMA approach to manage and define one common front-end, enabling users and clients to query, retrieve and use language resources and technologies. The purpose of this paper is to show how UIMA leads from a Federated Database Architecture to a Federated Resource Architecture, adding to a registry of available components both static resources such as lexicons and corpora and dynamic ones such as tools and general purpose language technologies. At the end of the paper, we present a case-study that adopts this framework to integrate the SIMPLE lexicon and TIMEML annotation guidelines to tag natural language texts.}, KEYWORDS = {LR Infrastructures and Architectures, LR web services, Lexicon, Lexical database}, PAGES = {2634-2639}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/656_paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26 May-1 June 2008}, } @INPROCEEDINGS{DELGRATTA_2008_INPROCEEDINGS_DCCR_84724, AUTHOR = {Del Gratta, R. and Caselli, T. and Calzolari, N. 
and Ruimy, N.}, TITLE = {TimeML: An ontological mapping onto UIMA Type Systems}, YEAR = {2008}, KEYWORDS = {Infrastructure, UIMA, Event detection}, URL = {https://publications.cnr.it/doc/84724}, CONFERENCE_NAME = {ICGL 2008, The First International Conference on Global Interoperability for Language Resources}, CONFERENCE_PLACE = {Hong Kong}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{DELGRATTA_2008_INPROCEEDINGS_DRT_84728, AUTHOR = {Del Gratta, R. and Ruimy, N. and Toral, A.}, TITLE = {Simple-Clips ongoing research: more information with less data by implementing inheritance}, YEAR = {2008}, ABSTRACT = {This paper presents the application of inheritance to the formal taxonomy (is-a) of a semantically rich Lexical Resource (LR) based on the Generative Lexicon theory, SIMPLE-CLIPS. The aim is to lighten the representation of its semantic layer by reducing the number of encoded relations. A prediction calculation on the impact of introducing inheritance as regards space occupancy is carried out, which yields a significant space reduction of 22%. This is corroborated by its actual application that reduces the number of explicitly encoded relations in this lexicon by 18.4%. Later on, we study the issues that inheritance poses to the Lexical Resources and discuss sensitive solutions, illustrated by examples, to tackle each of them. Finally, we present a discussion on the application of inheritance, from which two advantages arise: consistency enhancement and inference capabilities.}, KEYWORDS = {Lexicon, Knowledge representation, Lexical database}, PAGES = {2380-2385}, URL = {https://publications.cnr.it/doc/84728}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {28-30/5/2008}, } @INPROCEEDINGS{DELLORLETTA_2008_INPROCEEDINGS_DLMMPV_84707, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V. 
and Venturi, G.}, TITLE = {Dal testo alla conoscenza e ritorno: estrazione terminologica e annotazione semantica di basi documentali di dominio}, YEAR = {2008}, ABSTRACT = {The paper focuses on the automatic extraction of domain knowledge from Italian legal texts and presents a fully-implemented ontology learning system (T2K, Text-2-Knowledge) that includes a battery of tools for Natural Language Processing, statistical text analysis and machine learning. Evaluated results show the considerable potential of systems like T2K, exploiting an incremental interleaving of NLP and machine learning techniques for accurate large-scale semi-automatic extraction and structuring of domain-specific knowledge.}, KEYWORDS = {Natural Language Processing, Machine Learning, Knowledge extraction from texts, Ontology learning, Legal ontologies}, PAGES = {197-218}, URL = {http://www.assiterm91.it/wp-content/uploads/2010/11/Convegno-2008.pdf}, VOLUME = {Anno 26, numero 1-2}, PUBLISHER = {Aida (Roma, Italia)}, ISSN = {1594-2201}, CONFERENCE_NAME = {Atti del Convegno Nazionale Ass. I. Term}, CONFERENCE_PLACE = {Arcavacata di Rende (CS)}, CONFERENCE_DATE = {5-7/06/2008}, BOOKTITLE = {Terminologia analisi testuale e documentazione nella città digitale}, } @INPROCEEDINGS{DELLORLETTA_2008_INPROCEEDINGS_DLMMPV_84698, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Marchi, S. and Pirrelli, V. and Venturi, G.}, TITLE = {Acquiring Legal Ontologies from Domain-specific Texts}, YEAR = {2008}, ABSTRACT = {The paper reports on methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts in the environmental domain. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. 
Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. Evaluated results, however preliminary, are very encouraging, showing the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.}, KEYWORDS = {Ontology learning, Document management, knowledge extraction from texts, Natural Language Processing}, PAGES = {98-101}, URL = {https://publications.cnr.it/doc/84698}, CONFERENCE_NAME = {LangTech 2008}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {28-29/02/2008}, } @INPROCEEDINGS{FRANCOPOULO_2008_INPROCEEDINGS_FDSDM_84708, AUTHOR = {Francopoulo, G. and Declerck, T. and Sornlertlamvanich, V. and De La Clergerie, E. and Monachini, M.}, TITLE = {Data Category Registry: Morpho-syntactic and Syntactic Profiles}, YEAR = {2008}, ABSTRACT = {After a brief presentation of the data model, we describe a work in progress to define an initial set of morpho-syntactic and syntactic data categories dedicated to NLP applications. The aim is to improve interoperability among language resources and to optimize the process leading to their integration in applications. The main point is to be sure that when a language resource makes use of a value, the other language resources and programs have the same interpretation for this given value. 
From a practical point of view, these values are collected from existing lists, discussed, extended, and then recorded within a freely accessible data base: the ISO Data Category Registry.}, KEYWORDS = {Data Category, Language resources, ISO}, PAGES = {31-39}, URL = {https://publications.cnr.it/doc/84708}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation-Workshop: Use and usage of language resource-related standards}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26-27-28 May 2004}, } @INPROCEEDINGS{GIOVANNETTI_2008_INPROCEEDINGS_GMM_84706, AUTHOR = {Giovannetti, E. and Marchi, S. and Montemagni, S.}, TITLE = {Combining statistical techniques and lexico-syntactic patterns for semantic relations extraction from text}, YEAR = {2008}, ABSTRACT = {We describe here a methodology to combine two different techniques for Semantic Relation Extraction from texts. On the one hand, generic lexicosyntactic patterns are applied to the linguistically analyzed corpus to detect a first set of pairs of co-occurring words, possibly involved in "syntagmatic" relations. On the other hand, a statistical unsupervised association system is used to obtain a second set of pairs of "distributionally similar" terms, that appear to occur in similar contexts, thus possibly involved in "paradigmatic" relations. The approach aims at learning ontological information by filtering the candidate relations obtained through generic lexico-syntactic patterns and by labelling the anonymous relations obtained through the statistical system. 
The resulting set of relations can be used to enrich existing ontologies and for semantic annotation of documents or web pages.}, KEYWORDS = {Ontology Learning from Text, Semantic Relation Extraction, Lexico-syntactic Patterns, Distributional Similarity}, URL = {http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_54.pdf}, CONFERENCE_NAME = {SWAP 2008-Semantic Web Applications and Perspectives}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {15-17 December 2008}, EDITOR = {Gangemi, A. and Keizer, J. and Presutti, V. and Stoermer, H.}, } @INPROCEEDINGS{GIOVANNETTI_2008_INPROCEEDINGS_GMMB_84726, AUTHOR = {Giovannetti, E. and Marchi, S. and Montemagni, S. and Bartolini, R.}, TITLE = {Ontology Learning and Semantic Annotation: a Necessary Symbiosis}, YEAR = {2008}, ABSTRACT = {Semantic annotation of text requires the dynamic merging of linguistically structured information and a "world model", usually represented as a domain-specific ontology. On the other hand, the process of engineering a domain-ontology through semi-automatic ontology learning system requires the availability of a considerable amount of semantically annotated documents. Facing this bootstrapping paradox requires an incremental process of annotation-acquisition-annotation, whereby domain-specific knowledge is acquired from linguistically-annotated texts and then projected back onto texts for extra linguistic information to be annotated and further knowledge layers to be extracted. The presented methodology is a first step in the direction of a full "virtuous" circle where the semantic annotation platform and the evolving ontology interact in symbiosis. As a case study we have chosen the semantic annotation of product catalogues. 
We propose a hybrid approach, combining pattern matching techniques to exploit the regular structure of product descriptions in catalogues, and Natural Language Processing techniques which are resorted to analyze natural language descriptions. The semantic annotation involves the access to the ontology, semi-automatically bootstrapped with an ontology learning tool from annotated collections of catalogues.}, KEYWORDS = {Information Extraction, Information Retrieval, Ontologies, Tools, Systems}, PAGES = {2079-2085}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {2008}, BOOKTITLE = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odijk, J. and Piperidis, S. and Tapias, D.}, } @INPROCEEDINGS{HAYASHI_2008_INPROCEEDINGS_HDBM_84723, AUTHOR = {Hayashi, Y. and Declerck, T. and Buitelaar, P. and Monachini, M.}, TITLE = {Ontologies for a Global Language Infrastructure}, YEAR = {2008}, ABSTRACT = {Given a situation where human language technologies have been maturing considerably and a rapidly growing range of language data resources being now available, together with natural language processing (NLP) tools/systems, a strong need for a global language infrastructure (GLI) is becoming more and more evident, if one wants to ensure re-usability of the resources. A GLI is essentially an open and web-based software platform on which tailored language services can be efficiently composed, disseminated and consumed. An infrastructure of this sort is also expected to facilitate further development of language data resources and NLP functionalities. 
The aims of this paper are twofold: (1) to discuss necessity of ontologies for a GLI, and (2) to draw a high-level configuration of the ontologies, which are integrated into a comprehensive language service ontology. To these ends, this paper first explores dimensions of GLI, and then draws a triangular view of a language service, from which necessary ontologies are derived. This paper also examines relevant ongoing international standardization efforts such as LAF, MAF, SynAF, DCR and LMF, and discusses how these frameworks are incorporated into our comprehensive language service ontology. The paper concludes in stressing the need for an international collaboration on the development of a standardized language service ontology.}, KEYWORDS = {Ontology, Global language infrastructure (GLI)}, PAGES = {105-112}, URL = {https://publications.cnr.it/doc/84723}, CONFERENCE_NAME = {ICGL 2008-The First International Conference on Global Interoperability for Language Resources}, CONFERENCE_PLACE = {Hong Kong}, CONFERENCE_DATE = {9-11/01/2008}, EDITOR = {Webster, J. and Ide, N. and Fang, A.}, } @INPROCEEDINGS{HAYASHI_2008_INPROCEEDINGS_HNMSC_84725, AUTHOR = {Hayashi, Y. and Narawa, C. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {Ontologizing Lexicon Access Functions based on an LMF-based Lexicon Taxonomy}, YEAR = {2008}, ABSTRACT = {This paper discusses ontologization of lexicon access functions in the context of a service-oriented language infrastructure, such as the Language Grid. In such a language infrastructure, an access function to a lexical resource, embodied as an atomic Web service, plays a crucially important role in composing a composite Web service tailored to a user's specific requirement. To facilitate the composition process involving service discovery, planning and invocation, the language infrastructure should be ontology-based; hence the ontologization of a range of lexicon functions is highly required. 
In a service-oriented environment, lexical resources however can be classified from a service-oriented perspective rather than from a lexicographically motivated standard. Hence to address the issue of interoperability, the taxonomy for lexical resources should be ground to principled and shared lexicon ontology. To do this, we have ontologized the standardized lexicon modeling framework LMF, and utilized it as a foundation to stipulate the service-oriented lexicon taxonomy and the corresponding ontology for lexicon access functions. This paper also examines a possible solution to fill the gap between the ontological descriptions and the actual Web service API by adopting a W3C recommendation SAWSDL, with which Web service descriptions can be linked with the domain ontology.}, KEYWORDS = {Lexicon, LR web services, Standards for LRs, Lexical database}, PAGES = {916-922}, URL = {https://publications.cnr.it/doc/84725}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26-05/1-06-2008}, } @INPROCEEDINGS{LENCI_2008_INPROCEEDINGS_LMPM_84730, AUTHOR = {Lenci, A. and McGillivray, B. and Pirrelli, V. and Montemagni, S.}, TITLE = {Unsupervised Acquisition of Verb Subcategorization Frames from Shallow-Parsed Corpora}, YEAR = {2008}, KEYWORDS = {Acquisition, Machine Learning, Corpus (creation, annotation, etc.), Lexicon, Lexical database}, URL = {https://publications.cnr.it/doc/84730}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{MAGNINI_2008_INPROCEEDINGS_MCTBMLBCTBSS_92169, AUTHOR = {Magnini, B. and Cappelli, A. and Tamburini, F. and Bosco, C. and Mazzei, A. and Lombardo, V. and Bertagna, F. and Calzolari, N. and Toral, A. and Bartalesi, L. V. and Sprugnoli, R. 
and Speranza, M.}, TITLE = {Evaluation of natural language tools for italian: EVALITA 2007}, YEAR = {2008}, ABSTRACT = {EVALITA 2007, the first edition of the initiative devoted to the evaluation of Natural Language Processing tools for Italian, provided a shared framework where participants' systems had the possibility to be evaluated on five different tasks, namely Part of Speech Tagging (organised by the University of Bologna), Parsing (organised by the University of Torino), Word Sense Disambiguation (organised by CNR-ILC, Pisa), Temporal Expression Recognition and Normalization (organised by CELCT, Trento), and Named Entity Recognition (organised by FBK, Trento). We believe that the diffusion of shared tasks and shared evaluation practices is a crucial step towards the development of resources and tools for Natural Language Processing. Experiences of this kind, in fact, are a valuable contribution to the validation of existing models and data, allowing for consistent comparisons among approaches and among representation schemes. The good response obtained by EVALITA, both in the number of participants and in the quality of results, showed that pursuing such goals is feasible not only for English, but also for other languages.}, KEYWORDS = {Natural language evaluation, Standards for LRs, Evaluation methodologies}, PAGES = {2536-2543}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {Proceedings of LREC 2008}, CONFERENCE_PLACE = {Marrakech, Morocco}, CONFERENCE_DATE = {28-30 May 2008}, BOOKTITLE = {Proceeding LREC 2008}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B.}, } @INPROCEEDINGS{MAGNINI_2008_INPROCEEDINGS_MCTBMLBCTBSS_171693, AUTHOR = {Magnini, B. and Cappelli, A. and Tamburini, F. and Bosco, C. and Mazzei, A. and Lombardo, V. and Bertagna, F. and Calzolari, N. and Toral, R. A. and Bartalesi, L. V. and Sprugnoli, R. 
and Speranza, M.}, TITLE = {Evaluation of natural language tools for italian: EVALITA 2007}, YEAR = {2008}, ABSTRACT = {EVALITA 2007, the first edition of the initiative devoted to the evaluation of Natural Language Processing tools for Italian, provided a shared framework where participants' systems had the possibility to be evaluated on five different tasks, namely Part of Speech Tagging (organised by the University of Bologna), Parsing (organised by the University of Torino), Word Sense Disambiguation (organised by CNR-ILC, Pisa), Temporal Expression Recognition and Normalization (organised by CELCT, Trento), and Named Entity Recognition (organised by FBK, Trento). We believe that the diffusion of shared tasks and shared evaluation practices is a crucial step towards the development of resources and tools for Natural Language Processing. Experiences of this kind, in fact, are a valuable contribution to the validation of existing models and data, allowing for consistent comparisons among approaches and among representation schemes. The good response obtained by EVALITA, both in the number of participants and in the quality of results, showed that pursuing such goals is feasible not only for English, but also for other languages.}, KEYWORDS = {Natural language evaluation, Standards for LRs, Evaluation methodologies, H. 3 INFORMATION STORAGE AND RETRIEVAL. Linguistic processing}, PAGES = {2536-2543}, URL = {https://publications.cnr.it/doc/171693}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26th May-1st June 2008}, } @INPROCEEDINGS{MARCHETTI_2008_INPROCEEDINGS_MTVARCMFHIKNRV_173483, AUTHOR = {Marchetti, A. and Tesconi, M. and Vossen, P. and Agirre, E. and Rigau, G. and Calzolari, N. and Monachini, M. and Fellbaum, C. and Hsieh, S. and Isahara, H. and Kanzaki, K. and Neri, F. and Raffaelli, R. 
and Vangent, J.}, TITLE = {KYOTO: A System for Mining, Structuring, and Distributing Knowledge Across Languages and Cultures}, YEAR = {2008}, ABSTRACT = {We outline work to be carried out within the framework of an impending EC project. The goal is to construct a language-independent information system for a specific domain (environment/ecology) anchored in a language-independent ontology that is linked to WordNets in several languages. For each language, information extraction and identification of lexicalized concepts with ontological entries will be done by text miners ("Kybots"). The mapping of language-specific lexemes to the ontology allows for crosslinguistic identification and translation of equivalent terms. The infrastructure developed within this project will enable long-range knowledge sharing and transfer to many languages and cultures, addressing the need for global and uniform transition of knowledge beyond the domain of ecology and environment addressed here.}, KEYWORDS = {Global WordNet Grid, Ontologies and WordNets, Multilinguality, Semantic indexing and search, Text mining}, PAGES = {474-484}, URL = {https://publications.cnr.it/doc/173483}, ISBN = {978-963-482-854-9}, CONFERENCE_NAME = {GWC2008-The Fourth Global WordNet Conference}, CONFERENCE_PLACE = {Szeged, Hungary}, CONFERENCE_DATE = {22-25 Gennaio 2008}, EDITOR = {Tanács, A. and Csendes, D. and Vincze, V. and Fellbaum, C. and Vossen, P.}, } @INPROCEEDINGS{MARINELLI_2008_INPROCEEDINGS_M_84710, AUTHOR = {Marinelli, R.}, TITLE = {Enhancing a Terminological Database with Terms from a Scientific Domain}, YEAR = {2008}, ABSTRACT = {This paper reports on the enhancing of a maritime terminological database by means of a set of terms belonging to meteorology. 
The main phases of this research are described and the initial results outlined: the model of the terminological database, following EWN/IWN; the criteria used to build corpora of specialized texts to be employed as the source for term selection and extraction; the use of a semantic database (IWN) as source for exporting synsets to be coded in the terminological resource and as reference (WN 3.0) for comparing and evaluating synsets. The set of semantic relations useful for codifying new terms belonging to the discipline of meteorology is described; in addition to the semantic relations provided by the IWN model, new relations are introduced which are more suitably tailored to specific needs either scientific or pragmatic.}, KEYWORDS = {Terminology, Corpus linguistics, Semantics, Lexical databases, Semantic relations}, PAGES = {165-172}, URL = {https://publications.cnr.it/doc/84710}, PUBLISHER = {Vytautas Magnus University, Institute of the Lithuanian Language (Kaunas, LTU)}, ISBN = {978-9955-704-53-9}, CONFERENCE_NAME = {The Third Baltic Conference on Human Language Technologies}, CONFERENCE_PLACE = {Kaunas, Lithuania}, CONFERENCE_DATE = {October 4-5 2007}, BOOKTITLE = {The Third Baltic Conference on Human Language Technologies}, EDITOR = {Cermak, F. and Marcinkevičienė, R. and Rimkutė, E. and Zabarskaitė, J.}, } @INPROCEEDINGS{MARINELLI_2008_INPROCEEDINGS_MB_84732, AUTHOR = {Marinelli, R. and Bindi, R.}, TITLE = {Uso metaforico e metonimico dei nomi propri: una verifica su un corpus di italiano contemporaneo}, YEAR = {2008}, KEYWORDS = {Italian Corpus, Proper Names}, URL = {https://publications.cnr.it/doc/84732}, CONFERENCE_NAME = {Prospettive nello studio del lessico italiano: atti del IX Congresso SILFI, Firenze, 14-17 giugno 2006}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{MARINELLI_2008_INPROCEEDINGS_MBMSCCC_112941, AUTHOR = {Marinelli, R. and Bindi, R. and Marchi, S. and Santarcangelo, E. L. and Cavallaro, F. I. 
and Castellani, E. and Carli, G.}, TITLE = {Suscettibilità ipnotica e linguaggio}, YEAR = {2008}, ABSTRACT = {--}, KEYWORDS = {Psycholinguistics}, PAGES = {10}, URL = {https://publications.cnr.it/doc/112941}, PUBLISHER = {Bulzoni (Roma, ITA)}, ISBN = {978-88-7870-652-1}, CONFERENCE_NAME = {XLII Congresso Internazionale di Studi della Società di Linguistica Italiana}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {25-27/09/2008}, } @INPROCEEDINGS{MARINELLI_2008_INPROCEEDINGS_MTB_84709, AUTHOR = {Marinelli, R. and Tiberi, M. and Bindi, R.}, TITLE = {Encoding Terms from a Scientific Domain in a Terminological Database: Methodology and Criteria}, YEAR = {2008}, KEYWORDS = {Knowledge representation, Lexicon, Corpus (creation, annotation, etc.), Lexical database}, URL = {https://publications.cnr.it/doc/84709}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{MAZZOCCHI_2008_INPROCEEDINGS_MMT_84254, AUTHOR = {Mazzocchi, F. and Marinelli, R. and Tiberi, M.}, TITLE = {Refining the thesaural associative relationship by applying the EuroWordnet semantic model}, YEAR = {2008}, ABSTRACT = {Thesauri are tools which semantically organize a domain of knowledge for operational purposes. Their relational semantics is concerned with methods that connect terms with related meanings and are designed to support information retrieval, namely enhancing the information recall performance and contributing to improve precision, too. The network of relations of a thesaurus has a semantic function: by means of it, in fact, a representation of the meaning of each thesaurus term is provided, as well as of the conceptual structure of a subject area. 
The traditional thesaurus format - as described in international standards - includes three basic relationships, two of them at a conceptual level, hierarchical and associative relations, the other mostly at a lexical level, relation of equivalence. However, a rather widespread opinion is that this format should be refined in order to cope with the current needs of information organization. This refinement is necessary to enhance thesaurus suitability for uses in artificial intelligence (AI) and in the Semantic Web environments, as well as to increase possibilities for IR. This paper discusses the possibility of refining the associative relation into a number of sub-kinds by adopting the semantic model of EuroWordNet (EWN), as it was used, according to one of its national versions, ItalWordNet (IWN), to structure a terminological resource for a specific domain. A number of issues that such a work of refinement could imply, and in particular its domain dependence, are also discussed.}, KEYWORDS = {thesaurus, lexical database, semantic relations}, PAGES = {61-77}, URL = {https://publications.cnr.it/doc/84254}, ISBN = {978-3-89913-644-9}, CONFERENCE_NAME = {TKE2008, Managing Ontologies and Lexical Resources: 8th International Conference on Terminology and Knowledge Engineering}, CONFERENCE_PLACE = {Copenhagen}, CONFERENCE_DATE = {19-20. VIII. 2008}, BOOKTITLE = {Proceedings of the 8th International Conference on Terminology and Knowledge Engineering (TKE 2008), "Managing Ontologies and Lexical Resources"}, EDITOR = {Madsen, B. N. and Thomsen, H. E.}, } @INPROCEEDINGS{MONACHINI_2008_INPROCEEDINGS_MQDC_84731, AUTHOR = {Monachini, M. and Quochi, V. and Del Gratta, R. and Calzolari, N.}, TITLE = {Using LMF to Shape a Lexicon for the Biomedical Domain}, YEAR = {2008}, ABSTRACT = {This paper describes the design, implementation and population of the BioLexicon in the framework of BootStrep, an FP6 project. 
The BioLexicon (BL) is a lexical resource designed for text mining in the bio-domain. It has been conceived to meet both domain requirements and upcoming ISO standards for lexical representation. The data model and data categories are compliant to the ISO Lexical Markup Framework and the Data Category Registry. The BioLexicon integrates features of lexicons and terminologies: term entries (and variants) derived from existing resources are enriched with linguistic features, including sub-categorization and predicate-argument information, extracted from texts. Thus, it is an extendable resource. Furthermore, the lexical entries will be aligned to concepts in the BioOntology, the ontological resource of the project. The BL implementation is an extensible relational database with automatic population procedures. Population relies on a dedicated input data structure allowing to upload terms and their linguistic properties and "pull-and-push" them in the database. The BioLexicon teaches that the state-of-the-art is mature enough to aim at setting up a standard in this domain. Being conformant to lexical standards, the BioLexicon is interoperable and portable to other areas.}, KEYWORDS = {Domain terminologies, Computational lexicons, Lexical standards, Lexical architectures}, PAGES = {153-157}, URL = {https://publications.cnr.it/doc/84731}, CONFERENCE_NAME = {LangTech 2008-Tecnologia applicata alla linguistica}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {28-29 February 2008}, EDITOR = {Delogu, C. and Falcone, M.}, } @INPROCEEDINGS{MORGAVI_2008_INPROCEEDINGS_MM_173251, AUTHOR = {Morgavi, G. and Marconi, L.}, TITLE = {Growing Up of Autonomous Agents: an Emergent Phenomenon}, YEAR = {2008}, ABSTRACT = {A fundamental research challenge is the design of robust artifacts that are capable of operating under changing environments and noisy input, and yet exhibit the desired behavior and response time. 
These systems should be able to adapt and learn how to react to unforeseen scenarios as well as to display properties comparable to biological entities. The turn to nature has brought us many unforeseen great concepts. Biological systems are able to handle many of these challenges with an elegance and efficiency still far beyond current human artifacts. A living artifact grows up when its capabilities, abilities/knowledge, shift to a further level of complexity, i.e. the complexity rank of its internal capabilities performs a step forward. In the attempt to define an architecture for autonomous growing up agents [1]. We conducted an experiment on the abstraction process in children as natural parts of a cognitive system. We found that linguistic growing up involve a number of different trial processes. We identified a fixed number of distinct paths that were crossed by children. Once a given interpretation paths was discovered useless, they tried to follow another path, until the new meaning was emerging. This study generates suggestion about the evolutionary conditions conducive to the emergence of growing up in robots and provides guidelines for designing artificial evolutionary systems displaying spontaneous adaptation abilities. The importance of multi-sensor perception, motivation and emotional drives are underlined and, above all, the growing up insights shows similarities to emergent self-organized behaviors.}, KEYWORDS = {growing up, emergence, adaptive systems, living artifacts, epigenetic robotics}, PAGES = {177-186}, URL = {https://publications.cnr.it/doc/173251}, VOLUME = {1051}, DOI = {10.1063/1.3020657}, ISBN = {978-0-7354-0579-0}, CONFERENCE_NAME = {CASYS’2007 Eighth International Conference on Computing Anticipatory Systems}, CONFERENCE_PLACE = {Liege, Belgium}, CONFERENCE_DATE = {5-13 Agosto 2007}, BOOKTITLE = {Computing Anticipatory Systems}, EDITOR = {Dubois, D. M.}, } @INPROCEEDINGS{MORGAVI_2008_INPROCEEDINGS_MMM_79660, AUTHOR = {Morgavi, G. 
and Marconi, L. and Morando, M.}, TITLE = {A contribution to specification toward truly autonomous robots}, YEAR = {2008}, ABSTRACT = {A great deal of current research work in robotics and autonomous systems is still focused on getting an agent to learn to do some task such as recognizing an object or going to a specific place. The learning process may be supervised, unsupervised or a process of occasional reinforcement, but the whole aim in such work is to get the robot to achieve the task that was predefined by the researcher. The next logical step along the road towards truly autonomous robots that can dive in unpredictable environments is to investigate how one might design robots that are capable of `growing up' through experience. A living artifact grows up when its capabilities, abilities/knowledge, shift to a further level of complexity, i.e. the complexity rank of its internal capabilities performs a step forward. Robotics researchers increasingly agree that ideas from nature and self-organization can strongly benefit the design of autonomous robots. In this paper we studied the modalities through which pre-school children (from 4 to 5) tackle with a growing up process: the abstraction. Children of these ages are not supposed to be able to perform the abstraction process, but they have a sufficient knowledge of the natural language that allow the description of the processes they are using when they try to reach the meaning of an abstract sentence. This experiment resulted in some very interesting suggestions on what can be useful for the architecture of an adaptive and evolving robot. 
The importance of multi-sensor perception, motivation and emotional drives are underlined and, above all, the growing up insights shows similarities to emergent self-organized behaviors.}, KEYWORDS = {growing up, emergence, adaptive systems, living artifacts, epigenetic robotics}, PAGES = {153-158}, URL = {http://www.eurasip.org/Proceedings/Ext/CIP2008/CIP08%20Authors.html}, CONFERENCE_NAME = {IAPR Workshop on Cognitive Information Processing}, CONFERENCE_PLACE = {Santorini, Greece}, CONFERENCE_DATE = {9-10 Giugno 2008}, } @INPROCEEDINGS{PICCHI_2008_INPROCEEDINGS_PSCB_84719, AUTHOR = {Picchi, E. and Sassolini, E. and Cucurullo, S. and Bertagna, F.}, TITLE = {Mining the News with Semantic Press}, YEAR = {2008}, KEYWORDS = {Text mining, Press review}, URL = {https://publications.cnr.it/doc/84719}, CONFERENCE_NAME = {LangTech 2008}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{PICCHI_2008_INPROCEEDINGS_PSCBB_84727, AUTHOR = {Picchi, E. and Sassolini, E. and Cucurullo, S. and Bertagna, F. and Baroni, P.}, TITLE = {Semantic Press}, YEAR = {2008}, ABSTRACT = {In this paper Semantic Press, a tool for the automatic press review, is introduced. It is based on Text Mining technologies and is tailored to meet the needs of the eGovernment and eParticipation communities. First, a general description of the application demands emerging from the eParticipation and eGovernment sectors is offered. 
Then, an introduction to the framework of the automatic analysis and classification of newspaper content is provided, together with a description of the technologies underlying it.}, KEYWORDS = {Text Mining, Tools, Systems, Applications}, PAGES = {2752-2756}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008-Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech}, CONFERENCE_DATE = {26/05/2008-01/06/2008}, BOOKTITLE = {Proceedings of the Sixth International Conference on Language Resources and Evaluation}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odjik, J. and Piperidis, S. and Tapias, D.}, } @INPROCEEDINGS{QUOCHI_2008_INPROCEEDINGS_QC_288714, AUTHOR = {Quochi, V. and Calderone, B.}, TITLE = {Learning properties of Noun Phrases: from data to functions}, YEAR = {2008}, ABSTRACT = {The paper presents two experiments of unsupervised classification of Italian noun phrases. The goal of the experiments is to identify the most prominent contextual properties that allow for a functional classification of noun phrases. For this purpose, we used a Self Organizing Map is trained with syntactically-annotated contexts containing noun phrases. The contexts are defined by means of a set of features representing morpho-syntactic properties of both nouns and their wider contexts. Two types of experiments have been run: one based on noun types and the other based on noun tokens. The results of the type simulation show that when frequency is the most prominent classification factor, the network isolates idiomatic or fixed phrases. 
The results of the token simulation experiment, instead, show that, of the 36 attributes represented in the original input matrix, only a few of them are prominent in the re-organization of the map. In particular, key features in the emergent macro-classification are the type of determiner and the grammatical number of the noun. An additional but not less interesting result is an organization into semantic/pragmatic micro-classes. In conclusions, our result confirm the relative prominence of determiner type and grammatical number in the task of noun (phrase) categorization.}, KEYWORDS = {cognitive linguistics, noun phrase}, PAGES = {2596-2602}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/summaries/644.html}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {Sixth International Conference on Language Resources and Evaluation (LREC'08)}, CONFERENCE_PLACE = {Marrakech, Morocco}, CONFERENCE_DATE = {28-30 Maggio 2008}, } @INPROCEEDINGS{QUOCHI_2008_INPROCEEDINGS_QMDC_84700, AUTHOR = {Quochi, V. and Monachini, M. and Del Gratta, R. and Calzolari, N.}, TITLE = {A lexicon for biology and bioinformatics: the BOOTStrep experience}, YEAR = {2008}, KEYWORDS = {Lexicon, Ontologies, Lexical database}, PAGES = {2285-2292}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/576_paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26-05/1-06-2008}, } @INPROCEEDINGS{ROVENTINI_2008_INPROCEEDINGS_RR_84720, AUTHOR = {Roventini, A. 
and Ruimy, N.}, TITLE = {Mapping Events and Abstract Entities from PAROLE-SIMPLE-CLIPS to ItalWordNet}, YEAR = {2008}, KEYWORDS = {Lexicon, Lexical database, Semantics, Ontologies}, URL = {https://publications.cnr.it/doc/84720}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{RUIMY_2008_INPROCEEDINGS_RRMU_84718, AUTHOR = {Ruimy, N. and Roventini, A. and Marinelli, R. and Ulivieri, M.}, TITLE = {Linking and Integrating two Electronic Lexicons}, YEAR = {2008}, KEYWORDS = {Electronics Lexicons, Language Resources}, URL = {https://publications.cnr.it/doc/84718}, CONFERENCE_NAME = {The First International Conference on Global Interoperability for Language Resources}, CONFERENCE_PLACE = {Hong Kong}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{RUIMY_2008_INPROCEEDINGS_RT_84721, AUTHOR = {Ruimy, N. and Toral, R. A.}, TITLE = {More semantic links in the SIMPLE-CLIPS database}, YEAR = {2008}, ABSTRACT = {Notwithstanding its acknowledged richness, the SIMPLE semantic model does not offer the representational vocabulary for encoding some conceptual links holding between events and their participants and among co-participants in events. Although critical for boosting performance in many NLP application tasks, such deep lexical information is therefore only partially encoded in the SIMPLE-CLIPS Italian semantic database. This paper reports on the enrichment of the SIMPLE relation set by some expressive means, namely semantic relations, borrowed from the EuroWordNet model and their implementation in the SIMPLE-CLIPS lexicon. The original situation existing in the database, as to the expression of this type of information is described and the loan descriptive vocabulary presented. 
Strategies based on the exploitation of the source lexicon data were adopted to induce new information: a wide range of semantic - but also syntactic - information was investigated for singling out word senses candidate to be linked by the new relations. The lexicon enrichment by 5,000 new relations instantiated so far has therefore been carried out as a largely automated, low-effort and cost-free process, with no heavy human intervention. The redundancy set off by such an extension of information is being addressed by the implementation of inheritance in the SIMPLE-CLIPS database (Del Gratta et al., 2008).}, KEYWORDS = {Lexicon, Knowledge representation, Semantics, Lexical database}, PAGES = {3555-3560}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26/05-01/06/2008}, EDITOR = {Calzolari, N. and Choukri, K. and Maegaard, B. and Mariani, J. and Odjik, J. and Piperidis, S. and Tapias, D.}, } @INPROCEEDINGS{SASAKI_2008_INPROCEEDINGS_SMPRMA_84703, AUTHOR = {Sasaki, Y. and Montemagni, S. and Pezik, P. and Rebholz Schuhmann, D. and McNaught, J. and Ananiadou, S.}, TITLE = {BioLexicon: A Lexical Resource for the Biology Domain}, YEAR = {2008}, KEYWORDS = {BioLexicon, Terminological verbs}, URL = {https://publications.cnr.it/doc/84703}, CONFERENCE_NAME = {Third International Symposium on Semantic Mining in Biomedicine}, CONFERENCE_PLACE = {Turku, Finland}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{TAKENOBU_2008_INPROCEEDINGS_TKHHCMSSSCY_84701, AUTHOR = {Takenobu, T. and Kaplan, D. and Huang, C. and Hsieh, S. and Calzolari, N. and Monachini, M. and Soria, C. and Shirai, K. and Sornlertlamvanich, V. and Charoenporn, T. 
and Yingju, X.}, TITLE = {Adapting International Standard for Asian Language Technologies}, YEAR = {2008}, ABSTRACT = {Corpus-based approaches and statistical approaches have been the main stream of natural language processing research for the past two decades. Language resources play a key role in such approaches, but there is an insufficient amount of language resources in many Asian languages. In this situation, standardisation of language resources would be of great help in developing resources in new languages. This paper presents the latest development efforts of our project which aims at creating a common standard for Asian language resources that is compatible with an international standard. In particular, the paper focuses on i) lexical specification and data categories relevant for building multilingual lexical resources for Asian languages; ii) a core upper-layer ontology needed for ensuring multilingual interoperability and iii) the evaluation platform used to test the entire architectural framework.}, KEYWORDS = {LR national/international projects, Organizational/policy issues, LR Infrastructures and Architectures, Lexicon, Lexical database}, PAGES = {1663}, URL = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/422_paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Morocco}, CONFERENCE_DATE = {26-05/1-06-2008}, } @INPROCEEDINGS{THOMPSON_2008_INPROCEEDINGS_TCAMMTV_84704, AUTHOR = {Thompson, P. and Cotter, P. and Ananiadou, S. and McNaught, J. and Montemagni, S. and Trabucco, A. 
and Venturi, G.}, TITLE = {Building a Bio-Event Annotated Corpus for the Acquisition of Semantic Frames from Biomedical Corpora}, YEAR = {2008}, KEYWORDS = {Corpus (creation, annotation, etc.), Text mining, Semantics, Event Extraction}, PAGES = {2159-2166}, URL = {https://publications.cnr.it/doc/84704}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Morocco}, CONFERENCE_DATE = {28-30 maggio 2008}, } @INPROCEEDINGS{THOMPSON_2008_INPROCEEDINGS_TVMMA_84705, AUTHOR = {Thompson, P. and Venturi, G. and McNaught, J. and Montemagni, S. and Ananiadou, S.}, TITLE = {Categorising Modality in Biomedical Texts}, YEAR = {2008}, ABSTRACT = {The accurate recognition of modal information is vital for the correct interpretation of statements. In this paper, we report on the collection a list of words and phrases that express modal information in biomedical texts, and propose a categorisation scheme according to the type of information conveyed. We have performed a small pilot study through the annotation of 202 MEDLINE abstracts according to our proposed scheme. Our initial results suggest that modality in biomedical statements can be predicted fairly reliably though the presence of particular lexical items, together with a small amount of contextual information.}, KEYWORDS = {Biomedical texts, Modality}, PAGES = {27-34}, URL = {https://publications.cnr.it/doc/84705}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation: Workshop 'Building and Evaluating Resources for Biomedical Text Mining'}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26 maggio 2008}, } @INPROCEEDINGS{TORAL_2008_INPROCEEDINGS_TMM_84722, AUTHOR = {Toral, R. A. and Muñoz, R. 
and Monachini, M.}, TITLE = {Named Entity WordNet}, YEAR = {2008}, ABSTRACT = {This paper presents the automatic extension of Princeton WordNet with Named Entities (NEs). This new resource is called Named Entity WordNet. Our method maps the noun is-a hierarchy of WordNet to Wikipedia categories, identifies the NEs present in the latter and extracts different information from them such as written variants, definitions, etc. This information is inserted into a NE repository. A module that converts from this generic repository to the WordNet specific format has been developed. The paper explores different aspects of our methodology such as the treatment of polysemous terms, the identification of hyponyms within the Wikipedia categorization system, the identification of Wikipedia articles which are NEs and the design of a NE repository compliant with the LMF ISO standard. So far, this procedure enriches WordNet with 310,742 NEs and 381,043 "instance of" relations.}, KEYWORDS = {Lexicon, Named Entity recognition, Ontologies, Lexical database}, PAGES = {741-747}, URL = {https://publications.cnr.it/doc/84722}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26-05/1-06-2008}, } @INPROCEEDINGS{TORAL_2008_INPROCEEDINGS_TQDMSC_84714, AUTHOR = {Toral, R. A. and Quochi, V. and Del Gratta, R. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {Lexically-based Ontologies and Ontologically Based Lexicons}, YEAR = {2008}, ABSTRACT = {This paper deals with the relations between ontologies and lexicons. We study the role of these two components and their evolution during the last years in the field of Computational Linguistics. Subsequently, we survey the current lines of research at ILC-CNR which tackle this topic. 
They involve (I) the reuse of already existing Lexical Resources to derive formal ontologies, (II) the conversion and combination of terminologies into rich and formal Lexical Resources and (III) the use of formal ontologies as the backbone of multilingual Lexical Resources.}, KEYWORDS = {Resource Infrastructure, UIMA, Clarin}, PAGES = {49-59}, URL = {https://publications.cnr.it/doc/84714}, CONFERENCE_NAME = {AI*IA 2008-10th Congress of Italian Association for Artificial Intelligence}, CONFERENCE_PLACE = {Cagliari}, CONFERENCE_DATE = {11-13 Settembre 2008}, } @INPROCEEDINGS{VOSSEN_2008_INPROCEEDINGS_VACFHHIKMMNRRTV_84716, AUTHOR = {Vossen, P. and Agirre, E. and Calzolari, N. and Fellbaum, C. and Hsieh, S. and Huang, C. and Isahara, H. and Kanzaki, K. and Marchetti, A. and Monachini, M. and Neri, F. and Raffaelli, R. and Rigau, G. and Tesconi, M. and Vangent, J.}, TITLE = {KYOTO: A System for Mining, Structuring, and Distributing Knowledge Across Languages and Cultures}, YEAR = {2008}, ABSTRACT = {We outline work performed within the framework of a current EC project. The goal is to construct a language-independent information system for a specific domain (environment/ecology/biodiversity) anchored in a language-independent ontology that is linked to wordnets in seven languages. For each language, information extraction and identification of lexicalized concepts with ontological entries is carried out by text miners ("Kybots"). The mapping of language-specific lexemes to the ontology allows for crosslinguistic identification and translation of equivalent terms. 
The infrastructure developed within this project enables long-range knowledge sharing and transfer across many languages and cultures, addressing the need for global and uniform transition of knowledge beyond the specific domains addressed here.}, KEYWORDS = {Information Extraction, Information Retrieval, Digital libraries, Lexicon, Lexical database}, URL = {https://publications.cnr.it/doc/84716}, ISBN = {2-9517408-4-0}, CONFERENCE_NAME = {LREC 2008, Sixth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Marrakech, Marocco}, CONFERENCE_DATE = {26-05/1-06-2008}, } @INPROCEEDINGS{CALZOLARI_2008_INPROCEEDINGS_C_112933, AUTHOR = {Calzolari, N.}, TITLE = {New European Infrastructural and Networking Initiatives}, YEAR = {2008}, KEYWORDS = {Language Resources, Technology Infrastructure}, URL = {https://publications.cnr.it/doc/112933}, CONFERENCE_NAME = {LangTech 2008}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{CARLI_2008_INPROCEEDINGS_CMBMCCS_112938, AUTHOR = {Carli, G. and Marinelli, R. and Bindi, R. and Marchi, S. and Cavallaro, F. I. and Castellani, E. and Santarcangelo, E. L.}, TITLE = {Language modulation by hypnotizability}, YEAR = {2008}, KEYWORDS = {Psychotherapy, Neurorehabilitation}, URL = {https://publications.cnr.it/doc/112938}, CONFERENCE_NAME = {59° Congresso Nazionale della Società Italiana di Fisiologia}, CONFERENCE_PLACE = {Villasimius (CA)}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{CIGNONI_2008_INPROCEEDINGS_CPS_112937, AUTHOR = {Cignoni, L. and Pardelli, G. and Sassi, M.}, TITLE = {Grey Literature for Natural Language Processing: a Terminological and Statistical Approach}, YEAR = {2008}, ABSTRACT = {This paper presents the results of a study on grey literature (GL) in the field of Natural Language Processing (NLP). Our data has been collected in a corpus of ca 13,000 records corresponding to the titles of papers presented at International Conferences from 1950 to June 2008. 
A statistical representation of the most significant terms relative to GL in NLP and other interrelated disciplines associates old and new words, highlighting the terminological changes that have taken place in the course of time. Aim of our study is to contribute to the creation of language resources for the extraction of GL coming from the Web in order to help prevent the disappearance of documents containing NLP words that have undergone rapid development over the last decades. This paper is organised as follows: after a general introduction to our work, section 2 provides a historical overview of NLP; sections 3 and 4 offer an account of the most relevant terms used by specialists in different periods, and indicative of the changes that have taken place; section 5 describes the methodology we have used and also contains information on our GL database and a graphical representation of the data. Finally, the conclusions stress the need to integrate pre-existing or obsolete words and expressions, creating NLP synonym relations.}, KEYWORDS = {Computational Linguistics, Grey Literature}, PAGES = {116-120}, URL = {http://hdl.handle.net/10068/697993}, ISBN = {978-90-77484-12-8}, CONFERENCE_NAME = {Tenth International Conference on Grey Literature: Designing the Grey Grid for Information Society}, CONFERENCE_PLACE = {Amsterdam}, CONFERENCE_DATE = {December 8-9 2008}, EDITOR = {Farace, D. G. 
and Frantzen, J.}, } @INPROCEEDINGS{MARINELLI_2008_INPROCEEDINGS_M_112939, AUTHOR = {Marinelli, R.}, TITLE = {Ontological Structure and Digital Corpora for Metaphorical Sense Recognition}, YEAR = {2008}, KEYWORDS = {Ontology}, URL = {https://publications.cnr.it/doc/112939}, CONFERENCE_NAME = {XXVI AESLA Conference 'From Applied Linguistics to the Linguistics of the Mind: Issues, Practices and Trends'}, CONFERENCE_PLACE = {Almeria}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{MARINELLI_2008_INPROCEEDINGS_M_112942, AUTHOR = {Marinelli, R.}, TITLE = {Analisi di metafore e espressioni idiomatiche per mezzo di risorse computazionali e corpora elettronici}, YEAR = {2008}, KEYWORDS = {risorse computazionali, terminologia, linguaggio figurativo, corpora, ontologia}, URL = {https://publications.cnr.it/doc/112942}, CONFERENCE_NAME = {Paremiologia: classificazione, traduzione e tecnologie informatiche}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{MONTEMAGNI_2008_INPROCEEDINGS_M_112936, AUTHOR = {Montemagni, S.}, TITLE = {Exploring the correlation between phonetic and lexical variation in Tuscany}, YEAR = {2008}, KEYWORDS = {Dialectal variation, ALT-Web}, URL = {https://publications.cnr.it/doc/112936}, CONFERENCE_NAME = {Thirteenth International Conference on Methods in Dialectology}, CONFERENCE_PLACE = {Leeds}, CONFERENCE_DATE = {2008}, } @INPROCEEDINGS{PIRRELLI_2008_INPROCEEDINGS_P_288118, AUTHOR = {Pirrelli, V.}, TITLE = {Morphology Learning as Paradigm Learning: Developmental and Computational Evidence from Romance Languages}, YEAR = {2008}, ABSTRACT = {In a comprehensive comparison of the developmental stages in the acquisition of inflection in nearly two dozen languages (in the Indo-European, Ugro-Finnic and Semitic families plus Turkish), Bittner et al. 
(2003) arrive at the conclusion that the transition from lexical processing to morphological patterning is not the automatic outcome of rote lexical storage, but rather the result of an active construction of the child, crucially conditioned by typological factors such as richness, uniformity and transparency of inflectional paradigms. In the present talk I intend to assess this hypothesis by observing the dynamics of a purely morphological acquisition of Romance verb paradigms through a family of Artificial Neural Networks known as Self-Organizing Maps (Kohonen 2002). I shall show that the interplay between built-in principles of acquisition of time-coded sequences and morphology-specific principles of organization of inflectional paradigms can go a long way in accounting for the typological trends highlighted in Bittner et al. (2003). Reported results allow us to draw some general conclusions concerning the process of morphology acquisition as paradigm-based learning and lead to a reappraisal of the traditional one-route vs. dual-route debate in morphology processing and learning.}, URL = {http://www.mod-langs.ox.ac.uk/romance-morphology/oxmorph1.html}, CONFERENCE_NAME = {First Oxford Workshop on Romance Verb Morphology}, CONFERENCE_PLACE = {Trinity College, Oxford, UK}, CONFERENCE_DATE = {28 August 2008}, } @INPROCEEDINGS{REBHOLZSCHUHMANN_2008_INPROCEEDINGS_RPLDKSMMMCA_112935, AUTHOR = {Rebholz Schuhmann, D. and Pezik, P. and Lee, V. and Del Gratta, R. and Kim, J. and Sasaki, Y. and McNaught, J. and Montemagni, S. and Monachini, M. and Calzolari, N. and Ananiadou, S.}, TITLE = {BioLexicon: Towards a reference terminological resource in the biomedical domain}, YEAR = {2008}, ABSTRACT = {The BioLexicon is a publicly available large-scale terminological resource which brings together potential terms from several resources representing selected semantic types (genes, proteins, chemicals, species, enzymes, selected ontological terms). 
The schema of the BioLexicon enables improved resolution of term ambiguity and follows lexical standards for terminological resources.}, KEYWORDS = {BioLexicon}, URL = {https://publications.cnr.it/doc/112935}, ISBN = {978-1-61567-371-1}, CONFERENCE_NAME = {16th Annual International Conference on Intelligent Systems for Molecular Biology}, CONFERENCE_PLACE = {Toronto, Canada}, CONFERENCE_DATE = {19-23 Luglio 2008}, } @TECHREPORT{ALIPRANDI_2008_TECHREPORT_ANMRTSMVBAAARS_157449, AUTHOR = {Aliprandi, C. and Neri, F. and Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C. and Monachini, M. and Vossen, P. and Bosma, W. and Agirre, E. and Artola, X. and Arantza, D. and Rigau, G. and Soroa, A.}, TITLE = {Database models and data formats}, YEAR = {2008}, KEYWORDS = {XML data format, TMF, SEMAF, OWL/KIF, FACTAF}, URL = {https://publications.cnr.it/doc/157449}, } @TECHREPORT{BINDI_2008_TECHREPORT_BMGP_157447, AUTHOR = {Bindi, R. and Marinelli, R. and Goggi, S. and Picchi, E.}, TITLE = {LE-PAROLE, Italian Corpus Description, Part Available for Distribution. Updated Version}, YEAR = {2008}, KEYWORDS = {Corpus, Corpus linguistics, Databases}, URL = {https://publications.cnr.it/doc/157447}, } @TECHREPORT{CININI_2008_TECHREPORT_CS_390626, AUTHOR = {Cinini, A. and Sassi, M.}, TITLE = {Aggiornamento della Banca Dati del CSM}, YEAR = {2008}, ABSTRACT = {Aggiornamento della Banca dati delle sentenze e ordinanze della Sezione Disciplinare del Consiglio Superiore della Magistratura, realizzata in collaborazione con l'Istituto di Ricerca sui Sistemi Giudiziari (IRSIG-CNR), con i documenti relativi agli anni 2004-2007. 
Sperimentazione di nuove funzioni di estrazione dell'informazione tramite software di trattamento automatico del linguaggio (TAL), con particolare riferimento ad analisi diacroniche dei risultati di ricerche complesse su dati testuali.}, KEYWORDS = {Informatica giuridica documentale, Analisi Sentenze, DBT}, PAGES = {1-17}, URL = {https://publications.cnr.it/doc/390626}, } @TECHREPORT{MARCHETTI_2008_TECHREPORT_MRTSMVB_262193, AUTHOR = {Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C. and Monachini, M. and Vossen, P. and Bosma, W.}, TITLE = {XML Schema for Wordnet and Ontology: DELIVERABLE NR. 1 /WP NR. 7}, YEAR = {2008}, ABSTRACT = {This deliverable describes the XML schemata adopted to represent all the data related to the management of the multi-language wordnets and the ontology; they constitute the set of linguistic and semantic resources of KYOTO system.}, URL = {http://www2.let.vu.nl/twiki/pub/Kyoto/WP07:DatabaseSystemsAndWiki/D7.1_XML_Schema_for_Wordnet_and_Ontology_v2.0.pdf}, } @TECHREPORT{MARCHETTI_2008_TECHREPORT_MRTSMVB_157455, AUTHOR = {Marchetti, A. and Ronzano, F. and Tesconi, M. and Soria, C. and Monachini, M. and Vossen, P. and Bosma, W.}, TITLE = {XML Schema for Wordnet and Ontology}, YEAR = {2008}, ABSTRACT = {This deliverable describes the XML schema adopted to represent all the data related to the management of the multi-language wordnets and the ontology; they constitute the set of linguistic and semantic resources of KYOTO system.}, KEYWORDS = {XML Schema, Wordnet, Ontology, LMF, TMF}, URL = {https://publications.cnr.it/doc/157455}, } @TECHREPORT{MARINELLI_2008_TECHREPORT_M_157445, AUTHOR = {Marinelli, R.}, TITLE = {Descrizione dei criteri e dei metodi per la costruzione di un database di terminologia}, YEAR = {2008}, KEYWORDS = {Terminology}, URL = {https://publications.cnr.it/doc/157445}, } @TECHREPORT{MARINELLI_2008_TECHREPORT_MT_157446, AUTHOR = {Marinelli, R. 
and Tiberi, M.}, TITLE = {L'ampliamento del db semantico lessicale terminologico Mariterm con un insieme di termini di meteorologia}, YEAR = {2008}, KEYWORDS = {Terminology}, URL = {https://publications.cnr.it/doc/157446}, } @TECHREPORT{MONACHINI_2008_TECHREPORT_MS_157451, AUTHOR = {Monachini, M. and Soria, C.}, TITLE = {Report on use of LMF for representing WordNets}, YEAR = {2008}, KEYWORDS = {WordNets}, URL = {https://publications.cnr.it/doc/157451}, } @TECHREPORT{MONACHINI_2008_TECHREPORT_MS_157452, AUTHOR = {Monachini, M. and Soria, C.}, TITLE = {Report on use of TMF and LMF for representing raw terms}, YEAR = {2008}, KEYWORDS = {Terminological Data Collection, Terminological Markup Framework, Terminological Markup Language}, URL = {https://publications.cnr.it/doc/157452}, } @TECHREPORT{MONACHINI_2008_TECHREPORT_MSC_157454, AUTHOR = {Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {The Lexical Grid: Lexical Resources in Language Infrastructures}, YEAR = {2008}, ABSTRACT = {Language Resources are recognized as central and strategic for the development of any Human Language Technology system and application product. They play a critical role as horizontal technology and have been recognized in many occasions as a priority also by national and supra-national funding a number of initiatives (such as EAGLES, ISLE, ELRA) to establish some sort of coordination of LR activities, and a number of large LR creation projects, both in the written and in the speech areas.}, KEYWORDS = {Human Language Technology, Language Resources}, URL = {https://publications.cnr.it/doc/157454}, } @TECHREPORT{MONTEMAGNI_2008_TECHREPORT_M_157448, AUTHOR = {Montemagni, S.}, TITLE = {Augmented version of the bio-lexicon extended with bio event information and term-to-term weighted links}, YEAR = {2008}, KEYWORDS = {Bio-lexicon}, URL = {https://publications.cnr.it/doc/157448}, } @TECHREPORT{PARDELLI_2008_TECHREPORT_PSOP_255578, AUTHOR = {Pardelli, G. and Sassi, M. and Orsolini, P.
and Parrinelli, V.}, TITLE = {Verso la costruzione di una Biblioteca Digitale}, YEAR = {2008}, ABSTRACT = {A data base of the "Antonio Zampolli Fund" has been created and the respective catalogue has been published. The work of analysis and selection of texts for cataloguing helped in creating this bibliography, in large part built on references extracted by books and journals. Very old bibliographical references have also been retrieved by curricula prepared by Professor Zampolli for various projects and commissions.}, KEYWORDS = {Biblioteca Digitale, Linguistica Computazionale}, PAGES = {1-43}, URL = {https://publications.cnr.it/doc/255578}, } @TECHREPORT{SORIA_2008_TECHREPORT_SM_157450, AUTHOR = {Soria, C. and Monachini, M.}, TITLE = {KYOTO-LMF WordNet Representation Format}, YEAR = {2008}, KEYWORDS = {Ontology linked to wordnets}, URL = {https://publications.cnr.it/doc/157450}, } @TECHREPORT{TOKUNAGA_2008_TECHREPORT_TCHKSYCCHKMPS_157453, AUTHOR = {Tokunaga, T. and Calzolari, N. and Huang, C. and Kiyoaki, S. and Sornlertlamvanich, V. and Yingju, X. and Charoenporn, T. and Chung, S. and Hsieh, S. and Kaplan, D. and Monachini, M. and Prévot, L. and Soria, C.}, TITLE = {Developing International Standards of Language Resources for Semantic Web Applications-Research Report of the International Joint Research Program NEDO}, YEAR = {2008}, ABSTRACT = {This report describes a three-year project aiming at an international standard for language resources that includes Asian languages. We summarise our contribution to an international standard of lexical markup framework (LMF) and introduce a prototype query expansion system using LMF-compliant lexical resources. Since ISO 24613 was in the FDIS stage and fairly stable, we built sample lexicons in Chinese, English, Italian, Japanese, and Thai based on ISO24613. At the same time, we implemented a query expansion system utilising rich linguistic resources including lexicons described in the ISO 24613 framework.
We confirmed that a system was feasible which worked on the tested languages (including both Western and Eastern languages) when given lexicons are compliant with the framework.}, KEYWORDS = {International standards, Language resources, Semantic web applications}, URL = {https://publications.cnr.it/doc/157453}, } @MISC{BARONI_2008_MISC_B_349800, AUTHOR = {Baroni, P.}, TITLE = {ECP-2007-LANG-617001 FLaReNet: Project Web Site}, YEAR = {2008}, ABSTRACT = {Sito Web del progetto FLaReNet - Fostering Language Resources Network (Programma eContentplus | Accordo di Sovvenzione N° ECP-2007-LANG-617001), realizzato con Drupal, sviluppato in inglese}, KEYWORDS = {Sito web}, URL = {http://www.flarenet.eu}, } @MISC{CUCURULLO_2008_MISC_CPB_151564, AUTHOR = {Cucurullo, S. and Picchi, E. and Biffi, M.}, TITLE = {Lessico italiano radiofonico 1995-2003}, YEAR = {2008}, KEYWORDS = {Lessico radiofonico, Corpus parlato}, URL = {https://publications.cnr.it/doc/151564}, } @MISC{CUCURULLO_2008_MISC_CPSSMM_151565, AUTHOR = {Cucurullo, S. and Picchi, E. and Sassi, M. and Segre, C. and Martignoni, C. and Morini, L.}, TITLE = {Le concordanze diacroniche dell'Orlando Furioso}, YEAR = {2008}, KEYWORDS = {Furioso, Orlando, Concordanze diacroniche}, URL = {https://publications.cnr.it/doc/151565}, } @MISC{PICCHI_2008_MISC_PCS_151568, AUTHOR = {Picchi, E. and Cucurullo, S. and Sassolini, E.}, TITLE = {Semantic Press}, YEAR = {2008}, KEYWORDS = {Rassegna stampa, Estrazione di informazione}, URL = {https://publications.cnr.it/doc/151568}, } @MISC{PIRRELLI_2008_MISC_PM_151569, AUTHOR = {Pirrelli, V. and Montemagni, S.}, TITLE = {AnITA}, YEAR = {2008}, KEYWORDS = {NLP Tools}, URL = {https://publications.cnr.it/doc/151569}, } @MISC{SASSOLINI_2008_MISC_SPBP_151567, AUTHOR = {Sassolini, E. and Picchi, E. and Bellone, G. 
and Porquier, E.}, TITLE = {Progetto per lo studio e la realizzazione di un sistema di erogazione on line in modalità multicanale, dei servizi sanitari prioritari per cittadini e imprese}, YEAR = {2008}, KEYWORDS = {Studi statistici, Patologia}, URL = {https://publications.cnr.it/doc/151567}, } @MISC{SASSOLINI_2008_MISC_SPH_151566, AUTHOR = {Sassolini, E. and Picchi, E. and Haines, M.}, TITLE = {Gli anni della Cupola}, YEAR = {2008}, KEYWORDS = {Corpus trilingue di documenti antichi}, URL = {https://publications.cnr.it/doc/151566}, } @ARTICLE{BARONI_2007_ARTICLE_BGP_64535, AUTHOR = {Baroni, M. and Guevara, E. and Pirrelli, V.}, TITLE = {NN Compounds in Italian: Modelling Category Induction and Analogical Extension}, YEAR = {2007}, KEYWORDS = {Morphology, Compounding, Mental Lexicon, Lexical Semantics}, PAGES = {263-290}, URL = {https://publications.cnr.it/doc/64535}, VOLUME = {2}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{BERTAGNA_2007_ARTICLE_BMSCHHMT_30874, AUTHOR = {Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N. and Huang, C. and Hsieh, S. and Marchetti, A. and Tesconi, M.}, TITLE = {Fostering Intercultural Collaboration: a Web Service Architecture for Cross-Fertilization of Distributed Wordnets}, YEAR = {2007}, ABSTRACT = {Enhancing the development of multilingual lexicons is of foremost importance for intercultural collaboration to take place, as multilingual lexicons are the cornerstone of several multilingual applications. However, the development and maintenance of large-scale, robust multilingual dictionaries is a tantalizing task. In this paper we present a tool, based on a web service architecture, enabling semi-automatic generation of bilingual lexicons through linking of distributed monolingual lexical resources. 
In addition to lexicon development, the architecture also allows enrichment of monolingual source lexicons through exploitation of the semantic information encoded in corresponding entries. In the paper we describe our case study applied to the Italian and Chinese wordnets, and we illustrate how the architecture can be extended to access distributed multilingual WordNets over the Internet, paving the way to exploitation in a cross-lingual framework of the wealth of information built over the last decade.}, PAGES = {146-158}, URL = {https://publications.cnr.it/doc/30874}, VOLUME = {4568}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, JOURNAL = {Lecture notes in computer science}, } @ARTICLE{BERTAGNA_2007_ARTICLE_BTC_64532, AUTHOR = {Bertagna, F. and Toral, A. and Calzolari, N.}, TITLE = {Evalita 2007: The All-Words WSD Task}, YEAR = {2007}, ABSTRACT = {This report describes the Italian all-words sense disambiguation task organized for EVALITA. The objectives of the task and the type of data prepared and distributed to participants are presented. Moreover, evaluation measures and the results obtained by the participating system are introduced.}, PAGES = {50-52}, URL = {https://publications.cnr.it/doc/64532}, VOLUME = {IV(2)}, } @ARTICLE{BOZZI_2007_ARTICLE_BCF_64533, AUTHOR = {Bozzi, A. and Cignoni, L. and Fedele, G.}, TITLE = {Linguistic Tools for Navigation in a Virtual Museum}, YEAR = {2007}, ABSTRACT = {Introduction. The digital and Virtual Reality technologies introduced in recent years in the world of museums have promoted the development of innovative products able to provide users and visitors with access modes very different from the traditional ones. 
Basically, such products are constituted by interactive information units set up in the display rooms, or by digital devices (CDs, DVDs) simulating the path followed by the visitors; they are suitable for didactic purposes and can sell very well if supplied with multilingual audio guides available at the bookshops of the museums. However, if we consider the information available on the net and try to enter a museum of this type which either corresponds to a real one or reflects a typological set of objects actually housed in different and even geographically distant sites, we will certainly be unsatisfied for a number of reasons. Firstly, the few cases available can be looked upon as only partially virtual since the correlated information such as catalogue identification, inventory number, description, etc., has simply been converted into the new condition of digital format. It is true that an artifact that a visitor can see in a low resolution icon catalogue simplifies the information retrieval operations, but it offers no innovative approach capable of justifying the considerable resources invested to produce the digitized objects consultable on-line. Another problem we would like to face is represented by the possible interaction between digital objects and their linguistic captions which we think could highly contribute to the development of a real virtual museum, as long as the environment of navigation and interaction with the user occurs with appropriate paradigms. A number of simple suggestions on this matter will be provided below. The work described here refers to an experiment that was carried out in order to make the visit to a painting gallery, represented by iconographical objects collected in an imaginary space, at the same time virtual and independent of cultural prejudices. Our virtual museum is thus considered as a series of undefined places, e.g. 
Internet sites or addresses relative to image files in .jpg format, eventually stored in directories available on one or more discs. For each painting it was necessary to produce a text format description of about 400 words, therefore much longer than that of an ordinary caption, but shorter than a monographic essay. In particular, we wanted to check whether and to what extent the element represented by a text would make it possible to create a logical and conceptual association even among elements with apparently no relation (at least for users with low or medium level of culture), as well as among those with evident relations that anybody could identify easily. The system, therefore, functions regardless of the cultural background of the visitor, with the result that anybody, experts and non-experts alike, can see the associations between the iconographic works and the “linguistic” reasons taken into account by the system for their realization. Let us first anticipate that technology makes it possible nowadays to intervene automatically so that iconographic elements can be identified by digital image analysis; therefore, no linguistic description is actually necessary to associate paintings which have a number of elements in common. However, it should be pointed out that such methods can only highlight the similarities among chromatic elements, graphical patterns, well evident features in the foreground of the image, while many other aspects impossible to capture are missed, thus reducing the number of feasible associations. On the other hand, the associations carried out on linguistic grounds have sometimes shown to be excessive, owing to the considerable amount of information (in particular the profuse sequence of diversified adjectives) contained in historical and artistic works. 
This negative element which emerged from the experiment can however be exploited to find suitable solutions aimed at reducing the production of partially useless results.}, KEYWORDS = {Information extraction, Data mining, Self Organising Maps}, PAGES = {209-220}, URL = {https://publications.cnr.it/doc/64533}, VOLUME = {1}, PUBLISHER = {All'Insegna del giglio (Firenze, Italia)}, ISSN = {1120-6861}, JOURNAL = {Archeologia e calcolatori}, } @ARTICLE{CALDERONE_2007_ARTICLE_CHP_64536, AUTHOR = {Calderone, B. and Herreros, I. and Pirrelli, V.}, TITLE = {Learning Inflection: The Importance of Starting Big}, YEAR = {2007}, ABSTRACT = {Perchè i sistemi verbali morfologicamente più "ricchi" vengono appresi da un bambino con maggiore facilità di sistemi più "poveri", caratterizzati da maggiore suppletivismo e da un minor numero di marcatori flessionali? Studi recenti condotti nel quadro della Morfologia Naturale (Bittner et al. 2003) hanno evidenziato il ruolo centrale svolto in questo apparente paradosso dal "contrasto morfologico" e dalla relazione biunivoca tra forma e contenuto all'interno del paradigma flessionale. Il presente lavoro illustra da questo punto di vista il comportamento di un modello originale di reti neurali artificiali auto-organizzanti con architettura "a cascata" e apprendimento asincrono, addestrato su forme verbali codificate fonologicamente. Il modello addestrato è in grado di memorizzare sia configurazioni morfologiche astratte, corrispondenti alle terminazioni flessionali di forme verbali regolari e irregolari, sia forme flesse piene, in funzione della loro frequenza per tipo e per unità nel corpus di addestramento. Il comportamento del modello è valutato su due differenti corpora di addestramento, italiano e inglese, entrambi campionati dal database CHILDES. 
L'analisi della topologia delle informazioni memorizzate dal modello addestrato consente di trarre alcune conclusioni generali sull'interazione tra processi di acquisizione di sequenze fonotattiche e principi di acquisizione paradigmatica. Le implicazioni teoriche dei risultati vengono inoltre discusse alla luce del tradizionale dibattito tra modelli "a meccanismo singolo" e "a meccanismo doppio" di acquisizione morfologica.}, PAGES = {175-200}, URL = {https://publications.cnr.it/doc/64536}, VOLUME = {2}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{CIGNONI_2007_ARTICLE_C_64538, AUTHOR = {Cignoni, L.}, TITLE = {'The magic tree', Natale in Festa}, YEAR = {2007}, ABSTRACT = {Le feste natalizie possono essere un pretesto per svolgere in maniera divertente ma costruttiva una serie di attività costituite da recite, canti, filastrocche e realizzazioni manuali, tutte legate al tema del Natale. Il racconto che viene presentato, dal titolo, “The Magic Tree” fa da filo conduttore a tutte le altre attività, in un’atmosfera piacevole e familiare. Si inizia con l’organizzazione della recita, e la definizione del contesto in cui questa si svolgerà, poi si continua con la preparazione degli addobbi. La costruzione di piccoli oggetti da utilizzare per la drammatizzazione di una storia e l’allestimento dell’ambiente natalizio costituiscono un’attività socializzante e gratificante che aiuta il bambino a comunicare, a collaborare con i compagni, a creare esperienze comuni durante le quali la lingua inglese viene usata in maniera divertente. Il fare finta di è un’attività importante per lo sviluppo del bambino: la maestra racconta la storia e poi incoraggia i bambini a recitarla sviluppando così il loro vocabolario. 
Le canzoncine e le filastrocche, ascoltate più volte, permettono ai bambini di divertirsi attraverso il movimento e la mimica, ma allo stesso tempo gli offrono la possibilità di ripetere e consolidare il lessico e le strutture più semplici. La maestra abitua i bambini ad accompagnare le parole che pronunciano al gesto corrispondente, acquisendo così familiarità con i suoni della lingua inglese, favorendo la comprensione orale dei bambini, la loro partecipazione attiva, l’interesse e la motivazione a apprendere. Il metodo adottato è quello sviluppato da James Usher del Total Physical Response, secondo cui il bambino per la sua naturale capacità di apprendere è in grado di eseguire o mimare delle azioni sulla base di comandi impartiti dalla maestra. La festa del Natale offre anche la possibilità di coinvolgere i bambini di altre nazionalità: ognuno dei bambini può cantare, oppure portare, o preparare con l’aiuto degli altri qualcosa di caratteristico del proprio paese.}, KEYWORDS = {Scuola dell'infanzia, inglese, laboratorio, Natale, feste}, PAGES = {17-32}, URL = {https://publications.cnr.it/doc/64538}, VOLUME = {3}, PUBLISHER = {Giunti Gruppo Editoriale (Firenze, Italia)}, ISSN = {1590-3206}, JOURNAL = {Scuola dell'infanzia}, } @ARTICLE{DELLORLETTA_2007_ARTICLE_DFLMP_64537, AUTHOR = {Dell'Orletta, F. and Federico, M. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Maximum Entropy for Italian PoS Tagging}, YEAR = {2007}, ABSTRACT = {L'articolo illustra le prestazioni del ILC-UniPi MaxEnt PoS Tagger in Evalita 2007. 
The report contains a description of the ILC-UniPi MaxEnt PoS Tagger performance in Evalita 2007.}, PAGES = {10-11}, URL = {https://publications.cnr.it/doc/64537}, VOLUME = {IV(2)}, } @ARTICLE{PIRRELLI_2007_ARTICLE_P_64534, AUTHOR = {Pirrelli, V.}, TITLE = {Psycho-Computational Issues in Morphology Learning and Processing: An Overture}, YEAR = {2007}, PAGES = {131-138}, URL = {https://publications.cnr.it/doc/64534}, VOLUME = {2}, } @ARTICLE{PIRRELLI_2007_ARTICLE_P_64539, AUTHOR = {Pirrelli, V.}, TITLE = {Lingue e Linguaggio}, YEAR = {2007}, PAGES = {130-300}, URL = {https://publications.cnr.it/doc/64539}, VOLUME = {2}, } @BOOK{SABA_2007_BOOK_S_136458, AUTHOR = {Saba, A.}, TITLE = {El Léxico del Arte de la verdadera Navegación de Pedro de Siria}, YEAR = {2007}, ABSTRACT = {El Arte de la verdadera Navegación de Pedro de Siria, cuyas concordancias aquí presentamos, forma parte de un corpus de textos del siglo XVI preparado para la construcción del Léxico Náutico del Español del Siglo de Oro (LéNESO*). Este corpus, que consta de quince textos, se ha llevado a cabo con la contribución del Istituto di Linguistica Computazionale del Consiglio Nazionale delle Ricerche e del Dipartimento di Lingue Romanze dell'Università di Pisa, del Departamento de Lengua Española y Lingüística General, y de la Sección de Medios Impresos de la UNED de Madrid que está atendiendo a la publicación de los varios léxicos por separado. Con el descubrimiento de América, España adquirió gran renombre y prestigio internacional en todos los campos, especialmente en las técnicas de navegación y en las disciplinas relacionadas con el arte de marear. En este período el horizonte geográfico se amplió, se produjeron nuevas cartas y se perfeccionaron las existentes y varios fueron los alcances en los campos de la astronomía, de la geografía, de la metereología, etc. 
La necesidad de favorecer el comercio con el Nuevo Mundo creó la exigencia de multiplicar los viajes de exploración en el Atlántico y al mismo tiempo garantizar una mayor seguridad a los barcos y a sus tripulaciones durante la navegación, por lo cual fue necesario redactar textos de teoría y de práctica marinera que sirviesen de guía a los navegantes en los derroteros transoceánicos. La obra de Siria Arte de la verdadera Navegación se imprimió en 1602, pero su realización es más antigua, como revela al lector el mismo autor "Los muchos ruegos de algunos amigos, a los quales es justo obedecer, me han movido a que sacasse a luz este libro, que ya casi tenía olvidado". El texto abarca muchos temas, hablando de "...de la máchina del mundo, es a saber, cielos y elementos, de las mareas y señales de tempestades, del aguja de marear, del modo de hazer cartas de navegar, del uso dellas, de la declinación y rodeo que comúnmente hazen los pilotos, del modo verdadero de navegar por círculo menor, por línea recta sin declinación ni rodeo, el modo cómo se sabrá el camino, y leguas que ha navegado el piloto por qualquier rumbo, y últimamente el saber tomar el altura del polo". 
Este volumen, al cuidado de Antonina Saba**, contiene la concordancia lematizada, los índices de frecuencia de los lemas, los índices de los nombres propios, el diccionario inverso del Arte de la verdadera Navegación, y un CD-ROM con un sistema de consulta del texto que constituyen una novedad y un estímulo para toda investigación sobre el texto del autor valenciano.}, KEYWORDS = {analisi linguistica, lessicografia}, PAGES = {ix-lxiii}, URL = {https://publications.cnr.it/doc/136458}, VOLUME = {33082EU01A01}, PUBLISHER = {Madrid: Universidad nacional de educación a distancia (Madrid, ESP)}, ISBN = {9788436254877}, EDITOR = {Saba, A.}, } @INCOLLECTION{CALZOLARI_2007_INCOLLECTION_C_136434, AUTHOR = {Calzolari, N.}, TITLE = {A Language Resources Infrastructure: from a vision of few to a reality for many?}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/136434}, } @INCOLLECTION{CALZOLARI_2007_INCOLLECTION_C_136454, AUTHOR = {Calzolari, N.}, TITLE = {Corpus-based lexicon building: an overview across projects, problems, approaches}, YEAR = {2007}, PAGES = {112-139}, URL = {https://publications.cnr.it/doc/136454}, PUBLISHER = {Routledge (London, GBR)}, ISBN = {0415338964}, BOOKTITLE = {Corpus linguistics: critical concepts in linguistics}, EDITOR = {Teubert, W. and Krishnamurthy, R.}, } @INCOLLECTION{CALZOLARI_2007_INCOLLECTION_C_136457, AUTHOR = {Calzolari, N.}, TITLE = {Towards a new generation of Language Resources in the Semantic Web Vision}, YEAR = {2007}, ABSTRACT = {In this contribution I touch on issues related to: language resources (LR) and semantics, dynamic resources automatically acquired, and how to go for a new generation of LRs compliant with the Semantic Web (SW) vision, pointing at the potentialities and the need for cross-fertilisation between the two communities of Human Language Technology (HLT) and SW/ontologies.
Many of these issues are related to Yorick's work on preferences, lexicons, semantic annotation, and recently to his ideas on the relation between HLT and SW. Large scale LRs are unanimously recognised as the necessary infrastructure underlying language technology (LT) (Varile and Zampolli (eds.) 1997). Discussing a few major European initiatives for building harmonised LRs, I highlight how computational lexicons and textual corpora should be considered as complementary views on the lexical space, in the perspective of modelling a new type of resource which is both a lexicon and a corpus together. A "complete" computational lexicon should incorporate and represent our "knowledge of the world". I claim that it is theoretically impossible to achieve completeness within any "static" lexicon. Moreover, choices on the syntagmatic axis are pervasive in language. A sound language infrastructure must encompass both "static" lexicons, as the traditional ones, and "dynamic" systems able to enrich the lexicon with information acquired on-line from large corpora, thus capturing the "actually realised" potentialities, the large range of variation, and the flexibility inherent in the language as it is used. These are the challenges for semantic tagging, which is at the core of the SW vision of giving meaning, in a manner understandable by machines, to the content of Web documents. Broadening our perspective into the future, the need for more and more "knowledge intensive" large-size LRs for effective content processing requires a change in the paradigm, and the design of a new generation of LRs, based on open content interoperability standards.
The SW notion may be helpful in determining the shape of the LRs of the future, consistent with the vision of an open distributed space of sharable knowledge available on the Web for processing. The approach to realise the necessary world-wide linguistic infrastructure requires coverage not only of a range of technical aspects, but also - and maybe most critically - of a number of organisational aspects. An essential aspect for ensuring an integrated basis is to enhance the interchange and cooperation among many communities that act now separately, such as LR and LT developers, Terminology, Semantic Web and Ontology experts, content providers, linguists and so on. This is one of the challenges for the next years, for a usable and useful "language" scenario in the global network.}, PAGES = {63-105}, URL = {https://publications.cnr.it/doc/136457}, VOLUME = {36}, PUBLISHER = {Springer (Dordrecht, NLD)}, ISBN = {978-1-4020-5832-5}, BOOKTITLE = {Words and Intelligence II: Essays in honour of Yorick Wilks}, EDITOR = {Ahmad, K. and Brewster, C. and Stevenson, M.}, } @INCOLLECTION{DELLORLETTA_2007_INCOLLECTION_DLMP_136459, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Corpus-based Modelling of Grammar Variation}, YEAR = {2007}, KEYWORDS = {Grammar variation, stochastic parsing, linguistic typology}, PAGES = {38-55}, URL = {https://publications.cnr.it/doc/136459}, PUBLISHER = {Angeli (Milano, ITA)}, ISBN = {9788846489449}, BOOKTITLE = {Language resources and linguistic theory}, EDITOR = {Sansò, A.}, } @EDITORIAL{GOGGI_2007_EDITORIAL_GZ_146078, AUTHOR = {Goggi, S. and Zamorani, N.}, TITLE = {Language Resources and Evaluation}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/146078}, } @INPROCEEDINGS{AGNOLONI_2007_INPROCEEDINGS_ABFSTMV_171352, AUTHOR = {Agnoloni, T. and Bacci, L. and Francesconi, E. and Spinosa, P. and Tiscornia, D. and Montemagni, S.
and Venturi, G.}, TITLE = {Building an ontological support for multilingual legislative drafting}, YEAR = {2007}, PAGES = {9-18}, URL = {https://publications.cnr.it/doc/171352}, CONFERENCE_NAME = {International Conference on Legal Knowledge and Information Systems (JURIX 2007)}, CONFERENCE_PLACE = {Leiden}, CONFERENCE_DATE = {2007}, BOOKTITLE = {Legal Knowledge and Information Systems}, EDITOR = {Lodder, A. R. and Mommers, L.}, } @INPROCEEDINGS{BARONI_2007_INPROCEEDINGS_BGP_84669, AUTHOR = {Baroni, M. and Guevara, E. and Pirrelli, V.}, TITLE = {Sulla tipologia dei composti N N in italiano: principi categoriali ed evidenza distribuzionale a confronto}, YEAR = {2007}, KEYWORDS = {Morphology, Compounding, Mental Lexicon, Lexical Semantics}, URL = {https://publications.cnr.it/doc/84669}, ISBN = {978-88-7870-469-5}, CONFERENCE_NAME = {XL Congresso Internazionale di Studi della Società di Linguistica Italiana (SLI 2006)}, CONFERENCE_PLACE = {Vercelli}, CONFERENCE_DATE = {settembre 2006}, BOOKTITLE = {Linguistica e modelli tecnologici della ricerca}, EDITOR = {Ferrari, G. and Benatti, R. and Mosca, M.}, } @INPROCEEDINGS{BERTAGNA_2007_INPROCEEDINGS_BMSCRTM_172595, AUTHOR = {Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N. and Ronzano, F. and Tesconi, M. and Marchetti, A.}, TITLE = {Cooperative Building of Semantic Resources}, YEAR = {2007}, ABSTRACT = {In this paper we present LexFlow, a framework for the automatic and cooperative enrichment, integration and exploitation of semantic resources. Borrowing from techniques used in the domain of document workflows, we model the activity of lexicon management as a particular case of workflow instance, where lexical entries move across agents and become dynamically updated.
We also give an important exploitation example of the semantic resources managed or built thanks to LexFlow, describing its integration with SemKey, a system for semantic collaborative tagging.}, KEYWORDS = {semantic resources, cooperative knowledge definition, semantic tagging}, URL = {https://publications.cnr.it/doc/172595}, ISBN = {3-540-74781-8}, CONFERENCE_NAME = {10th Congress of Italian Association for Artificial Intelligence-Cooperative construction of linguistic knowledge bases Workshop}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {10-13 September 2007}, } @INPROCEEDINGS{BERTAGNA_2007_INPROCEEDINGS_BMSMTHH_173656, AUTHOR = {Bertagna, F. and Monachini, M. and Soria, C. and Marchetti, A. and Tesconi, M. and Huang, C. and Hsieh, S.}, TITLE = {Fostering Intercultural Collaboration: a Web Service Architecture for Cross-Fertilization of Distributed Wordnets}, YEAR = {2007}, ABSTRACT = {Enhancing the development of multilingual lexicons is of foremost importance for intercultural collaboration to take place, as multilingual lexicons are the cornerstone of several multilingual applications. However, the development and maintenance of large-scale, robust multilingual dictionaries is a tantalizing task. In this paper we present a tool, based on a web service architecture, enabling semi-automatic generation of bilingual lexicons through linking of distributed monolingual lexical resources. In addition to lexicon development, the architecture also allows enrichment of monolingual source lexicons through exploitation of the semantic information encoded in corresponding entries.
In the paper we describe our case study applied to the Italian and Chinese wordnets, and we illustrate how the architecture can be extended to access distributed multilingual WordNets over the Internet, paving the way to exploitation in a cross-lingual framework of the wealth of information built over the last decade.}, KEYWORDS = {distributed language resources, interoperable lexical resources, integration of WordNets}, PAGES = {185-198}, URL = {https://publications.cnr.it/doc/173656}, VOLUME = {4568}, DOI = {10.1007/978-3-540-74000-1_11}, PUBLISHER = {Springer (Berlin, DEU)}, ISBN = {978-1-60558-198-9}, CONFERENCE_NAME = {IWIC 2007-The First International Workshop on Intercultural Collaboration}, CONFERENCE_PLACE = {Kyoto, Japan}, CONFERENCE_DATE = {25-26 Gennaio 2007}, } @INPROCEEDINGS{CALZOLARI_2007_INPROCEEDINGS_C_84683, AUTHOR = {Calzolari, N.}, TITLE = {Towards a New Generation of Language Resources: global trends and international convergences}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84683}, CONFERENCE_NAME = {International Conference on Natural Language Processing and Knowledge Engineering}, CONFERENCE_PLACE = {Beijing}, CONFERENCE_DATE = {2007}, } @INPROCEEDINGS{CARPI_2007_INPROCEEDINGS_CS_84667, AUTHOR = {Carpi, E. and Saba, A.}, TITLE = {El Corpus del Léxico Náutico del Siglo de Oro}, YEAR = {2007}, ABSTRACT = {Resumen Objeto de este trabajo es presentar el Corpus del léxico náutico del Español del siglo XVI (LÉNESO), llevado a cabo en el Instituto de Lingüística Computacional de Pisa con la colaboración de la Universidad de Pisa y de la UNED de Madrid. Se explicarán las herramientas informáticas utilizadas para la preparación del corpus, es decir, el sistema automático para el análisis de los textos (AyDA) y el sistema de interrogación para la consulta de los datos lingüísticos (COR).
Además se ilustrarán las posibilidades que el corpus proporciona al lingüista para sus investigaciones sobre la lengua a partir del análisis cuantitativo y cualitativo de los textos.}, KEYWORDS = {linguaggio scientifico e tecnico, strumenti informatici, lessicografia}, PAGES = {61-73}, URL = {http://www.edizioniets.com}, VOLUME = {[27] Collana: Memorie e Atti di Convegni}, PUBLISHER = {Edizioni ETS (Pisa, ITA)}, ISBN = {9788846712264}, CONFERENCE_NAME = {GIORNATE DI STUDIO DI LESSICOGRAFIA ROMANZA Il linguaggio scientifico e tecnico (medico, botanico, farmaceutico e nautico) fra Medioevo e Rinascimento}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {7-8 NOVEMBRE 2003}, BOOKTITLE = {Giornate di Studio di Lessicografia romanza: Il linguaggio scientifico e tecnico fra Medioevo e Rinascimento}, EDITOR = {Corradini, M. S. and Periñán, B.}, } @INPROCEEDINGS{CASELLI_2007_INPROCEEDINGS_CPRC_84668, AUTHOR = {Caselli, T. and Prodanof, I. and Ruimy, N. and Calzolari, N.}, TITLE = {Mapping SIMPLE and TimeML: improving event identification and classification using a semantic lexicon}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84668}, CONFERENCE_NAME = {GL2007: Fourth International Workshop on Generative Approaches to the Lexicon}, CONFERENCE_PLACE = {Parigi}, CONFERENCE_DATE = {2007}, } @INPROCEEDINGS{CASELLI_2007_INPROCEEDINGS_CQ_84670, AUTHOR = {Caselli, T. and Quochi, V.}, TITLE = {Inferring the semantics of temporal prepositions in Italian}, YEAR = {2007}, KEYWORDS = {italian, prepositions, computational linguistics}, PAGES = {38-44}, URL = {http://www.aclweb.org/anthology/W07-1606}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, CONFERENCE_NAME = {Fourth ACL-SIGSEM Workshop on Prepositions}, CONFERENCE_PLACE = {Prague, Czech Republic}, CONFERENCE_DATE = {28/07/2007}, BOOKTITLE = {Proceedings of the Fourth ACL-SIGSEM Workshop on Prepositions}, EDITOR = {Costello, F. and Kelleher, J. 
and Volk, M.}, } @INPROCEEDINGS{CIGNONI_2007_INPROCEEDINGS_CR_84671, AUTHOR = {Cignoni, L. and Ruffolo, P.}, TITLE = {Verso un dizionario filologico multilingue contestualizzato}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84671}, CONFERENCE_NAME = {XXV CILPR Congrès International de Linguistique et de Philologie Romanes}, CONFERENCE_PLACE = {Innsbruck}, CONFERENCE_DATE = {2007}, } @INPROCEEDINGS{CUTUGNO_2007_INPROCEEDINGS_CMMM_172459, AUTHOR = {Cutugno, P. and Marconi, L. and Morgavi, G. and Morando, M.}, TITLE = {CoLFIS: sistemas de interrogación online}, YEAR = {2007}, ABSTRACT = {CoLFIS: sistemas de interrogación online. CoLFIS es una base de datos de la lengua italiana escrita de 3.798.275 palabras, formada de textos escritos de varios generes pesados oportunamente y selecionados en tres distintos sectores: diarios, periodicos y libros. El producto realizado representa el italiano leido mas bien que toda la lengua italiana escrita. Esta eleccion se justifica en cuanto se deseaba construir un corpus, y en consecuencia un lexico de frecuencia, que se acercara los mas posible al lexico mental de un hablante de media cultura y no a un diccionario de la lengua italiana. Los diarios con 1.836.119 palabras se han extraido de los tres diarios mas importantes y leidos en Italia: Il Corriere Della Sera, Repubblica, La Stampa. En cada diario se han elegidos textos de 9 diferentes subsectores: economia, cronica local, cronica mundana, cronica negra, politica exterior, politica interior, ciencia, espectaculo y deporte. El sector de los periodicos es constituido por 1.306.653 palabras elejdas entre 12 differentes subsectores: arte-ciencia-tecnica, auto-nautica, ninos-muchachos, casa-hobby, femenino, fotonovelas, informacion general, cronica mundana, radio-television, deporte, viajes-ecologia y otro. 
El sector de los libros es constituido por 655.503 palabras elejdas entre 13 generos literarios: arte, ninos, ficcion, gialli espionaje, hobby y viajes, narrativa clasica, narrativa moderna, rosa, ensaystica, ciencias naturales y exactas, ciencias sociales y humanas, teatro y poesia. El corpus CoLFIS ha sido sometido a una lematizacion completa y se han desarrollado paquetes software de analisis estadistico para producir los lexicos de frecuencia relativos al corpus total y a los distintos sectores para cada lema y formas relativas. . En este trabajo seran explicados los metodos de interogacion realizados para ayudar el usuario que quiere acercarse al corpus, puesto a disposicion en internet, y obtener informaciones del corpus, del corpus con lematizacion, de los lexicos de frecuencia. Ademas se ensenarà un estudio sobre los adverbios derivados, es decir los adverbios que terminan en -mente, como ejemplo de posibles investigaciones permitidas del material linguistico y del sistema de interogacion a disposicion. Se presenteran los porcentajes de las distintas tipologias adverbiales y se analizara la posicion del adverbio en la oracion buscando evaluar sus posibilidades combinatorias.}, KEYWORDS = {Corpora, Lessico, Italiano}, PAGES = {505-510}, URL = {http://www.santiago.cu/hosting/linguistica/descargar.php?d=425}, PUBLISHER = {Centro de linguística aplicada, Ministerio de ciencia, tecnología y medio ambiente (Santiago de Cuba, CUB)}, ISBN = {959-7174-08-1}, CONFERENCE_NAME = {X Simposio Internacional Comunicacion Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {22-26 gennaio 2007}, BOOKTITLE = {Actas-I X Simposio Internacional Comunicación Social}, EDITOR = {Miyares, L. R. and Alvarado, A. M. and Moreno, C. A.}, } @INPROCEEDINGS{DELLORLETTA_2007_INPROCEEDINGS_DFLMP_84696, AUTHOR = {Dell'Orletta, F. and Federico, M. and Lenci, A. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {Maximum Entropy for Italian PoS Tagging}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84696}, CONFERENCE_NAME = {Evaluation of NLP Tools for Italian-EVALITA 2007}, CONFERENCE_PLACE = {Roma}, } @INPROCEEDINGS{DELLORLETTA_2007_INPROCEEDINGS_DLMMP_84687, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-2-Knowledge: una piattaforma linguistico-computazionale per l'estrazione di conoscenza da testi}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84687}, CONFERENCE_NAME = {XL Congresso Internazionale di Studi della Società di Linguistica Italiana (SLI 2006)}, CONFERENCE_PLACE = {Vercelli}, } @INPROCEEDINGS{FRANCOPOULO_2007_INPROCEEDINGS_FBGCMPS_84673, AUTHOR = {Francopoulo, G. and Bel, N. and George, M. and Calzolari, N. and Monachini, M. and Pet, M. and Soria, C.}, TITLE = {Lexical Markup Framework: an ISO Standard for Semantic Information in NLP Lexicons}, YEAR = {2007}, ABSTRACT = {Lexical Markup Framework (LMF) is a model that provides a common standardized framework for Natural Language Processing (NLP) lexicons. The goals of LMF are to provide a common model for the creation and use of such lexical resources to manage the exchange of data between and among these resources, and to enable the merging of a large number of individual resources to form extensive global electronic resources.}, URL = {https://publications.cnr.it/doc/84673}, ISBN = {978-3-8233-6314-9}, CONFERENCE_NAME = {GLDV2007-Lexical-Semantic and Ontological Resources of the GLDV Working Group on Lexicography at the Biennial Spring Conference}, CONFERENCE_PLACE = {Tübingen}, CONFERENCE_DATE = {13-14/04/2007}, } @INPROCEEDINGS{GIOVANNETTI_2007_INPROCEEDINGS_GMMB_84690, AUTHOR = {Giovannetti, E. and Marchi, S. and Montemagni, S. 
and Bartolini, R.}, TITLE = {Ontology-based Semantic Annotation of Product Catalogues}, YEAR = {2007}, ABSTRACT = {This paper describes a methodology for the semantic annotation of product catalogues. We propose a hybrid approach, combining pattern matching techniques to exploit the regular structure of product descriptions in catalogues, and Natural Language Processing techniques which are resorted to analyze natural language descriptions. It also includes the access to an application ontology, semi-automatically bootstrapped from collections of catalogues with an ontology learning tool, which is used to drive the semantic annotation process.}, KEYWORDS = {Semantic Annotation of texts, Ontology Learning, Information Extraction for e-commerce}, PAGES = {235-239}, URL = {https://publications.cnr.it/doc/84690}, CONFERENCE_NAME = {Recent Advances in Natural Language Processing (RANLP-2007)}, CONFERENCE_PLACE = {Borovets}, CONFERENCE_DATE = {27-29 settembre 2007}, BOOKTITLE = {Proceedings of the International Conference "Recent Advances in Natural Language Processing"}, } @INPROCEEDINGS{LENCI_2007_INPROCEEDINGS_LMPV_84693, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V. and Venturi, G.}, TITLE = {NLP-based ontology learning from legal texts. A case study}, YEAR = {2007}, ABSTRACT = {The paper reports on the methodology and preliminary results of a case study in automatically extracting ontological knowledge from Italian legislative texts in the environmental domain. We use a fully-implemented ontology learning system (T2K) that includes a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine language learning. Tools are dynamically integrated to provide an incremental representation of the content of vast repositories of unstructured documents. 
Evaluated results, however preliminary, are very encouraging, showing the great potential of NLP-powered incremental systems like T2K for accurate large-scale semi-automatic extraction of legal ontologies.}, PAGES = {113-129}, URL = {https://publications.cnr.it/doc/84693}, CONFERENCE_NAME = {II Workshop on Legal Ontologies and Artificial Intelligence Techniques (LOAIT'07)}, CONFERENCE_PLACE = {Stanford}, CONFERENCE_DATE = {4 giugno 2007}, } @INPROCEEDINGS{MARINELLI_2007_INPROCEEDINGS_MB_84674, AUTHOR = {Marinelli, R. and Bindi, R.}, TITLE = {Creativity and Fixation Processes in Proper Names Sense Extensions}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84674}, CONFERENCE_NAME = {XXIX International Conference of Functional Linguistics}, CONFERENCE_PLACE = {Helsinki}, CONFERENCE_DATE = {2007}, } @INPROCEEDINGS{MARINELLI_2007_INPROCEEDINGS_MS_84675, AUTHOR = {Marinelli, R. and Spadoni, G.}, TITLE = {Modeling a Maritime Domain Ontology}, YEAR = {2007}, ABSTRACT = {The users' demand has determined the need to manage the growing new technical maritime terminology which includes very different domains such as the juridical or commercial ones. A terminological database was built by exploiting the computational tools of ItalWordNet (IWN) and its lexical-semantic model EuroWordNet. This paper concerns the development of database structure and data coding, relevance of the concepts of 'term' and 'domain', information potential of the terms, complexity of this domain and detailed ontology structuring recently undertaken and still in progress. Our domain structure is described defining a core set of terms representing the two main sub-domains specified in 'technical-nautical' and 'maritime transport' terminology. These terms are sufficiently general to be the root nodes of the core ontology we are developing. 
They are mostly domain-dependent, but the link with the Top Ontology of IWN remains, endorsing either general and 'foundation' information, or detailed description directly connected with the specific domain. This structure seems to be the most appropriate to characterize the main conceptual schemas that people of the technical-nautical or maritime transport "world" actually use, namely activity plans, navigation management, etc. Also a set of acronyms has been codified to represent their ever increasing use in maritime terminology. Through the semantic relations linking the synsets, every term 'inherits' the IWN Top Ontology definitions and becomes itself an integral part of the structure. While codifying a term in the maritime database, the reference is at the same time allowed to the Base Concepts of the terminological ontology embedding the term in the semantic network, showing that upper and core ontologies make it possible for the framework to integrate different views on the same domain in a meaningful way.}, KEYWORDS = {terminology, lexical databases, ontology, computational resources}, PAGES = {511-515}, URL = {http://www.santiago.cu/hosting/linguistica/detalles.php?id=en&d=316}, VOLUME = {1}, PUBLISHER = {Centre for applied linguistics (Santiago de Cuba, CUB)}, ISBN = {959-7174-08-1}, CONFERENCE_NAME = {Tenth International Symposium on Social Communication. Santiago de Cuba January 22-26, 2007}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {2007}, BOOKTITLE = {Proceedings of the Tenth International Symposium on Social Communication. Santiago de Cuba, 2007}, EDITOR = {Myarez, L. and Myarez, E. B.}, } @INPROCEEDINGS{MONACHINI_2007_INPROCEEDINGS_MQRC_84676, AUTHOR = {Monachini, M. and Quochi, V. and Ruimy, N. 
and Calzolari, N.}, TITLE = {Lexical Relations and Domain Knowledge: The BioLexicon Meets the Qualia Structure}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84676}, CONFERENCE_NAME = {GL2007: Fourth International Workshop on Generative Approaches to the Lexicon}, CONFERENCE_PLACE = {Parigi}, CONFERENCE_DATE = {10-11 Maggio 2007}, EDITOR = {Bouillon, P. and Danlos, L. and Kanzaki, K.}, } @INPROCEEDINGS{MONTEMAGNI_2007_INPROCEEDINGS_M_84692, AUTHOR = {Montemagni, S.}, TITLE = {Patterns of phonetic variation in Tuscany: using dialectometric techniques on multi-level representations of dialectal data}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84692}, CONFERENCE_NAME = {International Workshop on Computational Phonology}, CONFERENCE_PLACE = {Borovets}, CONFERENCE_DATE = {2007}, } @INPROCEEDINGS{MONTEMAGNI_2007_INPROCEEDINGS_M_84694, AUTHOR = {Montemagni, S.}, TITLE = {Aree fonetiche e lessicali toscane a confronto: prime elaborazioni computazionali dei dati dell’Atlante Lessicale Toscano}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84694}, CONFERENCE_NAME = {XL Congresso Internazionale di Studi della Società di Linguistica Italiana}, CONFERENCE_PLACE = {Vercelli}, CONFERENCE_DATE = {2007}, } @INPROCEEDINGS{MONTEMAGNI_2007_INPROCEEDINGS_M_84695, AUTHOR = {Montemagni, S.}, TITLE = {Acquisizione automatica di termini da testi: primi esperimenti di estrazione e strutturazione di terminologia metalinguistica}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84695}, CONFERENCE_NAME = {DLM su Lessicologia e metalinguaggio}, CONFERENCE_PLACE = {Macerata}, CONFERENCE_DATE = {2007}, } @INPROCEEDINGS{MORGAVI_2007_INPROCEEDINGS_MMMC_168916, AUTHOR = {Morgavi, G. and Morando, M. and Marconi, L. 
and Cutugno, P.}, TITLE = {Instruments for evaluating communication processes}, YEAR = {2007}, ABSTRACT = {When humans want to use language to communicate orally with each other, they are faced with a sort of coordination problem: no one monopolizes the floor but the participants take turns to speak. This important concept in linguistic interaction is called "turn-taking". Recent studies showed that turn taking depends on whether speakers have a specific task and role. Often the turn taking is guided by a set of rules that speakers in a conversation adhere to. In the Psychological interviews, i.e., speakers have a non-symmetric role in the conversation; one speaker is supposed to provide information about a certain task, while the other speaker should carefully listen to the interviewee, giving a set of accepting feedbacks. Usually, we evaluate this whole communication process focusing our attention on semantic meanings of pronounced words, but actually this analysis cannot be automatically performed. In this paper we propose the extraction of some information on the evolution of the interview process through simple turn taking quantitative measurements. Over 1000 research interviews made from students during their psychology university course have been analyzed. Each whole interview process has been considered as a complex system evolving in the time. Our approach founds on analogies between interviews and mathematical chaotic processes. The proposed procedure allows the extraction of information on the conversation evolution: phase portraits with anomalous paths indicate situations where the communication has been troubled from external references. 
Some parameters showing very good indication on the process evolution are proposed.}, KEYWORDS = {turn taking, chaotic modeling, linguistic interaction}, PAGES = {485-489}, URL = {http://www.santiago.cu/hosting/linguistica/descargar.php?d=415}, PUBLISHER = {Centro de linguística aplicada, Ministerio de ciencia, tecnología y medio ambiente (Santiago de Cuba, CUB)}, ISBN = {959-7174-08-1}, CONFERENCE_NAME = {X Simposio Internacional Comunicacion Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {22-26 Gennaio 2007}, BOOKTITLE = {Actas-I X Simposio Internacional Comunicación Social}, EDITOR = {Miyares, L. R. and Alvarado, A. M. and Moreno, C. A.}, } @INPROCEEDINGS{PARDELLI_2007_INPROCEEDINGS_PSG_84678, AUTHOR = {Pardelli, G. and Sassi, M. and Goggi, S.}, TITLE = {A survey on Human Language Technology Terminology}, YEAR = {2007}, ABSTRACT = {This article originates from the revision of a 1969 unpublished article by Professor Antonio Zampolli carried out by Gabriella Pardelli and Manuela Sassi, two of his collaborators at the Institute of Computational Linguistics in Pisa. It is a technical report titled "Due Conversazioni sul Panorama Attuale della Linguistica Computazionale", drawn up by Zampolli on the occasion of two lectures at the Istituto di Matematica Ulisse Dini of Florence in June 1969. A synthesis of the introductory part - mainly based on some classifications for the various areas of Computational Linguistics - is reported here because the most interesting from the point of view of the relationship between automatic processing of linguistic data and other sciences. The rich bibliographic part has been extracted as well from the report and used for a terminological statistical analysis. 
Some sections, for example those on the International Conference on Computational Linguistics of 1969 and on the "Sezione Linguistica" of CNUCE in Pisa, have not - or only partly - been taken into account because already published by Zampolli in other books and journals (and not because considered less important). The whole revised technical report will soon be published in the "Quaderni di Linguistica Computazionale" edited by the Istituto di Linguistica Computazionale. The paper is divided in three parts: the first section is a terminological overview on the use of terms such like Computational Linguistics, Applied Linguistics and Mathematical Linguistics; the second has a statistical approach and shows the graphical representation of terms extracted from bibliographies and used in the 1960s; lastly, the conclusions. This contribution is a "historical" document which places itself at the beginning of a field which afterwards knew an exceptional development and it highlights both the continuity and the change which brought to the present Human Language Technology.}, KEYWORDS = {Human Language Technology, Terminology}, PAGES = {364-368}, URL = {https://publications.cnr.it/doc/84678}, PUBLISHER = {Wydawnictwo Poznanskie Sp. z o. o (Poznan, POL)}, ISBN = {978-90-77484-17-3}, CONFERENCE_NAME = {3rd Language \& Technology Conference}, CONFERENCE_PLACE = {Poznan}, CONFERENCE_DATE = {october 5-7, 2007}, BOOKTITLE = {Human Language Technologies as a Challenge for Computer Science and Linguistics}, EDITOR = {Vetulani, Z.}, } @INPROCEEDINGS{PIRRELLI_2007_INPROCEEDINGS_P_84688, AUTHOR = {Pirrelli, V.}, TITLE = {On the cognitive autonomy of morphological processing}, YEAR = {2007}, ABSTRACT = {Does morphological knowledge define an autonomous domain of grammar or is it rather the by-product of syntax-based principles and representations? 
We address this question by tapping a large body of cognitive language evidence, focusing on what is known about the way speakers learn, structure, access and use their mental morphological lexicon to parse and produce words. In line with the assumption that empirical evidence of concrete language usage can shed light on issues of domain-specificity in grammar, we conclude that it is difficult to reconcile usage-based language facts with the view that morphology is the syntax of morphemes. However, it would be equally misleading and logically unnecessary to characterise the functional autonomy of morphology from syntax in terms of processing modularity.}, KEYWORDS = {Theoretical Morphology, Mental Lexicon, Language Learning, Self-Organizing Maps}, PAGES = {245-269}, URL = {https://publications.cnr.it/doc/84688}, VOLUME = {37}, PUBLISHER = {LINCOM academic publishers (LINCOM GmbH) (München, DEU)}, ISBN = {9783895865046}, CONFERENCE_NAME = {Actes du colloque international de Morphologie 4èmes Décembrettes}, CONFERENCE_PLACE = {Toulouse}, CONFERENCE_DATE = {4-5 Dicembre 2005}, BOOKTITLE = {Morphologie à Toulouse}, EDITOR = {Hathout, N. and Montermini, F.}, } @INPROCEEDINGS{PIRRELLI_2007_INPROCEEDINGS_PH_84689, AUTHOR = {Pirrelli, V. and Herreros, I.}, TITLE = {Learning Inflection by Itself}, YEAR = {2007}, ABSTRACT = {The paper reports on a few experimental results of a computer simulation of learning the verb morphology of Italian, English and Arabic with the same type of neural architecture based on Kohonen's self-organizing maps. Issues of the mental organization of the resulting morphological lexica are explored in some detail and discussed in the light of the differential distribution of regular and irregular inflections in the three languages. 
It is shown that typologically diverse, non trivial aspects of the underlying paradigmatic structure of the three verb systems effectively emerge through sheer exposure to realistic distributions of verb forms devoid of morpho-syntactic content. We argue that these results go a long way towards explaining how global organization effects in the mental morphological lexicon may eventually result from local word processing steps.}, KEYWORDS = {Theoretical Morphology, Mental Lexicon, Language Learning, Self-Organizing Maps}, PAGES = {269-290}, URL = {http://mmm.lingue.unibo.it/}, PUBLISHER = {Università degli Studi di Bologna (Bologna, Italia)}, ISSN = {1826-7491}, CONFERENCE_NAME = {V Mediterranean Morphology Meeting}, CONFERENCE_PLACE = {Bologna}, CONFERENCE_DATE = {2005}, BOOKTITLE = {Proceedings of the Fifth Mediterranean Morphology Meeting}, EDITOR = {Booij, G. and Ducceschi, L. and Fradin, B. and Guevara, E. and Ralli, A. and Scalise, S.}, } @INPROCEEDINGS{QUOCHI_2007_INPROCEEDINGS_QDSMC_84735, AUTHOR = {Quochi, V. and Del Gratta, R. and Sassolini, E. and Monachini, M. and Calzolari, N.}, TITLE = {Toward a Standard Lexical Resource in the Bio Domain}, YEAR = {2007}, ABSTRACT = {The present paper describes a large-scale lexical resource for the biology domain designed both for human and for machine use. This lexicon aims at semantic interoperability and extendability, through the adoption of ISO-LMF standard for lexical representation and through a granular and distributed encoding of relevant information. The first part of this contribution focuses on three aspects of the model that are of particular interest to the biology community: the treatment of term variants, the representation on bio events and the alignment with a domain ontology. The second part of the paper describes the physical implementation of the model: a relational database equipped with a set of automatic uploading procedures. 
Peculiarity of the BioLexicon is that it combines features of both terminologies and lexicons. A set of verbs relevant for the domain is also represented with full details on their syntactic and semantic argument structure.}, KEYWORDS = {Lexical representation model, Lexical Database, Computational Lexicography, Special Domains, Standards}, PAGES = {295-299}, URL = {https://publications.cnr.it/doc/84735}, PUBLISHER = {Fundacja Uniwersytetu im A. Mickiewicza (Poznan, POL)}, ISBN = {978-83-7177-413-3}, CONFERENCE_NAME = {LTC07-3rd Language and Technology Conference: Human Language Technology. Challenges of the Information Society}, CONFERENCE_PLACE = {Poznan, Poland}, CONFERENCE_DATE = {5-7 Ottobre 2007}, } @INPROCEEDINGS{ROVENTINI_2007_INPROCEEDINGS_RRMUM_84680, AUTHOR = {Roventini, A. and Ruimy, N. and Marinelli, R. and Ulivieri, M. and Mammini, M.}, TITLE = {Mapping Concrete Entities from PAROLE-SIMPLE-CLIPS to ItalWordNet: Methodology and Results}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84680}, CONFERENCE_NAME = {45th Annual Meeting of the Association for Computational Linguistics}, CONFERENCE_PLACE = {Praga}, CONFERENCE_DATE = {2007}, } @INPROCEEDINGS{RUIMY_2007_INPROCEEDINGS_R_84681, AUTHOR = {Ruimy, N.}, TITLE = {Enhancing SIMPLE Semantic Relations: A Proposal}, YEAR = {2007}, ABSTRACT = {Semantic relations play a prominent role and have considerable expressive power in the SIMPLE model. Yet, some conceptual links are still too vaguely expressed or not even captured through lack of appropriate representational vocabulary. In this paper, the relations that were added to the Extended Qualia Structure in the framework of the CLIPS project are first illustrated. 
Then, a proposal is made to further enrich the relation network by borrowing from the EuroWordNet model some conceptual links holding between events and their participants and among co-participants in events, with a view to enhancing the understanding of the relationships among word senses in a sentence.}, URL = {https://publications.cnr.it/doc/84681}, PUBLISHER = {Wydawnictwo Poznanskie Sp. z o. o (Poznan, POL)}, ISBN = {978-83-7177-413-3}, CONFERENCE_NAME = {3rd Language \& Technology Conference}, CONFERENCE_PLACE = {Poznan}, CONFERENCE_DATE = {05-07/10/2007}, BOOKTITLE = {Proceedings of 3rd Language \& Technology Conference}, EDITOR = {Vetulani, Z.}, } @INPROCEEDINGS{SORIA_2007_INPROCEEDINGS_SBLMP_84682, AUTHOR = {Soria, C. and Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Automatic Extraction of Semantics in Law Documents}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/84682}, CONFERENCE_NAME = {V Legislative XML Workshop}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {2007}, } @INPROCEEDINGS{TORAL_2007_INPROCEEDINGS_TM_84684, AUTHOR = {Toral, A. and Monachini, M.}, TITLE = {Formalising and bottom-up enriching the ontology of a Generative Lexicon}, YEAR = {2007}, ABSTRACT = {This paper presents on-going research to formalise the ontology of a computational lexicon in OWL (W3C standard) as well as to enrich it by applying a bottom-up approach that extracts semantic information from the lexicon. The resource used follows the Generative Lexicon (GL) theory and therefore (1) puts a challenge to ontology design as its semantic types are multidimensional and (2) enables the acquisition of further knowledge on concepts from semantic units. The formalisation allows the ontology to be processed by Description Logics reasoners as well as to be employed in Semantic Web applications. 
Moreover, the lexicon-driven enrichment increases the semantic information present in the ontology making it appropriate for ontology-driven Natural Language Processing. Finally, the paper studies the application of these procedures to a subsequent GL-based biological resource.}, KEYWORDS = {Ontologies, Generative Lexicon, Qualia Structure, Semantic Web}, PAGES = {599-603}, URL = {https://publications.cnr.it/doc/84684}, PUBLISHER = {INCOMA Ltd (Shoumen, BGR)}, ISBN = {978-954-91743-7-3}, CONFERENCE_NAME = {RANLP-2007-International Conference on Recent Advances in Natural Language Processing}, CONFERENCE_PLACE = {Borovets, Bulgaria}, CONFERENCE_DATE = {27-29 September 2007}, EDITOR = {Angelova, G. and Bontcheva, K. and Mitkov, R. and Nicolov, N. and Nikolov, N.}, } @INPROCEEDINGS{TORAL_2007_INPROCEEDINGS_TM_84685, AUTHOR = {Toral, A. and Monachini, M.}, TITLE = {SIMPLE-OWL: a Generative Lexicon Ontology for NLP and the Semantic Web}, YEAR = {2007}, ABSTRACT = {This research deals with the modelling of a Generative Lexicon based ontology to be used in the Semantic Web and Natural Language Processing semantic tasks. This ontology is imported from an existing computational Lexical Resource and is converted to the W3C standard Web Ontology Language. This presents some challenges, as for example the multidimensionality of the original ontology, which are covered in the current paper. The result of this research is an OWL compliant semantically rich and linguistically-based ontology, thus useful to the automatic processing of text within the Semantic Web paradigm.}, KEYWORDS = {Owl, Ontologies, Generative Lexicon, Semantic Web}, URL = {https://publications.cnr.it/doc/84685}, ISBN = {3-540-74781-8}, CONFERENCE_NAME = {10th Congress of Italian Association for Artificial Intelligence-Senso Comune Workshop}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {10-13 Settembre 2007}, } @INPROCEEDINGS{TORAL_2007_INPROCEEDINGS_TMM_84686, AUTHOR = {Toral, A. and Monachini, M. 
and Muñoz, R.}, TITLE = {Automatically converting and enriching a computational lexicon Ontology for NLP semantic tasks}, YEAR = {2007}, ABSTRACT = {This paper describes the automatic transformation of a Generative Lexicon (GL) based Ontology into OWL, the Semantic Web ontology language. Furthermore, the OWL ontology is automatically enriched by means of a bottom-up procedure that extracts additional semantic information (relationships, features, predicates and quantifier restrictions) from the lexicon. The contribution of this research is two-fold. On one hand, we introduce a methodology for the formalisation of GL ontologies. On the other, we have developed automatic procedures that bring out a formalised, reasoning-capable, and semantically rich ontology, thus suitable for Natural Language Processing semantic tasks.}, PAGES = {216-220}, URL = {https://publications.cnr.it/doc/84686}, PUBLISHER = {Fundacja Uniwersytetu im A. Mickiewicza (Poznan, POL)}, ISBN = {978-83-7177-413-3}, CONFERENCE_NAME = {LTC07-3rd Language \& Technology Conference: Human Language Technologies as a Challenge for Computer Science and Linguistics}, CONFERENCE_PLACE = {Poznan}, CONFERENCE_DATE = {5-7 Ottobre 2007}, } @INPROCEEDINGS{CALDERONE_2007_INPROCEEDINGS_CQ_287126, AUTHOR = {Calderone, B. and Quochi, V.}, TITLE = {Emergent Cognitive Functions of the Noun Phrase}, YEAR = {2007}, KEYWORDS = {noun phrase, emergence of language}, URL = {https://publications.cnr.it/doc/287126}, CONFERENCE_NAME = {SLE 2007 Annual Meeting}, CONFERENCE_PLACE = {Joensuu, Finlandia}, CONFERENCE_DATE = {28/08/2007-01/09/2007}, BOOKTITLE = {SLE 2007 Annual Meeting Book of Abstracts}, } @INPROCEEDINGS{SASSI_2007_INPROCEEDINGS_SC_112932, AUTHOR = {Sassi, M. 
and Cinini, A.}, TITLE = {Il monitoraggio dell'amministrazione della giustizia}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/112932}, CONFERENCE_NAME = {Tecnologia dell'informazione e della comunicazione per la giustizia}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {2007}, } @TECHREPORT{AITMOKHTAR_2007_TECHREPORT_ABBDGGMSS_157418, AUTHOR = {Ait Mokhtar, S. and Barker, E. and Brunelli, R. and Demetriou, G. and Gaizauskas, R. and Giovannetti, E. and Montemagni, S. and Sándor, A. and Sun, H.}, TITLE = {Semantic Annotation Services for Virtual Information and Knowledge Environments}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157418}, } @TECHREPORT{BOUQUET_2007_TECHREPORT_BSMGSNSBCJ_157419, AUTHOR = {Bouquet, P. and Stoermer, H. and Montemagni, S. and Giovannetti, E. and Semeraro, G. and Niederee, C. and Stecher, R. and Brunelli, R. and Chanod, J. P. and Jacquin, T.}, TITLE = {Semantic Representation and Management Report}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157419}, } @TECHREPORT{CALZOLARI_2007_TECHREPORT_CMQSGB_157444, AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V. and Soria, C. and Goggi, S. and Baroni, P.}, TITLE = {FLaReNet: Fostering Language Resources Network. Grant Agreement n° 617001, eContentPlus}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157444}, } @TECHREPORT{CININI_2007_TECHREPORT_CS_157430, AUTHOR = {Cinini, A. and Sassi, M.}, TITLE = {Archivio del Digesto Latino-Italiano}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157430}, } @TECHREPORT{CININI_2007_TECHREPORT_CS_157431, AUTHOR = {Cinini, A. and Sassi, M.}, TITLE = {L'Informazione sanitaria. Analisi di tre quotidiani a tiratura nazionale}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157431}, } @TECHREPORT{DELGRATTA_2007_TECHREPORT_DBCEMQS_157442, AUTHOR = {Del Gratta, R. and Bartolini, R. and Caselli, T. and Enea, A. and Monachini, M. and Quochi, V. 
and Sassolini, E.}, TITLE = {TimeML: An Ontological Mapping onto the UIMA Type Systems}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157442}, } @TECHREPORT{DELGRATTA_2007_TECHREPORT_DMQSC_157425, AUTHOR = {Del Gratta, R. and Monachini, M. and Quochi, V. and Sassolini, E. and Calzolari, N.}, TITLE = {Bio-Lexicon DataBase: Architecture, Concepts and Loading Software}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157425}, } @TECHREPORT{DELGRATTA_2007_TECHREPORT_DTQM_157441, AUTHOR = {Del Gratta, R. and Toral, A. and Quochi, V. and Monachini, M.}, TITLE = {LocalBioLex: A database framework for biolinguistic research on integrated databases}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157441}, } @TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157412, AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.}, TITLE = {Segmentazione di un Testo Italiano in Token}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157412}, } @TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157413, AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.}, TITLE = {Language Recognition Tool, Specifiche di Implementazione}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157413}, } @TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157414, AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.}, TITLE = {Analisi Morfosintattica per l'Italiano}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157414}, } @TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157415, AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. 
and Pirrelli, V.}, TITLE = {Specifiche di Chunking per l'Italiano}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157415}, } @TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157416, AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.}, TITLE = {Specifiche di Named Entity Recognition per l'Italiano}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157416}, } @TECHREPORT{DELLORLETTA_2007_TECHREPORT_DFGLMTP_157417, AUTHOR = {Dell'Orletta, F. and Federico, M. and Giovannetti, E. and Lenci, A. and Marchi, S. and Trabucco, A. and Pirrelli, V.}, TITLE = {Segmentazione di un Testo Inglese in Token}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157417}, } @TECHREPORT{ENEA_2007_TECHREPORT_E_157411, AUTHOR = {Enea, A.}, TITLE = {Servizi di rete per il congresso LREC 2006}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157411}, } @TECHREPORT{FRANCOPOULO_2007_TECHREPORT_FMC_157433, AUTHOR = {Francopoulo, G. and Monachini, M. and Calzolari, N.}, TITLE = {Lexical Markup Framework: an ISO Standard for Semantic Information in NLP Lexicons}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157433}, } @TECHREPORT{FRANCOPOULO_2007_TECHREPORT_FMC_157435, AUTHOR = {Francopoulo, G. and Monachini, M. and Calzolari, N.}, TITLE = {Lexical Standards for ISO ballot}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157435}, } @TECHREPORT{KRAUWER_2007_TECHREPORT_KWC_157443, AUTHOR = {Krauwer, S. and Wittenburg, P. 
and Calzolari, N.}, TITLE = {CLARIN-Common Language Resources Infrastructure, Grant Agreement n° 212230 7thFP Common Research Infrastructures}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157443}, } @TECHREPORT{MONACHINI_2007_TECHREPORT_M_157434, AUTHOR = {Monachini, M.}, TITLE = {Test-suites of ISO conformant lexical entries}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157434}, } @TECHREPORT{MONTEMAGNI_2007_TECHREPORT_MMVBBRPT_157440, AUTHOR = {Montemagni, S. and Marchi, S. and Venturi, G. and Bartolini, R. and Bertagna, F. and Ruffolo, P. and Peters, W. and Tiscornia, D.}, TITLE = {Report on Ontology learning tool and testing}, YEAR = {2007}, ABSTRACT = {This deliverable documents the work done within the DALOS EU project for what concerns the definition and implementation of methodologies and techniques to bootstrap terminological and ontological knowledge from domain corpora. Starting from a corpus of legacy legislative texts in different languages, linguistic technologies combined with statistical techniques have been used to extract significant terms as well as to structure them in conceptual structures for the different languages dealt with within the project, namely Italian, English, Spanish and Dutch.}, KEYWORDS = {Ontology Learning, Term Extraction, Natural Language Processing, Conceptual Indexing}, URL = {https://publications.cnr.it/doc/157440}, } @TECHREPORT{MONTEMAGNI_2007_TECHREPORT_MS_157420, AUTHOR = {Montemagni, S. and Simi, M.}, TITLE = {The Italian dependency annotated corpus developed for the CoNLL-2007 Shared Task}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157420}, } @TECHREPORT{MONTEMAGNI_2007_TECHREPORT_MTV_157421, AUTHOR = {Montemagni, S. and Trabucco, A. and Venturi, G.}, TITLE = {Bio-Event Linguistic Annotation Tool. User Manual}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157421}, } @TECHREPORT{MONTEMAGNI_2007_TECHREPORT_MTVTCAMKRP_157422, AUTHOR = {Montemagni, S. and Trabucco, A. 
and Venturi, G. and Thompson, P. and Cotter, P. and Ananiadou, S. and McNaught, J. and Kim, J. and Rebholz, D. and Pezik, P.}, TITLE = {Event annotation of domain corpora}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157422}, } @TECHREPORT{PICCHI_2007_TECHREPORT_P_157424, AUTHOR = {Picchi, E.}, TITLE = {Analisi di corpora di documenti: strumenti e risorse linguistiche verso un'integrazione con ontologie}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157424}, } @TECHREPORT{PICCHI_2007_TECHREPORT_PSC_157426, AUTHOR = {Picchi, E. and Sassolini, E. and Cucurullo, S.}, TITLE = {Implementazione di procedure per la Named Entity Recognition}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157426}, } @TECHREPORT{PICCHI_2007_TECHREPORT_PSC_157427, AUTHOR = {Picchi, E. and Sassolini, E. and Cucurullo, S.}, TITLE = {Implementazione di procedure di Clustering}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157427}, } @TECHREPORT{PICCHI_2007_TECHREPORT_PSC_157428, AUTHOR = {Picchi, E. and Sassolini, E. and Cucurullo, S.}, TITLE = {Implementazione di procedure di Spidering per sistemi di Alerting}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157428}, } @TECHREPORT{ROMARY_2007_TECHREPORT_RFMDBWFG_157438, AUTHOR = {Romary, L. and Francopoulo, G. and Monachini, M. and Declerck, T. and Bunt, H. and Wittenburg, P. and Funk, A. and Gillam, L.}, TITLE = {LIRICS-Final Public Report}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157438}, } @TECHREPORT{SABA_2007_TECHREPORT_S_157439, AUTHOR = {Saba, A.}, TITLE = {Arte della verdadera Navegación de Pedro de Siria}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157439}, } @TECHREPORT{SASAKI_2007_TECHREPORT_SMAPMMP_157423, AUTHOR = {Sasaki, Y. and McNaught, J. and Ananiadou, S. and Pezik, P. and McGillivray, B. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {Augmented Version of Bio-Lexicon}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157423}, } @TECHREPORT{SASSI_2007_TECHREPORT_SC_157432, AUTHOR = {Sassi, M. and Cinini, A.}, TITLE = {Content analysis dei provvedimenti della sezione disciplinare del C. S. M}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157432}, } @TECHREPORT{SORIA_2007_TECHREPORT_ST_157437, AUTHOR = {Soria, C. and Thorleifsdottir, A.}, TITLE = {eParticipation: the potential of new and emerging technologies}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157437}, } @MISC{PICCHI_2007_MISC_PMCSP_157436, AUTHOR = {Picchi, E. and Montemagni, S. and Cucurullo, S. and Sassolini, E. and Paoli, M.}, TITLE = {ALT-Web. Sito dell’Atlante Lessicale Toscano (ALT) in rete}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157436}, } @MISC{PICCHI_2007_MISC_PSC_157429, AUTHOR = {Picchi, E. and Sassolini, E. and Cucurullo, S.}, TITLE = {Legislazione Toscana raccolta e illustrata da Lorenzo Cantini}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157429}, } @MISC{SABA_2007_MISC_S_157410, AUTHOR = {Saba, A.}, TITLE = {La Instrución de Mercaderes di Saravia de la Calle e la Institutione de’ mercanti di Alfonso de Ulloa. Trattamento dati}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157410}, } @MISC{SASSI_2007_MISC_S_242668, AUTHOR = {Sassi, M. P. G.}, TITLE = {Digital Bibliography of Professor Antonio Zampolli}, YEAR = {2007}, ABSTRACT = {Antonio Zampolli: Director of Research and Assistant to the Director of CAAL (Centre for the Automatic Linguistic Annotations), Gallarate and Pisa (1960-1966). Research (Senior Engineer) at the Pisa Scientific Centre of IBM, Responsible for Computational Linguistics (1967-1975). 
Full Professor of Computational Linguistics at the University of Pisa, and founder (1968) and Director of the Linguistic Division of CNUCE, transformed in 1978 into the Institute of Computational Linguistics [Istituto di Linguistica Computazionale - ILC] of the National Research Council [Consiglio Nazionale delle Ricerche - CNR], Pisa. His main research interests were computational lexicology and lexicography, computer-assisted language teaching, formal grammars and parsers, literary and linguistic text analysis, machine translation, multimodality, multilinguality, quantitative linguistics, reusability of lexical resources, standards for literary and linguistic data processing, text processing.}, KEYWORDS = {Linguistica Computazionale, Bibliografia Digitale, Zampolli Antonio}, URL = {http://www.ilc.cnr.it/AZ_bibliography/AZbiography.htm}, } @MISC{TESCONI_2007_MISC_TMBMSC_157409, AUTHOR = {Tesconi, M. and Marchetti, A. and Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {LeXFlow: a Prototype Supporting Collaborative Lexicon Development and Cross-fertilization}, YEAR = {2007}, URL = {https://publications.cnr.it/doc/157409}, } @ARTICLE{BOZZI_2006_ARTICLE_B_64521, AUTHOR = {Bozzi, A.}, TITLE = {Electronic Publishing and Computational Philology}, YEAR = {2006}, ABSTRACT = {Abstract - This paper is concerned with the relationship between electronic publishing and digital scholarly textual criticism. Hypertextual techniques and computational tools are compared. These two different methodologies applied to modern and contemporary texts with respect to ancient manuscript tradition are highlighted. Particular attention is focussed on the general criteria employed in the development of a computer-assisted workstation for digital editions of Greek papyri and medieval manuscripts. 
Keywords - electronic publishing, computational philology, digital}, KEYWORDS = {Electronic publishing, Computational philology, Digital libraries, Textual criticism}, PAGES = {3-24}, URL = {https://publications.cnr.it/doc/64521}, VOLUME = {24-25}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{DELRIO_2006_ARTICLE_DDT_64524, AUTHOR = {Del Rio, N. and Durán, J. I. and Turrini, G.}, TITLE = {Addizionario: a Tool for the Emergence of Indigenous Language as a “House of Being”}, YEAR = {2006}, ABSTRACT = {This paper reports the preliminary findings of a study about the use of an interactive multimedia and multilingual software application (Addizionario) in an intercultural setting and the role of language in education. The study involved nine primary schools of very small P'urhepecha indigenous communities in the state of Michoacan, Mexico. In an attempt to evaluate the impact of the software as a tool for opening up a space for written language production in the maternal tongue, emphasis will be placed not only on products and results, but also on the process and difficulties of using the software in contexts characterized by strong digital divide, significant educational lag, and gradual loss of the indigenous languages.}, PAGES = {263-282}, URL = {https://publications.cnr.it/doc/64524}, VOLUME = {XXVI}, } @ARTICLE{MAC_2006_ARTICLE_MBBC_64523, AUTHOR = {Macé, C. and Baret, P. and Bozzi, A. and Cignoni, L.}, TITLE = {Preface}, YEAR = {2006}, ABSTRACT = {Many scientific ventures start from haphazard meetings and this is also true for the history of this volume. 
During an informal meeting, Philippe Baret, Caroline Macé, Barbara Bordalejo and Peter Robinson put forward the idea of organizing a workshop together; only a few months later what had first appeared to be a simple hypothesis became a real event, also made possible thanks to the collaboration of many people and to the financial support of the Unit of Genetics of the University of Louvain (Louvain-la-Neuve, Faculty of Bioengineering, Agricultural and Environmental Sciences). We need to thank people who morally supported our initiative from the very beginning: besides Peter Robinson and Barbara Bordalejo, we would like to thank Peter Van Deun and Bram Roosen (Instituut voor Vroegchristelijke en Byzantijnse studies, K. U. Leuven), Carlos Steel (De Wulf-Mansioncentrum, K. U. Leuven), Dirk Van Hulle (Antwerp James Joyce Centre, Universiteit Antwerpen) and Andrea Schmidt (Centre for the Study of Gregory of Nazianzus, U. C. L.). Above all, we wish to thank the people who helped us make this workshop not only a scientific achievement but also a pleasant moment of friendship and conviviality: Anne-Catherine Lantin, Steve Ferrière and Isabelle Caignet. We would not have been able to present the results of this international conference to the public without the support of the Institute for Computational Linguistics in Pisa. Although the conference was held in English, we did not want the publication to be exclusively in this language. Therefore, we are grateful to Richard Goulet, who could not participate in the workshop, but accepted to contribute to this volume. Richard Goulet, like Peter Robinson and Andrea Bozzi, is a pioneer in the use of the computer in philology. Finally, Laura Cignoni should be thanked for her precious contribution in all those cases in which it was necessary to perform translations from Italian into English and for the general editing of the text to be handed to the publisher in camera-ready format. 
...}, PAGES = {xi-xv}, URL = {https://publications.cnr.it/doc/64523}, VOLUME = {24-25}, } @ARTICLE{MIAZZA_2006_ARTICLE_MZTCTM_64525, AUTHOR = {Miazza, D. and Zanetti, M. A. and Turrini, G. and Cerri, R. and Torti, R. and Muriana, M.}, TITLE = {Creating Worlds with Addizionario: an Integrated Teaching Approach}, YEAR = {2006}, ABSTRACT = {This paper presents an experience with 10-year-old children in the use of conceptual maps to build and comprehend descriptive texts. This was part of a multidisciplinary project aimed at increasing the pupils' knowledge of their own town. The aim of the project was twofold: to develop the general knowledge of the children and to promote the creation of linkages among different areas of knowledge by using Addizionario. The children built personal knowledge paths. This proved to be a motivating task, as each child enriched the given materials with self-made ones (pictures, drawings, photos, texts). This teaching method helped the children achieve significant learning goals, and promoted self-efficacy and empowerment. Moreover, it supported and favored an atmosphere of 'knowledge sharing' in the classroom, which considerably improved individual productions. Throughout the school year, the topic of the research project - Pavia as a world - was developed in relation to four school subjects: history, geography, science, and visual arts.}, PAGES = {217-232}, URL = {https://publications.cnr.it/doc/64525}, VOLUME = {XXVI}, } @ARTICLE{PASSAROTTI_2006_ARTICLE_PB_64522, AUTHOR = {Passarotti, M. and Bozzi, A.}, TITLE = {Towards Textual Drift Modelling in Computational Philology}, YEAR = {2006}, ABSTRACT = {Abstract - The article highlights the need to fix the methodological basis of computational philology. 
Formalization and modelling of the textual drift phenomenon, intended as a set of modifications to which each text is submitted in the course of its production and/or transmission, are the necessary grounds for a formal and shared definition of the research methods of computational philology. A methodological interdisciplinary solution to these problems is proposed, which consists in merging the individual experiences achieved in the field of philology with the results obtained by evolutionary biology. Two important strands have been identified concerning philology (not only computational), independent of whether they are dealing with authorial, or non-authorial variants. Textual drift modelling should be conducted so that it can be applied to the greatest extent to both strands. Keywords - philology, philogenetics, computational textual criticism}, PAGES = {63-86}, URL = {https://publications.cnr.it/doc/64522}, VOLUME = {24-25}, } @ARTICLE{TURRINI_2006_ARTICLE_TBBP_64526, AUTHOR = {Turrini, G. and Baroni, P. and Bianchi, F. and Paccosi, A.}, TITLE = {Addizionario-Plus}, YEAR = {2006}, ABSTRACT = {The present paper describes Addizionario-Plus, the updated and extended version of Addizionario. 
The main changes that have been introduced into the software (some of which were requested or suggested by users) concern: a) the system architecture; b) the graphics of the user interface; c) the programming language; d) the number and types of activities that it is possible to carry out; e) the creation of the Teacher Module; f) the development of a wide, detailed range of online helps; and g) while performing the activities, the possibility of accessing a large quantity of ready-to-use material, arranged into a number of teaching paths already traced out.}, KEYWORDS = {multimedia tools, language teaching/learning, intercultural education}, PAGES = {283-293}, URL = {http://www.libraweb.net/articoli.php?chiave=200601502\&rivista=15}, VOLUME = {XXVI}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{TURRINI_2006_ARTICLE_TBP_64530, AUTHOR = {Turrini, G. and Baroni, P. and Paccosi, A.}, TITLE = {AddizionarioPLUS: a Creative Approach to Linguistic and Intercultural Education}, YEAR = {2006}, ABSTRACT = {This paper describes AddizionarioPLUS, the updated and extended version of Addizionario. Addizionario - a hypermedia linguistic laboratory in which children being from 5 to 12 years old can study Italian as their native or second language at various levels of difficulty and from different points of view - was developed by the Institute for Computational Linguistics of the National Research Council (ILC-CNR), in collaboration with the Department of Computer Science of the University of Turin, and was successfully tested in Italy and abroad. 
The main changes that have been introduced into the software concern: a) the graphic interface with the user, b) the programming language, c) the system architecture, d) the possible activities, e) the organization of the working environments, f) a module for the teacher, g) the available ready-to-use material and learning paths, h) the possible helps for both pupils and the teacher.}, KEYWORDS = {multimedia, dictionary, language, culture, education}, PAGES = {407-412}, URL = {http://www.informatica.si/vols/vol30.html#No4}, VOLUME = {30}, PUBLISHER = {Slovensko društvo Informatika (Ljubljana, Slovenia)}, ISSN = {0350-5596}, JOURNAL = {Informatica (Ljublj.)}, } @ARTICLE{TURRINI_2006_ARTICLE_TB_64527, AUTHOR = {Turrini, G. and Bianchi, F.}, TITLE = {Preface}, YEAR = {2006}, PAGES = {xi-xviii}, URL = {https://publications.cnr.it/doc/64527}, VOLUME = {XXVI}, } @ARTICLE{TURRINI_2006_ARTICLE_TB_64528, AUTHOR = {Turrini, G. and Bianchi, F.}, TITLE = {Addizionario: A Child-Centered Approach to Linguistic Education}, YEAR = {2006}, ABSTRACT = {Addizionario is an innovative learning tool which supports the child's cognitive and linguistic development. In the form of a hypermedia laboratory, it encourages children to study language at various levels of difficulty and from different points of view, with particular emphasis on lexical competence and language reflection. This paper describes Addizionario and its two strictly interrelated, but at the same time independent, components: (i) the Core Dictionary, a dictionary for children written and illustrated by children; and (ii) the Activity Book, a multimedia creative environment which allows the children to construct their own personal dictionaries and to play with language. Furthermore, some teaching ideas are offered to help teachers take advantage of the features of the software.}, PAGES = {3-22}, URL = {https://publications.cnr.it/doc/64528}, VOLUME = {XXVI}, } @ARTICLE{ZANETTI_2006_ARTICLE_ZMT_64529, AUTHOR = {Zanetti, M. 
A. and Miazza, D. and Turrini, G.}, TITLE = {Addizionario: in between Cognition and Metacognition}, YEAR = {2006}, ABSTRACT = {Metacognitive theory considers learning as the result of the activation of several conscious processes. Our project aimed to support reading comprehension and meaningful learning by the use of conceptual maps and to investigate the processes involved in motivated learning when the pupil's attention is turned to learning objectives and suitable strategies are made explicit and shared. The project saw a group of children attending year four in an Italian primary school tackle a scientific subject matter from a multidisciplinary perspective. Each pupil created an initial conceptual map in pen-and-paper format and then a final one with Addizionario. Qualitative and quantitative comparison between the two outputs showed development in the learning processes, awareness of the logical reasoning used in creating the map, and ability to self-monitor the progressive mastering of knowledge. Active re-elaboration of the material helped the children understand the learning objectives, and find suitable strategies.}, PAGES = {233-252}, URL = {https://publications.cnr.it/doc/64529}, VOLUME = {XXVI}, } @ARTICLE{ZANETTI_2006_ARTICLE_ZMT_64531, AUTHOR = {Zanetti, M. A. and Miazza, D. and Turrini, G.}, TITLE = {Between Metacognition and Creativity}, YEAR = {2006}, PAGES = {399-405}, URL = {https://publications.cnr.it/doc/64531}, VOLUME = {30}, NUMBER = {4}, } @BOOK{CININI_2006_BOOK_CS_143963, AUTHOR = {Cinini, A. and Sassi, M.}, TITLE = {Content analysis dei provvedimenti della sezione disciplinare del C. S. 
M}, YEAR = {2006}, ABSTRACT = {Nell'ambito del progetto di ricerca denominato "Tecnologie dell'informazione e della comunicazione per la giustizia" coordinato dall'Istituto di Ricerca sui Sistemi Giudiziari (IRSIG-CNR di Bologna) e co-finanziato dai Fondi Integrativi Ricerca di Base (FIRB) del Ministero dell'Università e della Ricerca, l'Istituto di Linguistica Computazionale (ILC-CNR di Pisa) ha collaborato con l'IRSIG e con il Centro Studi e Ricerche sull'Ordinamento Giudiziario dell'Università di Bologna per la realizzazione di una base dati elettronica per l'analisi dei provvedimenti della sezione disciplinare del Consiglio Superiore della Magistratura.}, KEYWORDS = {Informatica giuridica documentale, Knowledge extraction from texts, DBT}, PAGES = {1-68}, URL = {https://publications.cnr.it/doc/143963}, PUBLISHER = {S. T. A. R. Servizio Tecnografico Area Ricerca CNR (Pisa, ITA)}, } @BOOK{MAC_2006_BOOK_MBBC_136447, AUTHOR = {Macé, C. and Baret, P. and Bozzi, A. and Cignoni, L.}, TITLE = {The Evolution of Texts: Confronting Stemmatological and Genetical Methods}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/136447}, } @BOOK{TURRINI_2006_BOOK_TB_136449, AUTHOR = {Turrini, G. 
and Bianchi, F.}, TITLE = {Hypermedia for Education and Research}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/136449}, } @INCOLLECTION{BOZZI_2006_INCOLLECTION_B_136450, AUTHOR = {Bozzi, A.}, TITLE = {Edizione elettronica dei testi e filologia computazionale}, YEAR = {2006}, PAGES = {207-232}, URL = {https://publications.cnr.it/doc/136450}, PUBLISHER = {Il Mulino (Bologna, ITA)}, ISBN = {8815107193}, BOOKTITLE = {Fondamenti di critica testuale}, EDITOR = {Stussi, A.}, } @INCOLLECTION{CALZOLARI_2006_INCOLLECTION_C_136451, AUTHOR = {Calzolari, N.}, TITLE = {A Language Resources Infrastructure: from a vision of few to a reality for many?}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/136451}, } @INCOLLECTION{CALZOLARI_2006_INCOLLECTION_C_136452, AUTHOR = {Calzolari, N.}, TITLE = {Language Resources and Content Interoperability: technical, strategic and political issues for a new generation of Language Resources}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/136452}, } @INCOLLECTION{CALZOLARI_2006_INCOLLECTION_C_136453, AUTHOR = {Calzolari, N.}, TITLE = {11. Computational Linguistics}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/136453}, PUBLISHER = {EOLSS Publishers (Oxford, GBR)}, } @INCOLLECTION{MARINELLI_2006_INCOLLECTION_MBR_136455, AUTHOR = {Marinelli, R. and Bindi, R. and Roventini, A.}, TITLE = {Metonymic and Metaphorical Uses of Proper Names}, YEAR = {2006}, ABSTRACT = {In this paper we describe a research we are carrying out in the framework of ItalWordNet, a large lexical-semantic database containing semantic information for about 50,000 synsets of nouns, verbs, adjectives, adverbs, and a subset of proper names, which is continuously enriched and updated at the Institute for Computational Linguistics in Pisa. 
The research concerns the proper names considered from a twofold aspect: their coding in the lexical semantic database and their concrete use, as it is evidenced within a large corpus of the contemporary written Italian language. In particular the semantic relations involving the proper names and their senses (literal, derived and extended) are taken into consideration. Many proper names turn out to be the basis for many extensions of meaning, so in the paper we analyse in the corpus a set of them showing many types of derivates and sense extensions generated by means of lexical rules that operate as "generative factors" (Pustejovsky, 2001). The many contexts of use, analyzed in the corpus, confirm a rich regular polysemy regarding this category of nouns, so, for a set of cases, we propose to represent this phenomenon by introducing specific semantic relations in the database.}, KEYWORDS = {Lexical semantic databases, Proper Names, Corpora}, PAGES = {69-78}, URL = {http://www.c-s-p.org/Flyers/Linguistics-in-the-Twenty-First-Century.htm}, PUBLISHER = {Centro de linguística aplicada, Ministerio de ciencia, tecnología y medio ambiente (Santiago de Cuba, CUB)}, ISBN = {1904303862}, BOOKTITLE = {Linguistics in the Twenty First Century}, EDITOR = {Bermúdez, E. M. and Miyares, L. R.}, } @INCOLLECTION{MARINELLI_2006_INCOLLECTION_MR_136456, AUTHOR = {Marinelli, R. and Roventini, A.}, TITLE = {The Italian Maritime Lexicon and the ItalWordNet Semantic Database}, YEAR = {2006}, ABSTRACT = {The paper reports on the creation of a terminological subset belonging to the maritime lexical domain; this set of terms is structured according to the design principles of the generic Italian lexical semantic database ItalWordNet, i.e. applying the same semantic relations model and using the possibility of linking the specialised terms to the corresponding closest concepts in WordNet. 
The main characteristics of the lexical semantic database are also described and, in particular: i) the construction of the terminological wordnet; ii) the approach adopted to connect the terminological database to the generic one; iii) some issues about the mapping between the Italian terms and the Princeton WordNet; iv) a first outline of a specific maritime domain ontology.}, KEYWORDS = {Lexical databases, Terminology, Ontology}, PAGES = {173-182}, URL = {http://www.c-s-p.org/Flyers/Linguistics-in-the-Twenty-First-Century.htm}, PUBLISHER = {Centro de linguística aplicada, Ministerio de ciencia, tecnología y medio ambiente (Santiago de Cuba, CUB)}, ISBN = {1904303862}, BOOKTITLE = {Linguistics in the Twenty First Century}, EDITOR = {Bermúdez, E. M. and Miyares, L. R.}, } @INCOLLECTION{TURRINI_2006_INCOLLECTION_TCP_136448, AUTHOR = {Turrini, G. and Cignoni, L. and Paccosi, A.}, TITLE = {Addizionario: il dizionario visto dalla parte dei bambini}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/136448}, } @EDITORIAL{BOZZI_2006_EDITORIAL_BC_146074, AUTHOR = {Bozzi, A. and Cignoni, L.}, TITLE = {The Evolution of Texts: Confronting Stemmatological and Genetical Methods}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/146074}, } @EDITORIAL{GOGGI_2006_EDITORIAL_GZ_146076, AUTHOR = {Goggi, S. 
and Zamorani, N.}, TITLE = {Language Resources and Evaluation}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/146076}, } @EDITORIAL{TURRINI_2006_EDITORIAL_T_146075, AUTHOR = {Turrini, G.}, TITLE = {Hypermedia for Education and Research}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/146075}, } @EDITORIAL{ZAMORANI_2006_EDITORIAL_Z_146073, AUTHOR = {Zamorani, N.}, TITLE = {LREC 2006: 5th International Conference on Language Resources and Evaluation: Book of Abstracts}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/146073}, } @EDITORIAL{ZAMORANI_2006_EDITORIAL_Z_146077, AUTHOR = {Zamorani, N.}, TITLE = {Proceedings of LREC 2006: 5th International Conference on Language Resources and Evaluation}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/146077}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BCGLMPRS_84608, AUTHOR = {Bartolini, R. and Caracciolo, C. and Giovannetti, E. and Lenci, A. and Marchi, S. and Pirrelli, V. and Renso, C. and Spinsanti, L.}, TITLE = {Creation and Use of Lexicons and Ontologies for NL Interfaces to Databases}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84608}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation (LREC)}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BCGLMPRS_91313, AUTHOR = {Bartolini, R. and Caracciolo, C. and Giovannetti, E. and Lenci, A. and Marchi, S. and Pirrelli, V. and Renso, C. and Spinsanti, L.}, TITLE = {Creation and use of lexicons and ontologies for natural language interface to databases}, YEAR = {2006}, ABSTRACT = {In this paper we present an original approach to natural language query interpretation which has been implemented within the FuLL (Fuzzy Logic and Language) Italian project of BC S.r.l. 
In particular, we discuss here the creation of linguistic and ontological resources, together with the exploitation of existing ones, for natural language-driven database access and retrieval. Both the database and the queries we experiment with are Italian, but the methodology we broach naturally extends to other languages.}, KEYWORDS = {Natural language processing, ontologies, gis, databases}, PAGES = {6}, URL = {https://publications.cnr.it/doc/91313}, CONFERENCE_NAME = {LREC Conference}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {24-26/05/2006}, BOOKTITLE = {LREC 2006}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BGMMABSB_84664, AUTHOR = {Bartolini, R. and Giovannetti, E. and Marchi, S. and Montemagni, S. and Andreatta, C. and Brunelli, R. and Stecher, R. and Bouquet, P.}, TITLE = {Multimedia Information Extraction in Ontology-based Semantic Annotation of Product Catalogues}, YEAR = {2006}, ABSTRACT = {The demand for efficient methods for extracting knowledge from multimedia content has led to a growing research community investigating the convergence of multimedia and knowledge technologies. In this paper we describe a methodology for extracting multimedia information from product catalogues empowered by the synergetic use and extension of a domain ontology. The methodology was implemented in the Trade Fair Advanced Semantic Annotation Pipeline of the VIKE-framework.}, KEYWORDS = {Semantic Web Technologies, ontology creation, ontology extraction, ontology evolution, semantic annotation of multimedia content}, URL = {https://publications.cnr.it/doc/84664}, CONFERENCE_NAME = {SWAP 2006}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {18-20 December 2006}, } @INPROCEEDINGS{BASSI_2006_INPROCEEDINGS_BDEL_84586, AUTHOR = {Bassi, S. and Dell'Orletta, F. and Esposito, D. 
and Lenci, A.}, TITLE = {Computational linguistics meets philosophy: a Latent Semantic Analysis of Giordano Bruno's texts}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84586}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation (LREC)}, CONFERENCE_PLACE = {Genova}, } @INPROCEEDINGS{BERTAGNA_2006_INPROCEEDINGS_B_84624, AUTHOR = {Bertagna, F.}, TITLE = {Representation and Inference for Open-Domain QA: Strength and Limits of two Italian Semantic Lexicons}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84624}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{CALZOLARI_2006_INPROCEEDINGS_CSSCPBEMSC_84625, AUTHOR = {Calzolari, F. and Sassolini, E. and Sassi, M. and Cucurullo, S. and Picchi, E. and Bertagna, F. and Enea, A. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {Next Generation Language Resources using Grid}, YEAR = {2006}, ABSTRACT = {This paper presents a case study concerning the challenges and requirements posed by next generation language resources, realized as an overall model of open, distributed and collaborative language infrastructure. If a sort of "new paradigm" for language resource sharing is required, we think that the emerging and still evolving technology connected to Grid computing is a very interesting and suitable one for a concrete realization of this vision. Given the current limitations of Grid computing, it is very important to test the new environment on basic language analysis tools, in order to get the feeling of what are the potentialities and possible limitations connected to its use in NLP. For this reason, we have done some experiments on a module of the Linguistic Miner, i.e. the extraction of linguistic patterns from restricted domain corpora. 
The Grid environment has produced the expected results (reduction of the processing time, huge storage capacity, data redundancy) without any additional cost for the final user.}, KEYWORDS = {grid, acquisition, topic classification}, PAGES = {1858-1861}, URL = {https://publications.cnr.it/doc/84625}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {24-26 Maggio 2006}, } @INPROCEEDINGS{CALZOLARI_2006_INPROCEEDINGS_C_84626, AUTHOR = {Calzolari, N.}, TITLE = {Introduction of the Conference Chair}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84626}, CONFERENCE_NAME = {LREC 2006: Book of Abstracts 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{CALZOLARI_2006_INPROCEEDINGS_C_84627, AUTHOR = {Calzolari, N.}, TITLE = {Community Culture in Linguistics – an international perspective}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84627}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{CALZOLARI_2006_INPROCEEDINGS_C_84647, AUTHOR = {Calzolari, N.}, TITLE = {Technical and Strategic Issues on Language Resources for a Research Infrastructure}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84647}, CONFERENCE_NAME = {International Symposium on Large-scale Knowledge Resources (LKR2006)}, CONFERENCE_PLACE = {Tokyo}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{CALZOLARI_2006_INPROCEEDINGS_C_84648, AUTHOR = {Calzolari, N.}, TITLE = {International Standards for Computational Lexicons: their relation to terminology}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84648}, CONFERENCE_NAME = {TSTT 2006, International Conference on Terminology, Standardization and Technology Transfer}, CONFERENCE_PLACE = {Beijing}, CONFERENCE_DATE 
= {2006}, } @INPROCEEDINGS{CASELLI_2006_INPROCEEDINGS_CP_84628, AUTHOR = {Caselli, T. and Prodanof, I.}, TITLE = {Annotating Bridging Anaphors in Italian: in Search of Reliability}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84628}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{CUCURULLO_2006_INPROCEEDINGS_CMPPS_84629, AUTHOR = {Cucurullo, S. and Montemagni, S. and Paoli, M. and Picchi, E. and Sassolini, E.}, TITLE = {Dialectal resources on-line: the ALT-Web experience}, YEAR = {2006}, ABSTRACT = {The paper presents an on-line dialectal resource, ALT-Web, which gives access to the linguistic data of the Atlante Lessicale Toscano, a specially designed linguistic atlas in which lexical data have both a diatopic and diastratic characterisation. The paper focuses on: the dialectal data representation model; the access modalities to the ALT dialectal corpus; ontology-based search.}, KEYWORDS = {Computational dialectology, Dialectal databases, Construction of lexical resources}, PAGES = {1846-1851}, URL = {http://www.lrec-conf.org/lrec2006/}, VOLUME = {Proceedings}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {24-25-26 Maggio 2006}, BOOKTITLE = {Dialectal resources on-line: the ALT-Web experience}, } @INPROCEEDINGS{CUCURULLO_2006_INPROCEEDINGS_CMPPS_84661, AUTHOR = {Cucurullo, S. and Montemagni, S. and Paoli, M. and Picchi, E. and Sassolini, E.}, TITLE = {Atlante Dialettale in rete: ALT-Web}, YEAR = {2006}, ABSTRACT = {The paper presents an on-line dialectal resource, ALT-Web, which gives access to the linguistic data of the Lexical Atlas of Tuscany or Atlante Lessicale Toscano, a specially designed linguistic atlas in which lexical data have both a diatopic and diastratic characterisation. 
The paper illustrates ALT-Web with particular emphasis on: 1) the dialectal data representation model; 2) the access modalities to the ALT dialectal corpus designed to produce an output tailored to the specific needs of the different classes of users (both professionals and common citizens); 3) ontology-based search. These represent three main features which differentiate ALT-Web both from the previous digitalised ALT version and, most interestingly, from other on-line dialectal resources. At the time of writing, this is the first resource of this type in Italy, and one of the few at the international level.}, KEYWORDS = {dialectal resources, information retrieval}, PAGES = {661-672}, URL = {http://www.euralex.org/publications/}, VOLUME = {2}, PUBLISHER = {Edizioni dell'ORSO (Alessandria, ITA)}, ISBN = {8876949186}, CONFERENCE_NAME = {12° EURALEX International Congress}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {6-9 Settembre 2006}, BOOKTITLE = {Proceedings in 12° EURALEX International Congress, Congresso internazionale di lessicografia}, EDITOR = {Corino, E. and Marello, C. and Onesti, C.}, } @INPROCEEDINGS{DELLORLETTA_2006_INPROCEEDINGS_DLMP_84630, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Searching treebanks for functional constraints: cross-lingual experiments in grammatical relation assignment}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84630}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, } @INPROCEEDINGS{DELLORLETTA_2006_INPROCEEDINGS_DLMP_84660, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {Probing the space of grammatical variation: induction of cross-lingual grammatical constraints from treebanks}, YEAR = {2006}, ABSTRACT = {The paper reports on a detailed quantitative analysis of distributional language data of both Italian and Czech, highlighting the relative contribution of a number of distributed grammatical factors to sentence-based identification of subjects and direct objects. The work uses a Maximum Entropy model of stochastic resolution of conflicting grammatical constraints and is demonstrably capable of putting explanatory theoretical accounts to the test of usage-based empirical verification.}, PAGES = {21-28}, URL = {https://publications.cnr.it/doc/84660}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {1-932432-78-7}, CONFERENCE_NAME = {Coling/ACL 2006}, CONFERENCE_PLACE = {Sydney (Australia)}, CONFERENCE_DATE = {22 July 2006}, BOOKTITLE = {Proceedings of the Workshop on Frontiers in Linguistically Annotated Corpora 2006 (LAC 06)}, } @INPROCEEDINGS{FRANCOPOULO_2006_INPROCEEDINGS_FBGCMPS_84649, AUTHOR = {Francopoulo, G. and Bel, N. and George, M. and Calzolari, N. and Monachini, M. and Pet, M. and Soria, C.}, TITLE = {Lexical markup framework (LMF) for NLP multilingual resources}, YEAR = {2006}, ABSTRACT = {Optimizing the production, maintenance and extension of lexical resources is one of the crucial aspects impacting Natural Language Processing (NLP). A second aspect involves optimizing the process leading to their integration into applications. In this respect, we believe that the production of a consensual specification on multilingual lexicons can be a useful aid for the various NLP actors. 
Within ISO, one purpose of LMF (ISO-24613) is to define a standard for lexicons that covers multilingual data.}, PAGES = {1-8}, URL = {https://publications.cnr.it/doc/84649}, ISBN = {1-932432-69-8}, CONFERENCE_NAME = {COLING-ACL Workshop on Multilingual Lexical Resources and Interoperability}, CONFERENCE_PLACE = {Sydney (Australia)}, CONFERENCE_DATE = {2006}, BOOKTITLE = {Proceedings of the Workshop on Multilingual Language Resources and Interoperability}, } @INPROCEEDINGS{FRANCOPOULO_2006_INPROCEEDINGS_FDMR_84631, AUTHOR = {Francopoulo, G. and Declerck, T. and Monachini, M. and Romary, L.}, TITLE = {The relevance of standards for research infrastructures}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84631}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{FRANCOPOULO_2006_INPROCEEDINGS_FGCMBPS_84632, AUTHOR = {Francopoulo, G. and George, M. and Calzolari, N. and Monachini, M. and Bel, N. and Pet, M. and Soria, C.}, TITLE = {LMF for multilingual, specialized lexicons}, YEAR = {2006}, PAGES = {27-32}, URL = {https://publications.cnr.it/doc/84632}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {24-26 Maggio 2006}, } @INPROCEEDINGS{FRANCOPOULO_2006_INPROCEEDINGS_FGCMBPS_84633, AUTHOR = {Francopoulo, G. and George, M. and Calzolari, N. and Monachini, M. and Bel, N. and Pet, M. and Soria, C.}, TITLE = {Lexical Markup Framework (LMF)}, YEAR = {2006}, ABSTRACT = {Optimizing the production, maintenance and extension of lexical resources is one of the crucial aspects impacting Natural Language Processing (NLP). A second aspect involves optimizing the process leading to their integration in applications. 
With this respect, we believe that the production of a consensual specification on lexicons can be a useful aid for the various NLP actors. Within ISO, the purpose of LMF is to define a standard for lexicons. LMF is a model that provides a common standardized framework for the construction of NLP lexicons. The goals of LMF are to provide a common model for the creation and use of lexical resources, to manage the exchange of data between and among these resources, and to enable the merging of large number of individual electronic resources to form extensive global electronic resources. In this paper, we describe the work in progress within the sub-group ISO-TC37/SC4/WG4. Various experts from a lot of countries have been consulted in order to take into account best practices in a lot of languages for (we hope) all kinds of NLP lexicons.}, PAGES = {233-236}, URL = {https://publications.cnr.it/doc/84633}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{FRANCOPOULO_2006_INPROCEEDINGS_FMRS_84650, AUTHOR = {Francopoulo, G. and Monachini, M. and Romary, L. and Salmon Alt, S.}, TITLE = {Lexical Markup Framework: Working to Reach a Consensual ISO Standard on Lexicons}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84650}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {22 Maggio 2006}, BOOKTITLE = {Lexical Markup Framework: Working to Reach a Consensual ISO Standard on Lexicons-Tutorial}, } @INPROCEEDINGS{GIOULI_2006_INPROCEEDINGS_GLGPMSCC_84634, AUTHOR = {Giouli, V. and Labropoulou, P. and Gavrilidou, M. and Piperidis, S. and Monachini, M. and Soria, C. and Calzolari, N. 
and Choukri, K.}, TITLE = {Language Resources Production Models: the Case of the INTERA Multilingual Corpus and Terminology}, YEAR = {2006}, ABSTRACT = {This paper reports on the multilingual Language Resources (MLRs), i.e. parallel corpora and terminological lexicons for less widely digitally available languages, that have been developed in the INTERA project and the methodology adopted for their production. Special emphasis is given to the reality factors that have influenced the MLRs development approach and their final constitution. Building on the experience gained in the project, a production model has been elaborated, suggesting ways and techniques that can be exploited in order to improve LRs production taking into account realistic issues.}, KEYWORDS = {multilingual parallel corpora, language resources production models, less widely digitally available languages}, PAGES = {609-614}, URL = {https://publications.cnr.it/doc/84634}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {24-26 Maggio 2006}, } @INPROCEEDINGS{GOLA_2006_INPROCEEDINGS_GR_84651, AUTHOR = {Gola, E. and Ruimy, N.}, TITLE = {Traduzione automatica e processi di comprensione: il lessico}, YEAR = {2006}, PAGES = {291-306}, URL = {https://publications.cnr.it/doc/84651}, VOLUME = {03}, PUBLISHER = {Aracne (Roma, ITA)}, ISBN = {88-548-0733-8}, CONFERENCE_NAME = {Tradurre e comprendere. Pluralità dei linguaggi e delle culture}, CONFERENCE_PLACE = {Piano di Sorrento}, CONFERENCE_DATE = {29/09-01/10/2005}, BOOKTITLE = {Tradurre e comprendere-pluralità dei linguaggi e delle culture}, EDITOR = {Pititto, R. and Venezia, S.}, } @INPROCEEDINGS{MARCHETTI_2006_INPROCEEDINGS_MTRRBMSCHH_84652, AUTHOR = {Marchetti, A. and Tesconi, M. and Ronzano, F. and Rosella, M. and Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N. and Huang, C. R. and Hsieh, S. 
K.}, TITLE = {Towards an Architecture for the GlobalWordNet Initiative}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84652}, CONFERENCE_NAME = {SWAP-06, the 3rd Italian Semantic Web Workshop}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{MARCHETTI_2006_INPROCEEDINGS_MTRRBMSCHH_263653, AUTHOR = {Marchetti, A. and Tesconi, M. and Ronzano, F. and Rosella, M. and Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N. and Huang, C. and Hsieh, S.}, TITLE = {Toward an Architecture for the Global Wordnet Initiative}, YEAR = {2006}, ABSTRACT = {Enhancing the development of multilingual lexicons is of foremost importance for intercultural collaboration to take place, as multilingual lexicons are the cornerstone of several multilingual applications. However, the development and maintenance of large-scale, robust multilingual dictionaries is a tantalizing task. Moreover, Semantic Web's growing interest towards the availability of high-quality lexical resources and their multilingual interoperability, is focusing more and more attention on this topic. In this paper we present a tool, based on a web service architecture, enabling semi-automatic generation of bilingual lexicons through linking of distributed monolingual lexical resources. In addition to lexicon development, the architecture also allows enrichment of monolingual source lexicons through exploitation of the semantic information encoded in corresponding entries. 
In the paper we describe our case study applied to the Italian and Chinese wordnets, and we illustrate how the architecture can be extended to access distributed multilingual WordNets over the Internet, paving the way to exploitation in a cross-lingual framework of the wealth of information built over the last decade.}, KEYWORDS = {Lexical resource, wordnet, multilingual interoperability, semantic web}, PAGES = {7-35}, URL = {http://ceur-ws.org/Vol-201/35.pdf}, CONFERENCE_NAME = {SWAP 2006-Semantic Web Applications and Perspectives}, CONFERENCE_PLACE = {Pisa, Italy}, CONFERENCE_DATE = {18-20 December, 2006}, } @INPROCEEDINGS{MARCONI_2006_INPROCEEDINGS_M_84635, AUTHOR = {Marconi, L.}, TITLE = {Message of the Chair of the Local Organising Committee}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84635}, CONFERENCE_NAME = {LREC 2006: Book of Abstracts 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{MARINELLI_2006_INPROCEEDINGS_M_84654, AUTHOR = {Marinelli, R.}, TITLE = {Computational Resources and Electronic Corpora in Metaphors Evaluation}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84654}, CONFERENCE_NAME = {Second International Conference of the German Cognitive Linguistics Association}, CONFERENCE_PLACE = {Munich}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{MARINELLI_2006_INPROCEEDINGS_MB_84636, AUTHOR = {Marinelli, R. and Bindi, R.}, TITLE = {Proper Names and Linguistic Dynamics}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84636}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{MARINELLI_2006_INPROCEEDINGS_MRS_84637, AUTHOR = {Marinelli, R. and Roventini, A. 
and Spadoni, G.}, TITLE = {Using Core Ontology for Domain Lexicon Structuring}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84637}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{MARINELLI_2006_INPROCEEDINGS_MS_84621, AUTHOR = {Marinelli, R. and Spadoni, G.}, TITLE = {Some Considerations in Structuring a Terminological Knowledge Base}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84621}, CONFERENCE_NAME = {Third International WordNet Conference}, CONFERENCE_PLACE = {Seoul}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{MARINELLI_2006_INPROCEEDINGS_MS_84653, AUTHOR = {Marinelli, R. and Spadoni, G.}, TITLE = {From Dictionary to Knowledge Base System}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84653}, CONFERENCE_NAME = {MACL (Multilingualism \& Applied Comparative Linguistics)}, CONFERENCE_PLACE = {Brussel}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{MONACHINI_2006_INPROCEEDINGS_M_84638, AUTHOR = {Monachini, M.}, TITLE = {LMF semantic package and mapping of existing semantic lexicons}, YEAR = {2006}, ABSTRACT = {The definition of a standard for the representation of lexical data has progressively become mandatory in the linguistic and computational linguistic community to cope with the ever increasing number of digital lexical data that are gathered and disseminated worldwide. 
LMF should be seen by the community as a tool for modelling one's own lexical data, with the possible result that people will provide useful feedback on the usability and needed evolution of the standard project.}, PAGES = {29}, URL = {https://publications.cnr.it/doc/84638}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {22 Maggio 2006}, BOOKTITLE = {Lexical Markup Framework: Working to Reach a Consensual ISO Standard on Lexicons-Tutorial}, EDITOR = {Francopoulo, G. and Monachini, M. and Romary, L. and Salmon Alt, S.}, } @INPROCEEDINGS{MONACHINI_2006_INPROCEEDINGS_MCCFMMOU_84639, AUTHOR = {Monachini, M. and Calzolari, N. and Choukri, K. and Friedrich, J. and Maltese, G. and Mammini, M. and Odijk, J. and Ulivieri, M.}, TITLE = {Unified Lexicon and Unified Morphosyntactic Specifications for Written and Spoken Italian}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84639}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{MONTEMAGNI_2006_INPROCEEDINGS_MPP_84659, AUTHOR = {Montemagni, S. and Paoli, M. and Picchi, E.}, TITLE = {ALT-WEB: l'Atlante Lessicale Toscano in rete}, YEAR = {2006}, ABSTRACT = {Scopo dell'articolo è la presentazione di ALT-Web, ovvero l'Atlante Lessicale Toscano in rete. ALT-Web è stato ideato per rendere il patrimonio linguistico-culturale testimoniato dall'Atlante Lessicale Toscano una risorsa educativa realmente disponibile in modo che possa fornire un contributo alla conservazione della memoria dell'identità culturale toscana e al contempo costituisca un prezioso punto di riferimento per lo studio di dinamiche linguistiche sia a livello areale sia a livello socio-culturale. 
La sua collocazione in rete porta inevitabilmente ALT-Web a rivolgersi a una vasta gamma di utenti non più circoscritta agli addetti ai lavori (ovvero dialettologi, linguisti, etno-linguisti), ma che include anche insegnanti, operatori culturali (ad esempio, personale di musei e di istituzioni culturali pubbliche e private) fino al cittadino navigatore di Internet che voglia capire di più della propria identità linguistica e culturale. Il vasto e variegato bacino di utenza a cui intende rivolgersi ALT-Web ha portato alla trasformazione della versione informatizzata dell'Atlante Lessicale Toscano (conosciuta come DBT-ALT) in una rete ipertestuale con modalità e funzionalità di accesso differenziate in relazione alle diverse classi di utenza; a questo aspetto, è legata l'altra interpretazione dell'acronimo ALT-Web, ovvero quella di "ALT come rete". L'articolo illustra aspetti del processo di progettazione e realizzazione dell'opera che rivestono un qualche interesse per il linguista e il dialettologo. In particolare, dopo un breve excursus che riepiloga le caratteristiche principali della risorsa di partenza, l'articolo illustra la progettazione e realizzazione di ALT-Web, partendo dall'analisi dei requisiti e la definizione delle caratteristiche generali per arrivare ad aspetti più specifici che riguardano le modalità di accesso ai materiali e la normalizzazione dei materiali dialettali in trascrizione fonetica.}, KEYWORDS = {Dialettologia Computazionale-Risorse dialettali in rete-Atlante lessicale}, PAGES = {209-241}, URL = {https://publications.cnr.it/doc/84659}, PUBLISHER = {Antenore (Roma, ITA)}, ISBN = {88-8455-606-6}, CONFERENCE_NAME = {Lessicografia Dialettale. Ricordando Paolo Zolli. Atti del Convegno di Studi}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {9-11 dicembre 2004}, BOOKTITLE = {Lessicografia dialettale: ricordando Paolo Zolli-Atti del convegno di studi, Venezia, 9-11 dicembre 2004}, EDITOR = {Bruni, F. 
and Marcato, C.}, } @INPROCEEDINGS{PARDELLI_2006_INPROCEEDINGS_PSGO_84640, AUTHOR = {Pardelli, G. and Sassi, M. and Goggi, S. and Orsolini, P.}, TITLE = {Natural Language Processing: A Terminological and Statistical Approach}, YEAR = {2006}, ABSTRACT = {The aim of this article is to provide a statistical representation of significant terms used in the field of Natural Language Processing from the 1960s till nowadays, in order to draft a survey on the most significant research trends in that period. By retrieving these keywords it should be possible to highlight the ebb and flow of some thematic topics. The NLP terminological sample derives from a database created for this purpose using the DBT software (Textual Data Base, ILC patent).}, KEYWORDS = {Natural Language Processing, Terminology}, PAGES = {2395-2398}, URL = {https://publications.cnr.it/doc/84640}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {Genoa, 24-26 May}, } @INPROCEEDINGS{ROVENTINI_2006_INPROCEEDINGS_R_84641, AUTHOR = {Roventini, A.}, TITLE = {Linking Verbal Entries of Different Lexical Resources}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84641}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genoa}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{ROVENTINI_2006_INPROCEEDINGS_RR_84658, AUTHOR = {Roventini, A. 
and Ruimy, N.}, TITLE = {Linking and harmonizing different lexical resources: a comparison of verbal entries}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84658}, CONFERENCE_NAME = {GWC 2006}, CONFERENCE_PLACE = {Seoul}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{RUIMY_2006_INPROCEEDINGS_R_84642, AUTHOR = {Ruimy, N.}, TITLE = {Merging two Ontology-based Lexical Resources}, YEAR = {2006}, ABSTRACT = {ItalWordNet (IWN) and PAROLE/SIMPLE/CLIPS (PSC), the two largest electronic, general-purpose lexical resources of Italian language present many compatible aspects although they are based on two different lexical models having their own underlying principles and peculiarities. Such compatibility prompted us to study the feasibility of semi-automatically linking and eventually merging the two lexicons. To this purpose, the mapping of the ontologies on which basis both lexicons are structured was performed and the sets of semantic relations enabling to relate lexical units were compared. An overview of this preliminary phase is provided in this paper. The linking methodology and related problematic issues are described. 
Beyond the advantage for the end user to dispose of a more exhaustive and in-depth lexical information combining the potentialities and most outstanding features offered by the two lexical models, resulting benefits and enhancements for the two resources are illustrated that definitely legitimize the soundness of this linking and merging initiative.}, KEYWORDS = {Lexical resource, semantic information, ontology mapping, linking}, PAGES = {1716-1721}, URL = {https://publications.cnr.it/doc/84642}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {24-28/05/2006}, } @INPROCEEDINGS{RUIMY_2006_INPROCEEDINGS_R_84643, AUTHOR = {Ruimy, N.}, TITLE = {Structuring a Domain Vocabulary in a General Knowledge Environment}, YEAR = {2006}, ABSTRACT = {The study which is reported here aims at investigating the extent to which the conceptual and representational tools provided by a lexical model designed for the semantic representation of general language may suit the requirements of knowledge modelling in a domain-specific perspective. A general linguistic ontology and a set of semantic links, which allow classifying, describing and interconnecting word senses, play a central role in structuring and representing such knowledge. 
The health and medicine vocabulary has been taken as a case study for this investigation.}, KEYWORDS = {Lexical resource, ontology, semantic relations, semantic network, domain specific knowledge}, PAGES = {2407-2411}, URL = {https://publications.cnr.it/doc/84643}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {24-28/05/2006}, } @INPROCEEDINGS{SASSI_2006_INPROCEEDINGS_S_84662, AUTHOR = {Sassi, M.}, TITLE = {Martí y Carpentier, voces de la América}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/84662}, CONFERENCE_NAME = {XXVIII Convegno Internazionale di Americanistica}, CONFERENCE_PLACE = {Perugia}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{SORIA_2006_INPROCEEDINGS_STBCMM_171465, AUTHOR = {Soria, C. and Tesconi, M. and Bertagna, F. and Calzolari, N. and Marchetti, A. and Monachini, M.}, TITLE = {Moving to dynamic computational lexicons with LeXFlow}, YEAR = {2006}, ABSTRACT = {LeXFlow is a framework for semi-automatic integration of lexicons, already expressed in standardized format. LeXFlow is intended as a tool for, on the one hand, paving the way to the development of dynamic multi-source lexicons; and on the other, for fostering the adoption of standards. Borrowing from techniques used in the domain of document workflows, we model the activity of lexicon management as a particular case of workflow instance, where lexical entries move across agents and become dynamically updated. To this end, we have designed a lexical flow (LF) corresponding to the scenario where an entry of a lexicon A becomes enriched via basically two steps. First, by virtue of being mapped onto a corresponding entry belonging to a lexicon B, the entry(LA) inherits the semantic relations available in B. 
Second, by resorting to an automatic application that acquires information about semantic relations from corpora, the relations acquired are integrated into the entry and proposed to the human encoder. As a result of the lexical flow, in addition, for each starting lexical entry(LA) mapped onto a corresponding entry(LB) the flow produces a new entry representing the merging of the original two entries.}, KEYWORDS = {computational lexicons, collaborative authoring}, PAGES = {12}, URL = {https://publications.cnr.it/doc/171465}, ISBN = {2-9517408-2-4}, CONFERENCE_NAME = {LREC 2006: 5th International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {24-26 Maggio 2006}, } @INPROCEEDINGS{TESCONI_2006_INPROCEEDINGS_TMBMHCS_173931, AUTHOR = {Tesconi, M. and Marchetti, A. and Bertagna, F. and Monachini, M. and Huang, C. and Calzolari, N. and Soria, C.}, TITLE = {Towards agent-based cross-lingual interoperability of distributed lexical resources}, YEAR = {2006}, ABSTRACT = {In this paper we present an application fostering the integration and interoperability of computational lexicons, focusing on the particular case of mutual linking and cross-lingual enrichment of two wordnets, ItalWordNet and Sinica-BOW lexicons. This is intended as a case-study investigating the needs and requirements of semi-automatic integration and interoperability of lexical resources.}, KEYWORDS = {wordnet, multilingual computational lexicons, collaborative authoring}, PAGES = {17-24}, URL = {https://publications.cnr.it/doc/173931}, ISBN = {1-932432-69-8}, CONFERENCE_NAME = {ACL Workshop on Multilingual Lexical Resources and Interoperability}, CONFERENCE_PLACE = {Sydney (Australia)}, CONFERENCE_DATE = {15-23 Luglio 2006}, BOOKTITLE = {Proceedings of the Workshop on Multilingual Language Resources and Interoperability}, } @INPROCEEDINGS{TESCONI_2006_INPROCEEDINGS_TMBMSC_83590, AUTHOR = {Tesconi, M. and Marchetti, A. and Bertagna, F. and Monachini, M. 
and Soria, C. and Calzolari, N.}, TITLE = {LeXFlow: a system for cross-fertilization of computational lexicons}, YEAR = {2006}, ABSTRACT = {This demo presents LeXFlow, a workflow management system for cross-fertilization of computational lexicons. Borrowing from techniques used in the domain of document workflows, we model the activity of lexicon management as a set of workflow types, where lexical entries move across agents in the process of being dynamically updated. A prototype of LeXFlow has been implemented with extensive use of XML technologies (XSLT, XPath, XForms, SVG) and open-source tools (Cocoon, Tomcat, MySQL). LeXFlow is a web-based application that enables the cooperative and distributed management of computational lexicons.}, KEYWORDS = {computational lexicons, collaborative authoring}, URL = {https://publications.cnr.it/doc/83590}, DOI = {10.3115/1225403.1225406}, CONFERENCE_NAME = {COLING-ACL '06 Proceedings of the COLING/ACL on Interactive presentation sessions}, CONFERENCE_PLACE = {Sydney (Australia)}, CONFERENCE_DATE = {16-23 luglio 2006}, } @INPROCEEDINGS{TESCONI_2006_INPROCEEDINGS_TMBMSC_84656, AUTHOR = {Tesconi, M. and Marchetti, A. and Bertagna, F. and Monachini, M. and Soria, C. and Calzolari, N.}, TITLE = {LeXFlow: a framework for cross-fertilization of computational lexicons}, YEAR = {2006}, ABSTRACT = {This demo presents LeXFlow, a workflow management system for cross-fertilization of computational lexicons. Borrowing from techniques used in the domain of document workflows, we model the activity of lexicon management as a set of workflow types, where lexical entries move across agents in the process of being dynamically updated. A prototype of LeXFlow has been implemented with extensive use of XML technologies (XSLT, XPath, XForms, SVG) and open-source tools (Cocoon, Tomcat, MySQL). 
LeXFlow is a web-based application that enables the cooperative and distributed management of computational lexicons.}, PAGES = {9-12}, URL = {https://publications.cnr.it/doc/84656}, ISBN = {1-932432-69-8}, CONFERENCE_NAME = {COLING/ACL 2006}, CONFERENCE_PLACE = {Sydney (Australia)}, CONFERENCE_DATE = {15-23 Luglio 2006}, BOOKTITLE = {Proceedings of the COLING/ACL 2006 Interactive Presentation Sessions}, } @INPROCEEDINGS{TOKUNAGA_2006_INPROCEEDINGS_TSCCMSHPXYK_84657, AUTHOR = {Tokunaga, T. and Sornlertlamvanich, V. and Charoenporn, T. and Calzolari, N. and Monachini, M. and Soria, C. and Huang, C. and Prevot, L. and Xia, Y. and Yu, H. and Kiyoaki, S.}, TITLE = {Infrastructure for standardization of Asian language resources}, YEAR = {2006}, ABSTRACT = {As an area of great linguistic and cultural diversity, Asian language resources have received much less attention than their western counterparts. Creating a common standard for Asian language resources that is compatible with an international standard has at least three strong advantages: to increase the competitive edge of Asian countries, to bring Asian countries closer to their western counterparts, and to bring more cohesion among Asian countries. To achieve this goal, we have launched a two year project to create a common standard for Asian language resources. The project is comprised of four research items, (1) building a description framework of lexical entries, (2) building sample lexicons, (3) building an upper-layer ontology and (4) evaluating the proposed framework through an application. 
This paper outlines the project in terms of its aim and approach.}, PAGES = {827-834}, URL = {https://publications.cnr.it/doc/84657}, ISBN = {1-932432-69-8}, CONFERENCE_NAME = {COLING/ACL 2006}, CONFERENCE_PLACE = {Sydney (Australia)}, CONFERENCE_DATE = {15-26 luglio 2006}, BOOKTITLE = {Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions}, } @INPROCEEDINGS{TURRINI_2006_INPROCEEDINGS_TBP_84645, AUTHOR = {Turrini, G. and Baroni, P. and Paccosi, A.}, TITLE = {AddizionarioPLUS: uno strumento interattivo per l'educazione interculturale}, YEAR = {2006}, ABSTRACT = {Il lavoro descrive AddizionarioPLUS, versione rinnovata e ampliata di Addizionario, software per lo sviluppo linguistico e cognitivo dei bambini della scuola primaria. I principali cambiamenti introdotti nella nuova versione riguardano a) l'interfaccia grafica, b) il linguaggio di programmazione utilizzato, c) una nuova architettura che renderà possibili attività collaborative, d) un modulo destinato al maestro, e) l'ampliamento del numero e della tipologia degli esercizi, f) la realizzazione di un ricco apparato di aiuti in linea. L'implementazione del prototipo di AddizionarioPLUS, ormai in fase avanzata, procede, come già fu per Addizionario, in stretto contatto con bambini e insegnanti.}, KEYWORDS = {strumenti multimediali interattivi, sviluppo linguistico e cognitivo, educazione interculturale}, PAGES = {483-488}, URL = {https://publications.cnr.it/doc/84645}, CONFERENCE_NAME = {Didamatica 2006}, CONFERENCE_PLACE = {Cagliari}, CONFERENCE_DATE = {11-13/05/2006}, } @INPROCEEDINGS{BARTOLINI_2006_INPROCEEDINGS_BGMMABSNBB_84663, AUTHOR = {Bartolini, R. and Giovannetti, E. and Marchi, S. and Montemagni, S. and Andreatta, C. and Brunelli, R. and Stecher, R. and Niederée, C. and Bouquet, P. 
and Bortoli, S.}, TITLE = {Ontology Learning in Multimedia Information Extraction from Product Catalogues}, YEAR = {2006}, ABSTRACT = {We propose a methodology for extracting multimedia information from product catalogues empowered by the synergetic use and extension of a domain ontology. The use of domain ontologies in this context additionally opens up innovative ways of catalogue use. The method is characterized by incrementally feeding and exploiting the ontology during an information extraction process, implemented by the semantic annotation of the analysed document, and by providing support for detecting existing similar ontologies to enable reuse of (parts of) them.}, KEYWORDS = {knowledge-driven multimedia analysis, ontology learning, semi-automatic content annotation tools}, URL = {https://publications.cnr.it/doc/84663}, CONFERENCE_NAME = {BOEMIE 2006}, CONFERENCE_PLACE = {Podebrady, Czech Republic}, CONFERENCE_DATE = {6 ottobre 2006}, } @INPROCEEDINGS{MARINELLI_2006_INPROCEEDINGS_M_112931, AUTHOR = {Marinelli, R.}, TITLE = {Proposta di contributo da parte dell'ILC alla REI (Rete di eccellenza dell'italiano istituzionale) con un database di terminologia marittima}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/112931}, CONFERENCE_NAME = {Secondo incontro della Rete di eccellenza dell'italiano istituzionale}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{PIRRELLI_2006_INPROCEEDINGS_P_112917, AUTHOR = {Pirrelli, V.}, TITLE = {Parlare per sapere: la lingua come accesso alla conoscenza}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/112917}, CONFERENCE_NAME = {Conferenza TAL 2006: Uomini e macchine, un colloquio possibile}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{PIRRELLI_2006_INPROCEEDINGS_PLM_112916, AUTHOR = {Pirrelli, V. and Lenci, A. 
and Montemagni, S.}, TITLE = {Probing the space of grammatical variation: induction of cross-lingual grammatical constraints from treebanks}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/112916}, CONFERENCE_NAME = {Language resources and language research: typology, second language acquisition, English Linguistics}, CONFERENCE_PLACE = {Pavia}, CONFERENCE_DATE = {2006}, } @INPROCEEDINGS{RUIMY_2006_INPROCEEDINGS_R_112892, AUTHOR = {Ruimy, N.}, TITLE = {Il modello lessicale SIMPLE: dal monolingue al bilingue}, YEAR = {2006}, ABSTRACT = {Il modello lessicale PAROLE-SIMPLE ha permesso la realizzazione di lessici elettronici multilivelli armonizzati per le 12 lingue della CE. Successivamente, nell'ambito del progetto nazionale italiano 'Corpora e Lessici dell'Italiano Parlato e Scritto', è stato sviluppato il lessico CLIPS utilizzando sia il modello che i dati del lessico italiano PAROLE-SIMPLE. CLIPS, la cui costruzione si è appena conclusa, rappresenta attualmente la più vasta risorsa lessicale elettronica dell'italiano. E' adesso allo studio un progetto di utilizzo di questo lessico come input per la creazione di una risorsa lessicale elettronica bilingue italiano-francese. Due scenari alternativi sono al momento in corso di valutazione: 1) il collegamento semi-automatizzato di due lessici elettronici monolingui basati sullo stesso modello, i.e. CLIPS ed il lessico francese PAROLE-SIMPLE; 2) la derivazione di un database lessicale bilingue a partire dal lessico CLIPS e da coppie di sensi corrispondenti IT-FR estratti da un dizionario bilingue. 
Questa seconda ipotesi si basa sulla ricerca di correlazioni tra l'informazione fornita dagli indicatori di senso nelle entrate bilingui e i vari elementi descrittivi contenuti nel lessico CLIPS.}, PAGES = {279-290}, URL = {https://publications.cnr.it/doc/112892}, ISBN = {84-935340-4-8}, CONFERENCE_NAME = {Tercero Seminario de la Escuela Interlatina de Altos Estudios en Lingüística Aplicada}, CONFERENCE_PLACE = {San Millán de la Cogolla}, CONFERENCE_DATE = {22-25 ottobre 2003}, BOOKTITLE = {La lexicografía plurilingüe en lenguas latinas}, } @TECHREPORT{BINDI_2006_TECHREPORT_BMBGPR_157405, AUTHOR = {Bindi, R. and Marinelli, R. and Biagini, L. and Goggi, S. and Picchi, E. and Rossi, S.}, TITLE = {LE-PAROLE, Italian Corpus Description, Part Available for Distribution}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157405}, } @TECHREPORT{CALZOLARI_2006_TECHREPORT_C_157397, AUTHOR = {Calzolari, N.}, TITLE = {Language Technologies (LT)}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157397}, } @TECHREPORT{CIGNONI_2006_TECHREPORT_C_157398, AUTHOR = {Cignoni, L.}, TITLE = {Filastrocche in inglese per la scuola materna ed elementare}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157398}, } @TECHREPORT{CIGNONI_2006_TECHREPORT_C_157399, AUTHOR = {Cignoni, L.}, TITLE = {Regole Editoriali}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157399}, } @TECHREPORT{MARINELLI_2006_TECHREPORT_M_157401, AUTHOR = {Marinelli, R.}, TITLE = {La terminologia Marittima-un database semantico-lessicale}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157401}, } @TECHREPORT{MONACHINI_2006_TECHREPORT_MSCFB_157402, AUTHOR = {Monachini, M. and Soria, C. and Calzolari, N. and Francopoulo, G. and Bel, N.}, TITLE = {WD of Lexica standard for CD ballot}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157402}, } @TECHREPORT{QUOCHI_2006_TECHREPORT_QMCDS_157403, AUTHOR = {Quochi, V. and Monachini, M. and Calzolari, N. and Del Gratta, R. 
and Sassolini, E.}, TITLE = {Bio-Lexicon Model and Preliminary ISO Conformant Data Categories}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157403}, } @TECHREPORT{RUIMY_2006_TECHREPORT_RMC_157404, AUTHOR = {Ruimy, N. and Monachini, M. and Calzolari, N.}, TITLE = {Lessico Computazionale Multilivello dell'Italiano PAROLE-SIMPLE-CLIPS}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157404}, } @TECHREPORT{SABA_2006_TECHREPORT_S_157407, AUTHOR = {Saba, A.}, TITLE = {Il lessico del Tractado de la Sphera de Iohannes de Sacrobusto}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157407}, } @TECHREPORT{SABA_2006_TECHREPORT_S_157408, AUTHOR = {Saba, A.}, TITLE = {Il lessico del Arte de marear de Juan de Moya. Ed. 1564}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157408}, } @TECHREPORT{SASSI_2006_TECHREPORT_SC_157406, AUTHOR = {Sassi, M. and Cinini, A.}, TITLE = {Content-analysis dei provvedimenti della sezione disciplinare del C. S. M}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157406}, } @MISC{BARTOLINI_2006_MISC_BDLMMP_151563, AUTHOR = {Bartolini, R. and Dell'Orletta, F. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-to-Knowledge (T2K) Versione 2}, YEAR = {2006}, ABSTRACT = {Versione 2. Text-to-Knowledge (T2K) è una piattaforma software di supporto avanzato alla gestione documentale per la creazione dinamica di repertori terminologici e ontologie di dominio a partire da testi e per l'indicizzazione concettuale di documenti. Il sistema T2K si propone di offrire una batteria integrata di strumenti avanzati di analisi linguistica del testo, analisi statistica e apprendimento automatico del linguaggio, destinati a offrire una rappresentazione accurata del contenuto di una base documentale non strutturata, per scopi di indicizzazione avanzata e navigazione intelligente. 
I risultati di questo processo di acquisizione sono annotati in forma di metadati XML, offrendo in tal modo la prospettiva di una sempre crescente e diretta interoperabilità con sistemi automatici per la produzione di contenuti digitali selezionati e strutturati dinamicamente su misura, per diversi profili di utenza. Versioni prototipali di T2K sono già operative su alcuni portali della pubblica amministrazione e sono state applicate per l'indicizzazione di contenuti didattici multimediali. E' in corso l'integrazione della tecnologia T2K nel sistema di gestione informatica di documentazione scientifica del CNR.}, KEYWORDS = {text to knowledge, nlp, estrazione terminologica, ontology learning, indicizzazione terminologica}, URL = {https://publications.cnr.it/doc/151563}, } @MISC{BINDI_2006_MISC_B_151555, AUTHOR = {Bindi, R.}, TITLE = {CLIC}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/151555}, } @MISC{CAPPELLI_2006_MISC_CBRPNAPF_151554, AUTHOR = {Cappelli, G. and Bozzi, A. and Ruffolo, P. and Passarotti, M. and Nascimento, A. A. and Alberto, P. F. and Pena, A. N. and Frade, S.}, TITLE = {LECTIO (Latim em Exercício Contínuo através de Tecnologia Informática Orientada)}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/151554}, } @MISC{CECCOTTI_2006_MISC_CS_151558, AUTHOR = {Ceccotti, M. L. and Sassi, M.}, TITLE = {Dall'Archivio Elettronico delle Opere di Carlo Emilio Gadda. Studi e ricerche con strumenti computazionali dell'opera dell'ingegnere-scrittore}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/151558}, } @MISC{CIGNONI_2006_MISC_C_157400, AUTHOR = {Cignoni, L.}, TITLE = {Editorial Rules, libraweb}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/157400}, } @MISC{MARINELLI_2006_MISC_MS_151562, AUTHOR = {Marinelli, R. 
and Spadoni, G.}, TITLE = {MARITERM Database di terminologia marittima}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/151562}, } @MISC{MONTEMAGNI_2006_MISC_M_151556, AUTHOR = {Montemagni, S.}, TITLE = {La Treebank Sintattico Semantica dell'Italiano del progetto SI-TAL}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/151556}, } @MISC{PICCHI_2006_MISC_PMSCP_151557, AUTHOR = {Picchi, E. and Montemagni, S. and Sassolini, E. and Cucurullo, S. and Paoli, M.}, TITLE = {ALTWEB}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/151557}, } @MISC{PICCHI_2006_MISC_PSCSC_151561, AUTHOR = {Picchi, E. and Sassi, M. and Ceccotti, M. L. and Sassolini, E. and Cucurullo, S.}, TITLE = {Linguistic Miner}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/151561}, } @MISC{ROVENTINI_2006_MISC_RABCM_151559, AUTHOR = {Roventini, A. and Alonge, A. and Bertagna, F. and Calzolari, N. and Marinelli, R.}, TITLE = {ItalWordNet database semantico-lessicale dell’italiano}, YEAR = {2006}, URL = {https://publications.cnr.it/doc/151559}, } @ARTICLE{BERTAGNA_2005_ARTICLE_BCS_30871, AUTHOR = {Bertagna, F. and Chiran, L. and Simi, M.}, TITLE = {ILC-UniPi Italian QA}, YEAR = {2005}, ABSTRACT = {This paper introduces the general architecture of a prototype for monolingual Italian QA. The adopted strategies, the tools and resources for the linguistic processing are presented, together with the system results. We would like to thank Simone Pecunia and Giuseppe Attardi for their indispensable help and Nicoletta Calzolari and Irina Prodanof for their comments and suggestions. 
We also thank Roberto Bartolini, Alessandro Lenci, Simonetta Montemagni and Vito Pirrelli for the kind concession of text analysis tools.}, PAGES = {569-580}, URL = {https://publications.cnr.it/doc/30871}, VOLUME = {3491}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, JOURNAL = {Lecture notes in computer science}, } @ARTICLE{CALZOLARI_2005_ARTICLE_C_64513, AUTHOR = {Calzolari, N.}, TITLE = {Foreword}, YEAR = {2005}, PAGES = {9-17}, URL = {https://publications.cnr.it/doc/64513}, VOLUME = {22-23}, } @ARTICLE{CALZOLARI_2005_ARTICLE_C_64514, AUTHOR = {Calzolari, N.}, TITLE = {Antonio Zampolli and computational linguistics}, YEAR = {2005}, PAGES = {497-517}, URL = {https://publications.cnr.it/doc/64514}, VOLUME = {15 /4}, } @ARTICLE{CALZOLARI_2005_ARTICLE_CI_168525, AUTHOR = {Calzolari, N. and Ide, N.}, TITLE = {Language Resources and Evaluation}, YEAR = {2005}, PAGES = {1-118}, URL = {https://publications.cnr.it/doc/168525}, VOLUME = {39}, PUBLISHER = {Kluwer Academic Publishers (Dordrecht, Paesi Bassi)}, ISSN = {0010-4817}, JOURNAL = {Computers and the humanities (Print)}, } @ARTICLE{IDE_2005_ARTICLE_IC_30868, AUTHOR = {Ide, N. and Calzolari, N.}, TITLE = {Introduction to the Special Inaugural Issue}, YEAR = {2005}, ABSTRACT = {This first issue of Language Resources and Evaluation is dedicated to the memory of Antonio Zampolli, whom few would dispute is the one person who has led the way in promoting and establishing the development of language resources (LR) of all kinds for the past four decades. In this inaugural issue, we have attempted to bring together articles by major figures in the field in order to provide an overview of the history, state of the art, and the future of the creation, annotation, exploitation, evaluation, and distribution of LR. 
Hopefully, this collection of articles will serve not only as a tribute to Antonio, but also as a framework out of which this journal – which almost certainly would not have existed were it not for him – can grow.}, KEYWORDS = {Antonio Zampolli, language resources and evaluation}, PAGES = {1-7}, URL = {https://publications.cnr.it/doc/30868}, VOLUME = {39}, PUBLISHER = {Kluwer Academic Publishers (Dordrecht, Paesi Bassi)}, ISSN = {0010-4817}, JOURNAL = {Computers and the humanities (Print)}, } @ARTICLE{LENCI_2005_ARTICLE_LMP_64502, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Acquiring and Representing Meaning: Theoretical and Computational Perspectives}, YEAR = {2005}, PAGES = {19-66}, URL = {https://publications.cnr.it/doc/64502}, VOLUME = {22-23}, } @ARTICLE{MAEGAARD_2005_ARTICLE_MCCO_30872, AUTHOR = {Maegaard, B. and Choukri, K. and Calzolari, N. and Odijk, J.}, TITLE = {ELRA – European Language Resources Association-Background, Recent Developments and Future Perspectives}, YEAR = {2005}, ABSTRACT = {The European Language Resources Association (ELRA) was founded in 1995 with the mission of providing language resources (LR) to European research institutions and companies. In this paper we describe the background, the mission and the major activities since then.}, KEYWORDS = {evaluation, language resources, production, standards, validation}, PAGES = {9-23}, URL = {https://publications.cnr.it/doc/30872}, VOLUME = {39}, DOI = {10.1007/s10579-005-2692-5}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @ARTICLE{MONACHINI_2005_ARTICLE_MS_64515, AUTHOR = {Monachini, M. 
and Soria, C.}, TITLE = {Building Multilingual Terminological Lexicons for Less Widely Available Languages}, YEAR = {2005}, ABSTRACT = {Availability of Linguistic Resources for the development of Human Language Technology applications is nowadays recognized as a critical issue with both political and economic impact and implications on the sphere of cultural identity. This paper reports about the experience gained during the INTERA European project for the production of multilingual terminological lexicons for less widely available languages, i.e. those languages that suffer from poor representation over the net and from scarce computational resources, but yet are requested by the market. It discusses the procedure followed within the project, focuses on the problems faced which had an impact on the initial goals, presents the necessary modifications that resulted from these problems, evaluates the market needs as attested by various surveys, and describes the methodology that is proposed for the efficient production of Multilingual Terminological Lexicons.}, PAGES = {251-261}, URL = {https://publications.cnr.it/doc/64515}, VOLUME = {15}, PUBLISHER = {Polish Scientific Publishers PWN (Warszawa, Polonia)}, ISSN = {1230-2384}, JOURNAL = {Archives of Control Sciences}, } @ARTICLE{PARDELLI_2005_ARTICLE_PO_255647, AUTHOR = {Pardelli, G. and Orsolini, P.}, TITLE = {Bibliography of Antonio Zampolli (from 1962 to 2004)}, YEAR = {2005}, KEYWORDS = {Bibliography, Computational Linguistics}, PAGES = {501-517}, URL = {https://publications.cnr.it/doc/255647}, VOLUME = {15 (LI)}, PUBLISHER = {Polish Scientific Publishers PWN (Warszawa, Polonia)}, ISSN = {1230-2384}, JOURNAL = {Archives of Control Sciences}, } @BOOK{LENCI_2005_BOOK_LMP_136436, AUTHOR = {Lenci, A. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {Acquiring and Representing Word Meaning: Computational perspectives}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/136436}, PUBLISHER = {Istituti Editoriali e Poligrafici Internazionali (Pisa-Roma, ITA)}, ISBN = {88-8147-413-1}, } @BOOK{LENCI_2005_BOOK_LMP_136437, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Testo e computer-Elementi di linguistica computazionale}, YEAR = {2005}, ABSTRACT = {In che modo il computer può aiutarci a comprendere come funziona la nostra lingua? Cosa significa analizzare un testo con l'aiuto di un calcolatore? In che misura possiamo estendere le potenzialità del computer rendendolo capace di interagire con gli utenti umani nella loro lingua? Queste e altre domande sono l'oggetto di indagine della linguistica computazionale, una disciplina che ha al suo centro proprio il rapporto tra lingua e computer. Il libro fornisce gli elementi di base della linguistica computazionale partendo da un interesse primario per il testo, la sua struttura e il suo contenuto. Il volume propone una sintesi equilibrata e accessibile tra sapere e fare, nozioni di base e loro applicazione, ed è destinato in primo luogo agli studenti delle facoltà umanistiche e scientifiche interessati all'interazione tra scienze umane e informatica, ma anche agli studiosi che vogliano imparare a usare il computer come strumento di ricerca sul linguaggio.}, KEYWORDS = {Linguistica Computazionale}, PAGES = {255}, URL = {https://publications.cnr.it/doc/136437}, PUBLISHER = {Carocci (Roma, ITA)}, ISBN = {8843034251}, } @INCOLLECTION{BERNDT_2005_INCOLLECTION_BBCT_136439, AUTHOR = {Berndt, A. and Bianchi, F. and Cignoni, L. 
and Turrini, G.}, TITLE = {COME IN: Children On-line in Multimedia Environments and Intercultural Networks}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/136439}, PUBLISHER = {Wit Press (Southampton, GBR)}, } @INCOLLECTION{QUOCHI_2005_INCOLLECTION_Q_136443, AUTHOR = {Quochi, V.}, TITLE = {Issues on the acquisition of Italian complex nominals from text corpora: a computational approach combining syntactic and semantic information}, YEAR = {2005}, ABSTRACT = {The paper addressed the issue of Italian Complex Nominals from an (automatic) acquisition and representational perspective. Just like English noun compounds, ICNs blur the distinction between the syntactic and the lexical component because they are (at least) partially non-transparent but, nevertheless, show regularities both at the syntactic and at the semantic level. This contribution reports on an experiment conducted to identify the highest possible number of productive syntactic-semantic patterns of ICN formation, and to make explicit the particular semantic relation that exists between the head of the phrase and its modifier(s). I rely on a non-traditional generative theory of the lexicon, namely the Generative Lexicon, as a model for the representation/ interpretation of ICNs which provides us with a structured representation of the internal semantics of lexical items. The experiment explored the representational power of the qualia structure with respect to ICNs.}, KEYWORDS = {complex nominals, multiword expressions, lexicon, lexical representation, generative lexicon}, PAGES = {153-174}, URL = {https://publications.cnr.it/doc/136443}, PUBLISHER = {Edizioni Plus srl (Pisa, ITA)}, ISBN = {9788884922366}, BOOKTITLE = {Studies in the Semantics of Lexical Combinatory Patterns}, EDITOR = {Bertuccelli, M.}, } @EDITORIAL{PIRRELLI_2005_EDITORIAL_PM_146069, AUTHOR = {Pirrelli, V. 
and Montemagni, S.}, TITLE = {Acquisition and Representation of Word Meaning: Theoretical and computational perspectives}, YEAR = {2005}, KEYWORDS = {Lexical semantics, Distributional semantics, Lexicon acquisition}, URL = {https://publications.cnr.it/doc/146069}, VOLUME = {XXII-XXIII}, PUBLISHER = {Istituti Editoriali e Poligrafici Internazionali (Pisa-Roma, ITA)}, ISBN = {88-8147-413-1}, } @INPROCEEDINGS{BARTOLINI_2005_INPROCEEDINGS_BGLMP_84576, AUTHOR = {Bartolini, R. and Giorgetti, D. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Automatic Incremental Term Acquisition from Domain Corpora}, YEAR = {2005}, ABSTRACT = {We describe a technique for the acquisition of terms from Italian domain text corpora, which relies both on sophisticated linguistic analysis and on statistical measures applied to linguistically processed text rather than to raw text as it is usually the case. The main advantage of this technique is that minimal a priori knowledge of term structure is required, thus allowing to explore and discover terms in a given domain without imposing a strict pattern matching structure on them, and also to easily extend it to different domains. The approach we present in this paper is incremental as it may be iterated to discover terms of increasing complexity built on top of terms discovered in the previous iteration. 
The reason why it is convenient to adopt such an incremental approach is that it allows to "clean" data from noise in the first step, elicitating the constituent terms, and then to refine term acquisition on "skimmed" term data.}, PAGES = {293-300}, URL = {https://publications.cnr.it/doc/84576}, CONFERENCE_NAME = {7th International conference on Terminology and Knowledge Engineering (TKE2005)}, CONFERENCE_PLACE = {Copenhagen}, CONFERENCE_DATE = {2005}, BOOKTITLE = {Proceedings of TKE 2005-7th International Conference on Terminology and Knowledge Engineering}, } @INPROCEEDINGS{BIAGIOLI_2005_INPROCEEDINGS_BFPMS_172458, AUTHOR = {Biagioli, C. and Francesconi, E. and Passerini, A. and Montemagni, S. and Soria, C.}, TITLE = {Automatic semantics extraction in law documents}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/172458}, CONFERENCE_NAME = {Tenth International Conference on Artificial Intelligence and Law (ICAIL 2005)}, CONFERENCE_PLACE = {Bologna}, CONFERENCE_DATE = {2005}, } @INPROCEEDINGS{BIANCHI_2005_INPROCEEDINGS_BT_84593, AUTHOR = {Bianchi, F. and Turrini, G.}, TITLE = {COME IN: un sito per l'insegnamento/apprendimento delle lingue straniere nella scuola primaria}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84593}, CONFERENCE_NAME = {Didamatica 2005}, CONFERENCE_PLACE = {Potenza}, CONFERENCE_DATE = {2005}, } @INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_84577, AUTHOR = {Calzolari, N.}, TITLE = {Language Resources and Content Interoperability. 
Technical, strategic and political issues for a new generation of Language Resources}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84577}, CONFERENCE_NAME = {IX Simposio Internacional de Comunicación Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {2005}, } @INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_84578, AUTHOR = {Calzolari, N.}, TITLE = {Language Resources: priorities and challenges}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84578}, CONFERENCE_NAME = {Symposium on Natural Processing and Image Recognition}, CONFERENCE_PLACE = {Kyoto}, CONFERENCE_DATE = {2005}, } @INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_84594, AUTHOR = {Calzolari, N.}, TITLE = {Antonio Zampolli-A life Computational Linguistics}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84594}, CONFERENCE_NAME = {L\&T'05-2nd Language Technologies as a Challenge for Computer Science and Linguistics}, CONFERENCE_PLACE = {Poznan, Poland}, CONFERENCE_DATE = {2005}, } @INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_84646, AUTHOR = {Calzolari, N.}, TITLE = {Language Resources, Language Technology, Linguistics: isn’t this too narrow?}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84646}, CONFERENCE_NAME = {Human Language Technologies as a Challenge for Computer Science and Linguistics, 2nd L\&T'05}, CONFERENCE_PLACE = {Poznan (Poland)}, CONFERENCE_DATE = {2005}, } @INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_CS_84595, AUTHOR = {Calzolari, N. and Soria, C.}, TITLE = {A new paradigm for an Open Distributed Language Resource Infrastructure: the case of Computational Lexicons}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84595}, CONFERENCE_NAME = {Knowledge Collection from Volunteer Contributors. Papers from the 2005 AAAI Spring Symposium}, CONFERENCE_PLACE = {Stanford}, CONFERENCE_DATE = {2005}, } @INPROCEEDINGS{CASELLI_2005_INPROCEEDINGS_CP_84596, AUTHOR = {Caselli, T. 
and Prodanof, I.}, TITLE = {A corpus-based model for bridging anaphora resolution in Italian}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84596}, CONFERENCE_NAME = {Conference RANLP 2005}, CONFERENCE_PLACE = {Borovets, Bulgaria}, CONFERENCE_DATE = {2005}, } @INPROCEEDINGS{CUTUGNO_2005_INPROCEEDINGS_CMRR_175675, AUTHOR = {Cutugno, P. and Marconi, L. and Ratti, D. and Rolando, C.}, TITLE = {Informaciones estadísticas en CALIS: Corpus y Archivo Léxical del Italiano Escrito}, YEAR = {2005}, ABSTRACT = {Informaciones estadísticas en CALIS: Corpus y Archivo Léxical del Italiano Escrito. CALIS es una base de datos de la lengua italiana escrita de 3.798.275 palabras y esta formada de textos escritos de varios generes pesados oportunamente y selecionados en tres distintos sectores: diaros, periodicos y libros. Este producto representa el italiano efectivamente leido mas bien que toda la lengua italiana escrita. Esta eleccion se justifica en cuanto se deseaba construir un corpus, y en consecuencia un lexico de frecuencia, que se acercara los mas posible al lexico mental de un hablante de media cultura y no a un diccionario de la lengua italiana. Los diarios con 1.836.119 palabras estan extraido de tres diarios que son los mas importantes y leidos en Italia: Il Corriere Della Sera, Repubblica, La Stampa. En cada diario se han elegidos textos de 9 diferentes subsectores: economia, cronica local, cronica mundana, cronica negra, politica exterior, politica interior, ciencia, espectaculo y deporte. El sector de los periodicos esta constituido por 1.306.653 palabras elejdas entre 12 differentes subsectores: arte-ciencia-tecnica, auto-nautica, ninos-muchachos, casa-hobby, feminil, fotonovelas, informacion general, cronica mundana, radio-television, deporte, viajes-ecologia y otro. 
El sector de los libros esta constituido por 655.503 palabras elejdas entre 13 generos literarios: arte, ninos, fantascienza, gialli espionaje, hobby y viajes, narrativa clasica, narrativa moderna, rosa, ensaystica, ciencias naturales y exactas, ciencias sociales y humanas, teatro y poesia. En este trabajo se analizan las informaciones extraidas del lexico de frecuencia del corpus lematizado de CALIS (Corpus e Archivio Lessicale Italiano Scritto) relativamente a las diferentes categorias gramaticales, a los datos de frecuencia y de dispercion de cada lemma y de cada forma en los distintos sectores y subsectores. Se efectuan por algunos sectores analisis sobre el acrecentamiento de las formas/lemas con el fin de efectuar una evaluacion sobre las posibles y corectas dimensiones de un corpus para ser representativo de la lengua analizada.}, KEYWORDS = {Italiano, Analisi Statistica, Lessico di Frequenza}, PAGES = {361-365}, URL = {http://www.santiago.cu/hosting/linguistica/descargar.php?d=194}, PUBLISHER = {Centro de linguística aplicada, Ministerio de ciencia, tecnología y medio ambiente (Santiago de Cuba, CUB)}, ISBN = {959-7174-04-9}, CONFERENCE_NAME = {IX Simposio Internacional de Comunicación Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {24-28 Gennaio 2005}, BOOKTITLE = {Actas-I IX Simposio Internacional Comunicación Social}, EDITOR = {Moreno, C. A. and Bolaños, J. P. and Rojas, L. H. and Miyares, L. R.}, } @INPROCEEDINGS{DELLORLETTA_2005_INPROCEEDINGS_DLMP_84579, AUTHOR = {Dell'Orletta, F. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Climbing the path to grammar: a maximum entropy model of subject/object learning}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84579}, CONFERENCE_NAME = {Psychocomputational Models of Human Language Acquisition (PsychoCompLA-2005)}, CONFERENCE_PLACE = {Ann Arbor (USA)}, } @INPROCEEDINGS{GAVRILIDOU_2005_INPROCEEDINGS_GLMPS_84580, AUTHOR = {Gavrilidou, M. and Labropoulou, P. 
and Monachini, M. and Piperidis, S. and Soria, C.}, TITLE = {Building Multilingual Language Resources}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84580}, ISBN = {954-91743-2-8}, CONFERENCE_NAME = {Language and Speech Infrastructure for Information Access in the Balkan Countries}, CONFERENCE_PLACE = {Borovets, Bulgaria}, CONFERENCE_DATE = {25/2/2005}, } @INPROCEEDINGS{MARCONI_2005_INPROCEEDINGS_MRRC_84581, AUTHOR = {Marconi, L. and Ratti, D. and Rolando, C. and Cutugno, P.}, TITLE = {Hemeroteca telemática: instrumento para la organización y la circulación de la información}, YEAR = {2005}, ABSTRACT = {En estos últimos años los progresos tecnológicos han aumentado, de forma exponencial, la capacidad de transmitir la información y han facilitado también la posibilidad de archivar los datos y recuperarlos. La hemeroteca telemática es una propuesta para el desarrollo y la realización de un archivo digital exhaustivo de todos los diarios italianos por medio de tecnologías adelantadas que faciliten el acceso, el recupero y la utilización de la información. La exhaustividad del archivo y la metodología de acceso a la información son las características principales del proyecto. La característica de la hemeroteca actual es de estar formada por un material muy deteriorable cual el papel, especialmente el papel usado para los diarios, muy sutil y sometido a usura en la sustancia y en el color. Las películas han resuelto el problema de la conservación de los datos pero solo parcialmente sea por los costes muy altos sea porque no representan un real auxilio a la búsqueda de la información. La realización de una hemeroteca digital podría ser además un ejemplo de prueba para el desarrollo de metodologías y instrumentos lingüísticos en sectores especializados de la información. 
De hecho los diarios representan un sector de la información bastante bien estructurado ya que están divididos en planas según el contenido, crónica, política del interior y del exterior, deporte, cultura, entretenimiento, etc.; además llevan informaciones de tipo no textual como fotos y viñitas satíricas que constituyen parte integrante de la información. De otro lado la mayor facilidad de acceso a enormes cantidades de información no significa que la información sea realmente utilizable por los expertos del sector o por el público. La explosión de la información en la red ha bien puesto en evidencia el problema de la saturación del canal por falta de instrumentos y estrategias aptos a la búsqueda y selección de la información pertinente. Una base de datos de la información periodística podría en fin constituir un archivo viviente, una testificación de la sociedad, un recurso esencial y altamente eficiente para nuestro país también desde un punto de vista económico, una empresa capaz de producir nuevos posibilidades trabajo y nuevas competencias, un instrumento en condiciones de contestar adecuadamente al desafió de la globalización. En nuestro estudio se analiza la metodología para la construcción de una hemeroteca italiana, se evidencian los problemas, se hace algunas hipótesis hacia las soluciones y se traza el camino para dar empuje a esta obra.}, KEYWORDS = {Italiano, Quotidiani, Emeroteca}, PAGES = {601-604}, URL = {http://www.santiago.cu/hosting/linguistica/descargar.php?d=211}, PUBLISHER = {Centro de linguística aplicada, Ministerio de ciencia, tecnología y medio ambiente (Santiago de Cuba, CUB)}, ISBN = {959-7174-05-7}, CONFERENCE_NAME = {IX Simposio Internacional de Comunicación Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {24-28 Gennaio 2005}, BOOKTITLE = {Actas-II IX Simposio Internacional Comunicación Social}, EDITOR = {Moreno, C. A. and Bolaños, J. P. and Rojas, L. H. and Miyares, L. R. and Gutiérrez, Y. 
C.}, } @INPROCEEDINGS{MARINELLI_2005_INPROCEEDINGS_MBR_84582, AUTHOR = {Marinelli, R. and Bindi, R. and Roventini, A.}, TITLE = {Metonymic and Metaphorical Uses of Proper Names}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84582}, CONFERENCE_NAME = {IX Simposio Internacional de Comunicación Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {2005}, } @INPROCEEDINGS{MARINELLI_2005_INPROCEEDINGS_MR_84583, AUTHOR = {Marinelli, R. and Roventini, A.}, TITLE = {Some Considerations about the Italian Maritime Lexicon Structuring}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84583}, CONFERENCE_NAME = {IX Simposio Internacional de Comunicación Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {2005}, } @INPROCEEDINGS{MONACHINI_2005_INPROCEEDINGS_MC_84585, AUTHOR = {Monachini, M. and Calzolari, N.}, TITLE = {Initiatives towards the integration of Lexicons: MILE is taking steps forward}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84585}, CONFERENCE_NAME = {Machine Translation}, CONFERENCE_PLACE = {Kothen, Germany}, CONFERENCE_DATE = {2005}, } @INPROCEEDINGS{MONACHINI_2005_INPROCEEDINGS_MS_84597, AUTHOR = {Monachini, M. and Soria, C.}, TITLE = {Building Multilingual Terminological Lexicons for Less Widely Available Languages}, YEAR = {2005}, ABSTRACT = {Availability of Linguistic Resources for the development of Human Language Technology applications is nowadays recognized as a critical issue with both political and economic impact and implications on the sphere of cultural identity. This paper reports about the experience gained during the INTERA European project for the production of multilingual terminological lexicons for less widely available languages, i.e. those languages that suffer from poor representation over the net and from scarce computational resources, but yet are requested by the market. 
It discusses the procedure followed within the project, focuses on the problems faced which had an impact on the initial goals, presents the necessary modifications that resulted from these problems, evaluates the market needs as attested by various surveys, and describes the methodology that is proposed for the efficient production of Multilingual Terminological Lexicons.}, PAGES = {129-133}, URL = {https://publications.cnr.it/doc/84597}, PUBLISHER = {IMPRESJA Wydawnictwa Elektroniczne S. A (Poznan, POL)}, ISBN = {83-7111-341-2}, CONFERENCE_NAME = {2nd Language \& Technology Conference Human Language Technologies as a Challenge for Computer Science and Linguistics}, CONFERENCE_PLACE = {Poznan}, CONFERENCE_DATE = {21-23 Aprile 2005}, BOOKTITLE = {2nd Language \& Technology Conference Human Language Technologies as a Challenge for Computer Science and Linguistics Poznan}, EDITOR = {Vetulani, Z.}, } @INPROCEEDINGS{PARDELLI_2005_INPROCEEDINGS_PO_84598, AUTHOR = {Pardelli, G. and Orsolini, P.}, TITLE = {"Special Session in Memory of Antonio Zampolli"}, YEAR = {2005}, ABSTRACT = {It is certain that this bibliography needs to be further updates but it is with pleasure that we present it during this Special Session dedicated to Antonio Zampolli, along with a a short Curriculum vitae. A special thanks goes to Nicoletta Calzolari , Director of the Institute of Computational Linguistics, who always encouraged and supported our work with precious advices.}, PAGES = {1-50}, URL = {https://publications.cnr.it/doc/84598}, CONFERENCE_NAME = {L\&T'05-2nd Language Technologies as a Challenge for Computer Science and Linguistics in Memory of Antonio Zampolli}, CONFERENCE_PLACE = {Poznan}, CONFERENCE_DATE = {april 21-23, 2005}, } @INPROCEEDINGS{RUIMY_2005_INPROCEEDINGS_RBC_84599, AUTHOR = {Ruimy, N. and Bouillon, P. 
and Cartoni, B.}, TITLE = {Inferring a Semantically Annotated Generative French Lexicon from an Italian Lexical Resource}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84599}, CONFERENCE_NAME = {GL 2005, Third International Workshop on Generative Approaches to the Lexicon}, CONFERENCE_PLACE = {Genève}, CONFERENCE_DATE = {2005}, }

% 2005 conference contributions (INPROCEEDINGS), one field per line.
@INPROCEEDINGS{RUIMY_2005_INPROCEEDINGS_RR_84600,
  AUTHOR = {Ruimy, N. and Roventini, A.},
  TITLE = {Towards the Linking of two Electronic Lexical Databases of Italian},
  YEAR = {2005},
  ABSTRACT = {In this paper we address the issue of the semiautomatic link of ItalWordNet and PAROLE/SIMPLE/CLIPS, two electronic lexical databases of Italian language, based on different lexical semantic models with their own underlying principles and peculiarities. Nevertheless, the two resources present many compatible aspects that led us to take their analysis further, convinced that a considerable gain could be achieved by their link. In this paper, an outline of the mapping of both their ontological frameworks and semantic relations is provided, the linking methodology and related problems are then described, finally the reciprocal benefits and enhancements the two lexicons would gain from their linking are illustrated.},
  URL = {https://publications.cnr.it/doc/84600},
  CONFERENCE_NAME = {L\&T'05-2nd Language Technologies as a Challenge for Computer Science and Linguistics},
  CONFERENCE_PLACE = {Poznan},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{SORIA_2005_INPROCEEDINGS_S_84584,
  AUTHOR = {Soria, C.},
  TITLE = {Constraints on the use of connectives in discourse},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/84584},
  CONFERENCE_NAME = {First International Symposium on the Exploration and Modelling of Meaning},
  CONFERENCE_PLACE = {Biarritz, France},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{SORIA_2005_INPROCEEDINGS_SM_84601, AUTHOR = {Soria, C.
and Monachini, M.}, TITLE = {Methods, Models and Standardization Issues for the Creation of Linguistic Resources: the Case of Under-Represented Languages}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/84601}, CONFERENCE_NAME = {TALN \& RECITAL 2005: 12ème conférence annuelle sur le Traitement Automatique des Langues Naturelles}, CONFERENCE_PLACE = {Dourdan-France}, CONFERENCE_DATE = {6-10 Giugno 2005}, }

% 2005 invited talks and conference contributions (INPROCEEDINGS).
% Data-entry typos in the venue names were corrected: accent on "édition", "Neue", "60th", "Poznań".
@INPROCEEDINGS{BOZZI_2005_INPROCEEDINGS_B_112929,
  AUTHOR = {Bozzi, A.},
  TITLE = {Electronic edition and digital textual criticism: methodological aspects},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112929},
  CONFERENCE_NAME = {Edition critique/édition électronique: bases de données textuelles. Pratique et théorie},
  CONFERENCE_PLACE = {Lyon},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{BOZZI_2005_INPROCEEDINGS_B_112930,
  AUTHOR = {Bozzi, A.},
  TITLE = {Digital libraries and scholarly editing: the SPWC System},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112930},
  CONFERENCE_NAME = {Alte Archive – Neue Technologien},
  CONFERENCE_PLACE = {Wien, Goettweig},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_112901,
  AUTHOR = {Calzolari, N.},
  TITLE = {Language Resources and Content Interoperability – Technical, strategic and political issues for a new generation of Language Resources},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112901},
  CONFERENCE_NAME = {9th International Symposium on Social Communication},
  CONFERENCE_PLACE = {Santiago de Cuba},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_112902,
  AUTHOR = {Calzolari, N.},
  TITLE = {Building Blocks for Semantic Annotation: towards Content Interoperability},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112902},
  CONFERENCE_NAME = {60th anniversary of Bente Maegaard},
  CONFERENCE_PLACE = {Copenhagen},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_112903,
  AUTHOR = {Calzolari, N.},
  TITLE = {Antonio Zampolli-A life for Computational Linguistics},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112903},
  CONFERENCE_NAME = {Human Language Technologies as a Challenge for Computer Science and Linguistics},
  CONFERENCE_PLACE = {Poznań},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_112904,
  AUTHOR = {Calzolari, N.},
  TITLE = {Language Resources, Language Technology, Linguistics: isn’t this too narrow?},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112904},
  CONFERENCE_NAME = {Human Language Technologies as a Challenge for Computer Science and Linguistics},
  CONFERENCE_PLACE = {Poznań},
  CONFERENCE_DATE = {2005},
}

% NOTE(review): the venue was spelled with a stray opening quote before 2005; it now matches the spelling used by the other GL 2005 record in this file. Confirm.
@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_112905,
  AUTHOR = {Calzolari, N.},
  TITLE = {Building blocks for semantic markup: towards a framework for content interoperability},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112905},
  CONFERENCE_NAME = {GL 2005, Third International Workshop on Generative Approaches to the Lexicon},
  CONFERENCE_PLACE = {Genève},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_112906,
  AUTHOR = {Calzolari, N.},
  TITLE = {Language Resources: Standardisation, re-use, automated LR acquisition},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112906},
  CONFERENCE_NAME = {Science \& Technology Forum on Multilingualism},
  CONFERENCE_PLACE = {Luxembourg},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_112907,
  AUTHOR = {Calzolari, N.},
  TITLE = {Language Technology and the Semantic Web},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112907},
  CONFERENCE_NAME = {ECOR – European Centre for Ontological Research Inaugural Meeting},
  CONFERENCE_PLACE = {Saarbrücken},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_112908, AUTHOR = {Calzolari, N.}, TITLE = {Language Resources: towards a framework for content interoperability}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/112908}, CONFERENCE_NAME = {AIIA
Workshop on Current challenges in speech and natural language processing}, CONFERENCE_PLACE = {Milano}, CONFERENCE_DATE = {2005}, }

% 2005 talks by N. Calzolari (INPROCEEDINGS), one field per line.
@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_112909,
  AUTHOR = {Calzolari, N.},
  TITLE = {Language resources: priorities and challenges},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112909},
  CONFERENCE_NAME = {International Symposium on Natural Language Processing and Image Recognition},
  CONFERENCE_PLACE = {Kyoto},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_112910,
  AUTHOR = {Calzolari, N.},
  TITLE = {Linguistica computazionale: sinergie con il progetto DLM},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112910},
  CONFERENCE_NAME = {Convegno Lessicologia e Metalinguaggio},
  CONFERENCE_PLACE = {Macerata},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_112911,
  AUTHOR = {Calzolari, N.},
  TITLE = {Linguistica Computazionale: che cosa ci dice della lingua?},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112911},
  CONFERENCE_NAME = {Convegno Istanze epistemologiche e ontologiche emergenti dalle scienze informatiche},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_112912,
  AUTHOR = {Calzolari, N.},
  TITLE = {An Infrastructure for Language Technology and more},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112912},
  CONFERENCE_NAME = {Language Documentation and Language Description},
  CONFERENCE_PLACE = {Bielefeld},
  CONFERENCE_DATE = {2005},
}

% NOTE(review): the venue name carries the tag LKR2006 while YEAR and CONFERENCE_DATE say 2005. Left as recorded; confirm which is right.
@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_C_112913,
  AUTHOR = {Calzolari, N.},
  TITLE = {Technical and Strategic Issues on Language Resources for a Research Infrastructure},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112913},
  CONFERENCE_NAME = {Large-scale Knowledge Resources (LKR2006)},
  CONFERENCE_PLACE = {Tokyo},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{CALZOLARI_2005_INPROCEEDINGS_CBLM_157387, AUTHOR = {Calzolari, N. and Bertagna, F. and Lenci, A.
and Monachini, M.}, TITLE = {Boosting Lexical Resources for the Semantic Web. Generative Lexicon and Lexicon Interoperability}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157387}, CONFERENCE_NAME = {GL2005-3rd International Workshop on Generative Approaches to the Lexicon}, CONFERENCE_PLACE = {Genève, Switzerland}, CONFERENCE_DATE = {19-21 Maggio 2005}, }

% 2005 conference contributions (INPROCEEDINGS).
@INPROCEEDINGS{CASELLI_2005_INPROCEEDINGS_C_112915,
  AUTHOR = {Caselli, T.},
  TITLE = {An annotation scheme for bridging anaphors and its evaluation},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112915},
  CONFERENCE_NAME = {International Conference-Language resources and linguistic research: Typology, Second Language Acquisition, English Linguistics},
  CONFERENCE_PLACE = {Pavia},
  CONFERENCE_DATE = {2005},
}

@INPROCEEDINGS{PIRRELLI_2005_INPROCEEDINGS_P_112918,
  AUTHOR = {Pirrelli, V.},
  TITLE = {On the cognitive autonomy of morphological processing},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112918},
  CONFERENCE_NAME = {4èmes Décembrettes},
  CONFERENCE_PLACE = {Toulouse},
  CONFERENCE_DATE = {2005},
}

% NOTE(review): the title was recorded as "Itsel" (final letter lost) and the place as "Fréjus. France"; both were repaired. Confirm.
@INPROCEEDINGS{PIRRELLI_2005_INPROCEEDINGS_PH_112921,
  AUTHOR = {Pirrelli, V. and Herreros, I.},
  TITLE = {Learning Morphology by Itself},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112921},
  CONFERENCE_NAME = {5th Mediterranean Morphology Meeting},
  CONFERENCE_PLACE = {Fréjus, France},
  CONFERENCE_DATE = {2005},
}

% NOTE(review): this title appears truncated after "prospettive per una". The missing words are not recoverable from this file; complete it from the source record.
@INPROCEEDINGS{PIRRELLI_2005_INPROCEEDINGS_PL_112922,
  AUTHOR = {Pirrelli, V. and Lenci, A.},
  TITLE = {Dalla raccolta dati alla diagnostica: prospettive per una},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/112922},
  CONFERENCE_NAME = {XIII Congresso della Società Italiana di Psicofisiologia},
  CONFERENCE_PLACE = {Marina di Carrara},
  CONFERENCE_DATE = {2005},
}

@TECHREPORT{BALDI_2005_TECHREPORT_BBCP_157364, AUTHOR = {Baldi, M. and Bozzi, A. and Canziani, G.
and Passarotti, M.}, TITLE = {I Contradicentium Medicorum Libri}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157364}, }

% 2005 technical reports (TECHREPORT). Page ranges use the BibTeX double hyphen.
@TECHREPORT{BARTOLINI_2005_TECHREPORT_BCLMP_157365,
  AUTHOR = {Bartolini, R. and Caracciolo, C. and Lenci, A. and Marchi, S. and Pirrelli, V.},
  TITLE = {Motore semantico. Documento di progettazione e sviluppo},
  YEAR = {2005},
  ABSTRACT = {Il presente documento descrive architettura, funzionalità e algoritmo di un componente software dedicato, designato come "Motore Semantico", che ha lo scopo di produrre rappresentazioni logico-concettuali, ontologicamente interpretate, di interrogazioni in linguaggio naturale su una base di dati di tipo anche GIS.},
  KEYWORDS = {NLP},
  PAGES = {1--42},
  URL = {https://publications.cnr.it/doc/157365},
}

% NOTE(review): the deliverable number was recorded as "4. 1" with a stray space; it is now "4.1". Confirm.
@TECHREPORT{BARTOLINI_2005_TECHREPORT_BGMM_157366,
  AUTHOR = {Bartolini, R. and Giorgetti, D. and Marchi, S. and Montemagni, S.},
  TITLE = {ILC-CNR Contribution to Deliverable 4.1},
  YEAR = {2005},
  ABSTRACT = {The goal of the semantic annotation is the annotation of entities and relations starting from input documents conformant with the harmonisation output schema as defined within WP3. This harmonisation schema will focus on the structural and logical organisation of the documents, while WP4 will concentrate on the annotation of textual entities and image elements. The results of semantic annotation are intended to populate the domain ontology.},
  KEYWORDS = {NLP},
  URL = {https://publications.cnr.it/doc/157366},
}

@TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMMP_157367, AUTHOR = {Bartolini, R. and Lenci, A. and Marchi, S. and Montemagni, S.
and Pirrelli, V.}, TITLE = {Personalizzazione degli Italian NLP tools}, YEAR = {2005}, ABSTRACT = {Il presente documento intende offrire criteri e risultati della fase di personalizzazione dei moduli per l'analisi automatica del testo (Italian NLP tools o "AnITA") all'interno dell'architettura prevista nell'ambito del progetto FuLL.}, KEYWORDS = {NLP}, PAGES = {13}, URL = {https://publications.cnr.it/doc/157367}, }

% 2005 technical reports (TECHREPORT).
@TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMP_157369,
  AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Modellazione del motore sintattico e delle strutture dati di supporto},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157369},
}

% NOTE(review): the second author was recorded as "Lenci, L."; every other record in this group spells it "Lenci, A.", so it was aligned. Confirm.
@TECHREPORT{BARTOLINI_2005_TECHREPORT_BLMMP_157370,
  AUTHOR = {Bartolini, R. and Lenci, A. and Marchi, S. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Text-2-Knowledge: Acquisizione semi-automatica di ontologie per l'indicizzazione semantica di documenti},
  YEAR = {2005},
  ABSTRACT = {Text-2-Knowledge, Acquisizione semi-automatica di ontologie per l'indicizzazione semantica di documenti},
  KEYWORDS = {nlp, terminology extraction},
  URL = {https://publications.cnr.it/doc/157370},
}

@TECHREPORT{BOZZI_2005_TECHREPORT_B_157372,
  AUTHOR = {Bozzi, A.},
  TITLE = {COMTOOCI project},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157372},
}

@TECHREPORT{CUCURULLO_2005_TECHREPORT_CMPPS_157373,
  AUTHOR = {Cucurullo, S. and Montemagni, S. and Paoli, M. and Picchi, E. and Sassolini, E.},
  TITLE = {Atlante Lessicale Toscano in rete (ALT-Web). Relazione finale},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157373},
}

@TECHREPORT{DECLERCK_2005_TECHREPORT_DKBM_157374,
  AUTHOR = {Declerck, T. and Kessler, M. and Bel, N. and Monachini, M.},
  TITLE = {Evaluation of initiatives for morpho-syntactic and syntactic annotation},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157374},
}

@TECHREPORT{FRANCOPOULO_2005_TECHREPORT_FBBDMBSG_157377, AUTHOR = {Francopoulo, G. and Bontcheva, K. and Bunt, H.
and Declerck, T. and Monachini, M. and Budin, G. and Schiffrin, A. and Gillam, L.}, TITLE = {Periodic Progress Report}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157377}, }

% 2005 technical reports (TECHREPORT).
@TECHREPORT{FRANCOPOULO_2005_TECHREPORT_FBBDMBSG_157378,
  AUTHOR = {Francopoulo, G. and Bontcheva, K. and Bunt, H. and Declerck, T. and Monachini, M. and Budin, G. and Schiffrin, A. and Gillam, L.},
  TITLE = {Annual Progress Report},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157378},
}

@TECHREPORT{FRANCOPOULO_2005_TECHREPORT_FBMN_157376,
  AUTHOR = {Francopoulo, G. and Bunt, H. and Monachini, M. and Nioche, J.},
  TITLE = {Risk Management Plan},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157376},
}

@TECHREPORT{FRANCOPOULO_2005_TECHREPORT_FGCMBPS_157379,
  AUTHOR = {Francopoulo, G. and George, M. and Calzolari, N. and Monachini, M. and Bel, N. and Pet, M. and Soria, C.},
  TITLE = {Language Resource Management – Lexical Markup Framework},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157379},
}

% NOTE(review): the deliverable number was recorded as "5. 1" with a stray space; it is now "5.1". Confirm.
@TECHREPORT{GIORGETTI_2005_TECHREPORT_GMM_157380,
  AUTHOR = {Giorgetti, D. and Marchi, S. and Montemagni, S.},
  TITLE = {ILC-CNR Contribution to Deliverable 5.1},
  YEAR = {2005},
  ABSTRACT = {This document describes the high level infrastructure designed as part of the project VIKEF for creating a Virtual Information and Knowledge Environment (VIKE), namely an environment made up of explicit representation of the information and knowledge implicitly contained in one or more collections of Information-Content-Knowledge (ICK) resources, and of a collection of services operating on this explicit representation of information and knowledge; it is a virtual environment, as the representation and the services for accessing information and knowledge is almost completely independent from the physical properties of the original data.},
  KEYWORDS = {NLP},
  URL = {https://publications.cnr.it/doc/157380},
}

@TECHREPORT{LENCI_2005_TECHREPORT_LMP_157381, AUTHOR = {Lenci, A. and Marchi, S.
and Pirrelli, V.}, TITLE = {Motore del dialogo. Documento di progettazione e sviluppo}, YEAR = {2005}, ABSTRACT = {Il presente documento intende offrire i criteri generali e le funzionalità di base relativi alla progettazione del motore di dialogo nell'ambito del progetto FuLL}, KEYWORDS = {NLP}, URL = {https://publications.cnr.it/doc/157381}, }

% 2005 technical reports (TECHREPORT).
@TECHREPORT{LENCI_2005_TECHREPORT_LPS_157382,
  AUTHOR = {Lenci, A. and Pirrelli, V. and Soria, C.},
  TITLE = {Modellazione del motore di dialogo e delle strutture dati di supporto},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157382},
}

@TECHREPORT{MAMMINI_2005_TECHREPORT_MUM_157383,
  AUTHOR = {Mammini, M. and Ulivieri, M. and Monachini, M.},
  TITLE = {Unified Lexica: Common sample lexicon and harmonized morpho-syntactic specifications between PAROLE and LCStar},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157383},
}

% NOTE(review): the deliverable number was recorded as "3. 1" with a stray space; it is now "3.1". Confirm.
@TECHREPORT{MARCHI_2005_TECHREPORT_MM_157384,
  AUTHOR = {Marchi, S. and Montemagni, S.},
  TITLE = {ILC-CNR Contribution to Deliverable 3.1},
  YEAR = {2005},
  ABSTRACT = {This document presents the first set of knowledge and content acquisition components. Starting from the Annotation Schema definition, it will then describe the Harmonization support and the Annotation components, as well as the various resources needed all along the current chain.},
  KEYWORDS = {NLP},
  URL = {https://publications.cnr.it/doc/157384},
}

@TECHREPORT{MONACHINI_2005_TECHREPORT_MSCDW_157385,
  AUTHOR = {Monachini, M. and Soria, C. and Choukri, K. and Declerck, T. and Wittenburg, P.},
  TITLE = {Final Evaluation Report},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157385},
}

@TECHREPORT{MONACHINI_2005_TECHREPORT_MSUCDM_157386, AUTHOR = {Monachini, M. and Soria, C. and Ulivieri, M. and Calzolari, N. and Declerck, T.
and Mammini, M.}, TITLE = {Evaluation of existing standards for NLP Lexica: Proposal for Candidate Data Categories}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/157386}, }

% 2005 technical reports (TECHREPORT), followed by 2005 resources and software (MISC).
@TECHREPORT{SASSI_2005_TECHREPORT_SC_171621,
  AUTHOR = {Sassi, M. and Cinini, A.},
  TITLE = {L'archivio della Disciplinare come deposito per la content-analysis},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/171621},
}

@TECHREPORT{SASSOLINI_2005_TECHREPORT_SE_373510,
  AUTHOR = {Sassolini, E. and Evangelista, A.},
  TITLE = {P8/L488-01: report tecnico di progetto},
  YEAR = {2005},
  ABSTRACT = {Diffusione della cultura e valorizzazione del patrimonio letterario della lingua italiana e della lingua araba attraverso una diffusione telematica di banche dati letterarie.},
  KEYWORDS = {trattamento della lingua araba, applicazione web},
  PAGES = {12},
  URL = {https://publications.cnr.it/doc/373510},
}

@MISC{BARTOLINI_2005_MISC_BDGMLMP_151548,
  AUTHOR = {Bartolini, R. and Dell'Orletta, F. and Giorgetti, D. and Marchi, S. and Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {Text-to-Knowledge (T2K)},
  YEAR = {2005},
  ABSTRACT = {Piattaforma di estrazione e indicizzazione terminologica.},
  KEYWORDS = {NLP, estrazione terminologica},
  URL = {https://publications.cnr.it/doc/151548},
}

@MISC{BARTOLINI_2005_MISC_BMLMP_151550,
  AUTHOR = {Bartolini, R. and Marchi, S. and Lenci, A. and Montemagni, S. and Pirrelli, V.},
  TITLE = {NLPtools},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151550},
}

@MISC{BERTAGNA_2005_MISC_B_151539,
  AUTHOR = {Bertagna, F.},
  TITLE = {Sistema di Open-Domain Question Answering per l’Italiano},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151539},
}

% NOTE(review): the abstract read "non un a dizionario" (two words transposed); it is now "non a un dizionario". Confirm against the source record.
@MISC{BERTINETTO_2005_MISC_BBLMRRT_151540,
  AUTHOR = {Bertinetto, P. M. and Burani, C. and Laudanna, A. and Marconi, L. and Ratti, D. and Rolando, C. and Thornton, A. M.},
  TITLE = {CoLFIS: Corpus e Archivio Lessicale dell’Italiano Scritto contemporaneo},
  YEAR = {2005},
  ABSTRACT = {Dal progetto speciale, "Banca Dati Lessicali dell'Italiano Scritto Contemporaneo" del CNR, sviluppato in collaborazione con la Scuola Normale Superiore di Pisa e l'Istituto di Scienze e Tecnologie della Cognizione del CNR di Roma, è stato realizzato il "Corpus e Archivio Lessicale dell'Italiano Scritto contemporaneo" (CoLFIS), suddiviso in tre settori contenente, in maniera opportunamente bilanciata, testi appartenenti a vari generi di scrittura. I sottoinsiemi, o settori, individuati in CoLFIS sono: la "stampa quotidiana" suddivisa in tre testate giornalistiche: "Il Corriere Della Sera", "La Repubblica", "La Stampa", a loro volta in ogni testata le occorrenze sono poi suddivise in sette sottoinsiemi; le "Pubblicazioni Periodiche" sono ulteriormente suddivise in 12 sottosettori; i "libri" sono suddivisi in 13 generi letterari. Il corpus rappresenta soprattutto l'italiano effettivamente letto piuttosto che tutto l'italiano scritto dal momento che, nel definire gli insiemi di testi da includere, non si sono tenuti presenti tutti i possibili usi scritti dell'italiano, tutti i tipi di testi teoricamente producibili ed effettivamente prodotti, ma piuttosto i tipi di testi che, da una indagine sociologica e statistica, sono risultati essere i più letti. Tale scelta si giustifica in quanto si è voluto costruire un corpus e un lessico di frequenza che si avvicinassero il più possibile al lessico mentale di un parlante di media cultura e non a un dizionario esaustivo della lingua italiana. Il corpus di CoLFIS è stato totalmente lemmatizzato e sono stati sviluppati algoritmi software di analisi statistica per estrarre i lessici di frequenza nel corpus totale e nei diversi settori costituenti il corpus lemmatizzato, per ogni lemma e per le relative forme. CoLFIS ha dato luogo a diversi prodotti disponibili in formato digitale tra cui un formario, i lessici di frequenza per i diversi settori e un lessico di frequenza totale dotati di appropriati indici di frequenza. Il corpus esiste in versione testuale e in versione lemmatizzata. Si tratta dell'unico corpus di riferimento creato su base statistica e interamente lemmatizzato della lingua italiana.},
  URL = {http://www.ge.ilc.cnr.it/lessico.php},
}

@MISC{BERTINETTO_2005_MISC_BBLMRRT_175454, AUTHOR = {Bertinetto, P. M. and Burani, C. and Laudanna, A. and Marconi, L. and Ratti, D. and Rolando, C. and Thornton, A. M.}, TITLE = {Corpus e lessico di frequenza dell'Italiano scritto (CoLFIS). [Corpus and Frequency Lexicon of Written Italian]}, YEAR = {2005}, ABSTRACT = {Informaciones estadísticas en CALIS: Corpus y Archivo Léxical del Italiano Escrito. CALIS es una base de datos de la lengua italiana escrita de 3.798.275 palabras y esta formada de textos escritos de varios generes pesados oportunamente y selecionados en tres distintos sectores: diaros, periodicos y libros. Este producto representa el italiano efectivamente leido mas bien que toda la lengua italiana escrita. Esta eleccion se justifica en cuanto se deseaba construir un corpus, y en consecuencia un lexico de frecuencia, que se acercara los mas posible al lexico mental de un hablante de media cultura y no a un diccionario de la lengua italiana. Los diarios con 1.836.119 palabras estan extraido de tres diarios que son los mas importantes y leidos en Italia: Il Corriere Della Sera, Repubblica, La Stampa. En cada diario se han elegidos textos de 9 diferentes subsectores: economia, cronica local, cronica mundana, cronica negra, politica exterior, politica interior, ciencia, espectaculo y deporte.
El sector de los periodicos esta constituido por 1.306.653 palabras elejdas entre 12 differentes subsectores: arte-ciencia-tecnica, auto-nautica, ninos-muchachos, casa-hobby, feminil, fotonovelas, informacion general, cronica mundana, radio-television, deporte, viajes-ecologia y otro. El sector de los libros esta constituido por 655.503 palabras elejdas entre 13 generos literarios: arte, ninos, fantascienza, gialli espionaje, hobby y viajes, narrativa clasica, narrativa moderna, rosa, ensaystica, ciencias naturales y exactas, ciencias sociales y humanas, teatro y poesia. En este trabajo se analizan las informaciones extraidas del lexico de frecuencia del corpus lematizado de CALIS (Corpus e Archivio Lessicale Italiano Scritto) relativamente a las diferentes categorias gramaticales, a los datos de frecuencia y de dispercion de cada lemma y de cada forma en los distintos sectores y subsectores. Se efectuan por algunos sectores analisis sobre el acrecentamiento de las formas/lemas con el fin de efectuar una evaluacion sobre las posibles y corectas dimensiones de un corpus para ser representativo de la lengua analizada.}, KEYWORDS = {Corpora, Lessico, Italiano, DataBase}, URL = {http://www.ge.ilc.cnr.it/strumenti.php}, }

% 2005 resources and software (MISC), one field per line.
@MISC{BOZZI_2005_MISC_B_151529,
  AUTHOR = {Bozzi, A.},
  TITLE = {SPWC-COMTOOCI},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151529},
}

@MISC{CECCOTTI_2005_MISC_CS_151530,
  AUTHOR = {Ceccotti, M. L. and Sassi, M.},
  TITLE = {Dall'Archivio Elettronico delle Opere di Carlo Emilio Gadda. Studi e ricerche con strumenti computazionali dell'opera dell'ingegnere-scrittore},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151530},
}

@MISC{FRANCOPOULO_2005_MISC_FGCMBPS_151541, AUTHOR = {Francopoulo, G. and George, M. and Calzolari, N. and Monachini, M. and Bel, N. and Pet, M.
and Soria, C.}, TITLE = {Lexical Markup Framework}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151541}, }

% 2005 resources and software (MISC), one field per line.
@MISC{GAVRILIDOU_2005_MISC_GLMPS_151543,
  AUTHOR = {Gavrilidou, M. and Labropoulou, P. and Monachini, M. and Piperidis, S. and Soria, C.},
  TITLE = {INTERA Business model},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151543},
}

@MISC{GIORGOLO_2005_MISC_GHP_151551,
  AUTHOR = {Giorgolo, G. and Herreros, I. and Pirrelli, V.},
  TITLE = {SOM-ware},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151551},
}

@MISC{MAMMINI_2005_MISC_MUM_151544,
  AUTHOR = {Mammini, M. and Ulivieri, M. and Monachini, M.},
  TITLE = {Lessici Unificati “su richiesta”},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151544},
}

@MISC{MAMMINI_2005_MISC_MUM_151545,
  AUTHOR = {Mammini, M. and Ulivieri, M. and Monachini, M.},
  TITLE = {Lessico Unificato},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151545},
}

@MISC{MAMMINI_2005_MISC_MUM_151546,
  AUTHOR = {Mammini, M. and Ulivieri, M. and Monachini, M.},
  TITLE = {Specifiche Lessicali Morfo-sintattiche Unificate},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151546},
}

@MISC{MARINELLI_2005_MISC_MS_151534,
  AUTHOR = {Marinelli, R. and Spadoni, G.},
  TITLE = {MARITERM Database di terminologia marittima},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151534},
}

@MISC{MONACHINI_2005_MISC_MS_151528,
  AUTHOR = {Monachini, M. and Soria, C.},
  TITLE = {Terminologia Multilingue (inglese-greco-serbo-sloveno-bulgaro)},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151528},
}

@MISC{MONACHINI_2005_MISC_MSPSR_151547,
  AUTHOR = {Monachini, M. and Soria, C. and Picchi, E. and Sassolini, E. and Ruffolo, P.},
  TITLE = {Procedure e tecniche di acquisizione semi-automatica di terminologie da testi paralleli},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151547},
}

@MISC{MONACHINI_2005_MISC_MSUCDM_151542, AUTHOR = {Monachini, M. and Soria, C. and Ulivieri, M. and Calzolari, N. and Declerck, T.
and Mammini, M.}, TITLE = {Data Category Registry}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151542}, }

% 2005 resources and software (MISC), one field per line.
@MISC{PARDELLI_2005_MISC_PSGO_151553,
  AUTHOR = {Pardelli, G. and Sassi, M. and Goggi, S. and Orsolini, P.},
  TITLE = {NLPterminology},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151553},
}

@MISC{PECCHIA_2005_MISC_PCCS_151531,
  AUTHOR = {Pecchia, L. and Caligaris, C. and Cappelli, G. and Saba, A.},
  TITLE = {Corpus di Linguaggio Infantile},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151531},
}

@MISC{PICCHI_2005_MISC_PMSCP_151532,
  AUTHOR = {Picchi, E. and Montemagni, S. and Sassolini, E. and Cucurullo, S. and Paoli, M.},
  TITLE = {ALTWEB},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151532},
}

@MISC{PICCHI_2005_MISC_PSCSC_151533,
  AUTHOR = {Picchi, E. and Sassi, M. and Ceccotti, M. L. and Sassolini, E. and Cucurullo, S.},
  TITLE = {Linguistic Miner},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151533},
}

@MISC{ROVENTINI_2005_MISC_RABCM_151536,
  AUTHOR = {Roventini, A. and Alonge, A. and Bertagna, F. and Calzolari, N. and Marinelli, R.},
  TITLE = {ItalWordNet database semantico-lessicale dell’italiano},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151536},
}

@MISC{RUIMY_2005_MISC_RMC_151535,
  AUTHOR = {Ruimy, N. and Monachini, M. and Calzolari, N.},
  TITLE = {Lessico elettronico multi-livello dell'italiano: PAROLE-SIMPLE-CLIPS},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/151535},
}

@MISC{SABA_2005_MISC_S_157388,
  AUTHOR = {Saba, A.},
  TITLE = {Lessico del testo Arte de marear de Juan de Moya},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157388},
}

@MISC{SABA_2005_MISC_S_157389,
  AUTHOR = {Saba, A.},
  TITLE = {Lessico del testo Tractado de la Sphera de Iohannes de Sacrobusto},
  YEAR = {2005},
  URL = {https://publications.cnr.it/doc/157389},
}

@MISC{SABA_2005_MISC_SSCPCG_151537, AUTHOR = {Saba, A. and Sassi, M. and Carpi, E. and Periñán, B. and Calí, S. and Garcia Macho, M.
L.}, TITLE = {Corpus del LéNESO (Léxico Náutico del Español del Siglo de Oro)}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151537}, } @MISC{SORIA_2005_MISC_SM_151538, AUTHOR = {Soria, C. and Monachini, M.}, TITLE = {MILE-OWL}, YEAR = {2005}, URL = {https://publications.cnr.it/doc/151538}, } @ARTICLE{ALLEGRINI_2004_ARTICLE_AGGW_30870, AUTHOR = {Allegrini, P. and Giuntoli, M. and Grigolini, P. and West, B. J.}, TITLE = {From knowledge, knowability and the search for objective randomness to a new vision of complexity}, YEAR = {2004}, ABSTRACT = {Herein we consider various concepts of entropy as measures of the complexity of phenomena and in so doing encounter a fundamental problem in physics that affects how we understand the nature of reality. In essence the difficulty has to do with our understanding of randomness, irreversibility and unpredictability using physical theory, and these in turn undermine our certainty regarding what we can and what we cannot know about complex phenomena in general. The sources of complexity examined herein appear to be channels for the amplification of naturally occurring randomness in the physical world. Our analysis suggests that when the conditions for the renormalization group apply, this spontaneous randomness, which is not a reflection of our limited knowledge, but a genuine property of nature, does not realize the conventional thermodynamic state, and a new condition, intermediate between the dynamic and the thermodynamic state, emerges. We argue that with this vision of complexity, life, which with ordinary statistical mechanics seems to be foreign to physics, becomes a natural consequence of dynamical processes.}, PAGES = {11-32}, URL = {https://publications.cnr.it/doc/30870}, VOLUME = {20}, PUBLISHER = {American Institute of Physics (Woodbury, NY, Stati Uniti d'America)}, ISSN = {1054-1500}, JOURNAL = {Chaos (Woodbury N. Y.)}, } @ARTICLE{ALLEGRINI_2004_ARTICLE_AGP_30869, AUTHOR = {Allegrini, P. and Grigolini, P. 
and Palatella, L.}, TITLE = {Intermittency and scale-free networks: a dynamical model for human language complexity}, YEAR = {2004}, ABSTRACT = {In this paper we try to model certain features of human language complexity by means of advanced concepts borrowed from statistical mechanics. We use a time series approach, the diffusion entropy (DE) method, to compute the complexity of an Italian corpus of newspapers and magazines. We find that the anomalous scaling index is compatible with a simple dynamical model, a random walk on a complex scale-free network, which is linguistically related to Saussure’s paradigms. The network complexity is independently measured on the same corpus, looking at the co-occurrence of nouns and verbs. This connection of cognitive complexity with long-range time correlations also provides an explanation for the famous Zipf’s law in terms of the generalized central limit theorem.}, PAGES = {95-105}, URL = {https://publications.cnr.it/doc/30869}, VOLUME = {20}, } @ARTICLE{BELCASTRO_2004_ARTICLE_BR_64516, AUTHOR = {Belcastro, C. and Ruffolo, P.}, TITLE = {A mathematical classification of the Platonic corpus}, YEAR = {2004}, PAGES = {1-19}, URL = {https://publications.cnr.it/doc/64516}, VOLUME = {20-21}, } @ARTICLE{BOZZI_2004_ARTICLE_B_64507, AUTHOR = {Bozzi, A.}, TITLE = {Tecnologie e beni librari: il digitale in biblioteca e in archivio}, YEAR = {2004}, PAGES = {439-445}, URL = {https://publications.cnr.it/doc/64507}, VOLUME = {2}, } @ARTICLE{BOZZI_2004_ARTICLE_B_64508, AUTHOR = {Bozzi, A.}, TITLE = {Verso una filologia computazionale: la prima Euroconferenza della European Science Foundation}, YEAR = {2004}, ABSTRACT = {Digital technology offers to the philological disciplines many new tools and techniques for the editorial work. These have been the topics discussed at the first Euroconference sponsored by the European Science Foundation, which was held in September 2003 near Pisa. 
More than 70 scholars and young researchers from many attended the conference. The following paper gives a brief description of the themes discussed and the future developments envisaged for the different sectors in which philological disciplines are subdivided.}, KEYWORDS = {Filologia computazionale, digitalizzazione dei documenti, edizioni elettroniche}, PAGES = {128-138}, URL = {https://publications.cnr.it/doc/64508}, VOLUME = {32}, PUBLISHER = {Centro de estudos classicos (Lisboa, Portogallo)}, ISSN = {0870-0133}, JOURNAL = {Euphrosyne: revista de filologia classica}, } @ARTICLE{BOZZI_2004_ARTICLE_B_64509, AUTHOR = {Bozzi, A.}, TITLE = {Postfazione a Zampolli Antonio, Filologia e informatica: le origini della filologia computazionale}, YEAR = {2004}, PAGES = {21-24}, URL = {https://publications.cnr.it/doc/64509}, VOLUME = {32}, } @ARTICLE{BOZZI_2004_ARTICLE_BCL_64505, AUTHOR = {Bozzi, A. and Cignoni, L. and Lebrave, J.}, TITLE = {Introduction}, YEAR = {2004}, ABSTRACT = {This volume contains a number of contributions resulting from the scientific discussions held during the Euroconference "Philological Disciplines and Digital Technology" (Il Ciocco, Castelvecchio Pascoli, Italy, 7-11 September, 2003), sponsored by the European Science Foundation (ESF), the Conseil National de la Recherche Scientifique (CNRS) and the Regione Toscana (Tuscan Region). Therefore, the articles published here are not the Proceedings of that Conference but, more interestingly, they reflect the themes that were dealt with on that occasion. Owing to the different scientific experiences, the papers cover a wide range of issues but their authors share the same scope, which is to provide an accurate and precise state-of-the-art description of Digital Philology. In the light of the digital revolution, what is the position of philological disciplines? How are critical editors reacting to this? How do they perceive the functions of this technology? 
Is it seen only as a tool for teaching and for research, or does it assume a more innovative role from a methodological point-of-view? It is said, at least in Italy, that the critical-editor's profession is at a crisis: the publication or re-publication of texts, especially ancient ones, is carried out by an increasingly smaller number of specialists as well as students. The term 'philology', in reality, is more and more often used to define 'stylistics', 'literary critics', 'anthropology of literature', etc. If these considerations are true, how then can technological development and the creation of digital libraries have a positive influence on the editorial profession in order to help reverse the negative trend of a diminished interest by young researchers in the production of critical editions? At the end of the 1960's similar reflections were made already when the development of information technology (IT) for the literary and philological disciplines was still at its pioneering stage: for example on the occasion of the important seminar "La pratique des ordinateurs dans la critique des textes", organised by CNRS in Paris in 1968.}, PAGES = {xi-xiii}, URL = {https://publications.cnr.it/doc/64505}, VOLUME = {20-21}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{BOZZI_2004_ARTICLE_BC_64506, AUTHOR = {Bozzi, A. and Corradini, M. S.}, TITLE = {Aspects and methods of computer-aided textual criticism}, YEAR = {2004}, ABSTRACT = {The development of digital technology in libraries and archives fosters the development of computational tools for the study of sources in view of publication, in Internet or CD, of critical editions. 
This paper presents a system for computational philology developed at the Institute for Computational Linguistics (ILC) of the National Research Council (CNR), Pisa, for the management of images and texts, experimented on Greek papyri, medieval manuscripts and ancient printed books. The aspects concerning critical apparatus and stemmatics were carried out in collaboration with the Department of Romance Languages and Literatures of Pisa University.}, KEYWORDS = {Philological workstation, Textual criticism, Stemmatology}, PAGES = {49-66}, URL = {https://publications.cnr.it/doc/64506}, VOLUME = {20-21}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{BRESCIANI_2004_ARTICLE_BMBF_183647, AUTHOR = {Bresciani, E. and Menchetti, A. and Bozzi, A. and Fedele, G.}, TITLE = {Sistema di filologia computazionale per testi demotici}, YEAR = {2004}, ABSTRACT = {A project for a Demotic Inscriptions on Ostraka Database is being carried out in collaboration between ILC/CNR (Pisa), the Dpt. of Electronic Engineering (Calabria University) and the Dpt. of the Ancient World History (Egyptological section, Pisa University). The aim of the project is to analyse the digital colour images of demotic texts on Ostraka (Medinat Madi, in Fayyum region) with the aid of computational tools. The module described in the paper is a neural component able to learn the graphical features of each demotic symbol, which has been previously segmented in the images thanks to a semiautomatic procedure. A specific neural network tries to recognise the text written in the images linking the symbols segmented within the ostraka images database to the correspondent symbols available on a virtual keyboard. 
The graphical interface is particularly useful for teaching and research activities on this type of archaeological documentation.}, PAGES = {267-286}, URL = {https://publications.cnr.it/doc/183647}, VOLUME = {15}, PUBLISHER = {All'Insegna del giglio (Firenze, Italia)}, ISSN = {1120-6861}, JOURNAL = {Archeologia e calcolatori}, } @ARTICLE{BRESCIANI_2004_ARTICLE_BMBF_64510, AUTHOR = {Bresciani, E. and Menchetti, A. and Bozzi, A. and Fedeli, G.}, TITLE = {DiPhiloS: un sistema di filologia computazionale applicato a referti medici greci su papiro}, YEAR = {2004}, PAGES = {267-286}, URL = {https://publications.cnr.it/doc/64510}, VOLUME = {15}, } @ARTICLE{CALZOLARI_2004_ARTICLE_C_64511, AUTHOR = {Calzolari, N.}, TITLE = {The Strategic Role of LRs: ENABLER and the Committee for Written LRs and Evaluation}, YEAR = {2004}, PAGES = {3-4}, URL = {https://publications.cnr.it/doc/64511}, VOLUME = {13. 1}, } @ARTICLE{CALZOLARI_2004_ARTICLE_CL_64512, AUTHOR = {Calzolari, N. and Lenci, A.}, TITLE = {Linguistica Computazionale-Strumenti e risorse per il Trattamento Automatico della Lingua}, YEAR = {2004}, PAGES = {56-69}, URL = {https://publications.cnr.it/doc/64512}, VOLUME = {2}, } @ARTICLE{FURFARI_2004_ARTICLE_FSPSB_173367, AUTHOR = {Furfari, F. and Soria, C. and Pirrelli, V. and Signore, O. and Bianchi Bandinelli, R.}, TITLE = {NICHE: Natural Interaction in Computerised Home Environments}, YEAR = {2004}, ABSTRACT = {Future technologies will provide users with increasing control over surrounding devices embedded in a common home environment. Somewhat paradoxically, this could result in an increase rather than a reduction in complexity if support for high-level interfacing is not introduced. 
This concern prompted the launching of a medium-term project aimed at promoting natural user-home interaction along the lines of the Ambient Intelligence vision.}, KEYWORDS = {HCI, Home Automation, Smart Home}, PAGES = {55-56}, URL = {http://www.ercim.org/publication/Ercim_News/enw58/furfari.html}, VOLUME = {58}, PUBLISHER = {ERCIM (Le Chesnay)}, ISSN = {0926-4981}, JOURNAL = {ERCIM news}, } @ARTICLE{MAGNINI_2004_ARTICLE_MC_64503, AUTHOR = {Magnini, B. and Calzolari, N.}, TITLE = {Temi e prospettive di ricerca per l’elaborazione del linguaggio naturale in Italia-Topics and Research Perspectives for Natural Language Processing in Italy}, YEAR = {2004}, PAGES = {50-51}, URL = {https://publications.cnr.it/doc/64503}, VOLUME = {I (1)}, } @ARTICLE{NASCIMENTO_2004_ARTICLE_NAC_64504, AUTHOR = {Nascimento, A. and Alberto, P. and Cappelli, G.}, TITLE = {OLISSIPO-entre filologia e informática: recursos para gerir o estudo do texto latino}, YEAR = {2004}, PAGES = {111-125}, URL = {https://publications.cnr.it/doc/64504}, VOLUME = {32}, } @ARTICLE{PASSAROTTI_2004_ARTICLE_P_64518, AUTHOR = {Passarotti, M.}, TITLE = {Development and perspectives of the Latin morphological analyser LEMLAT}, YEAR = {2004}, PAGES = {397-414}, URL = {https://publications.cnr.it/doc/64518}, VOLUME = {20-21}, } @ARTICLE{PASSAROTTI_2004_ARTICLE_PR_64519, AUTHOR = {Passarotti, M. and Ruffolo, P.}, TITLE = {L'utilizzo del lemmatizzatore LEMLAT per una sistemazione dell'omografia in latino}, YEAR = {2004}, PAGES = {99-110}, URL = {https://publications.cnr.it/doc/64519}, VOLUME = {32}, } @ARTICLE{ZAMPOLLI_2004_ARTICLE_Z_64520, AUTHOR = {Zampolli, A.}, TITLE = {Filologia e informatica: le origini della filologia computazionale}, YEAR = {2004}, PAGES = {11-21}, URL = {https://publications.cnr.it/doc/64520}, VOLUME = {32}, } @BOOK{ALLEGRINI_2004_BOOK_AGP_136446, AUTHOR = {Allegrini, P. and Grigolini, P. 
and Palatella, L.}, TITLE = {Cognitive scale-free networks as a model for intermittency in human language}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/136446}, PUBLISHER = {World Scientific Publ. Co (Singapore, SGP)}, } @BOOK{BOZZI_2004_BOOK_BCL_136435, AUTHOR = {Bozzi, A. and Cignoni, L. and Lebrave, J. L.}, TITLE = {Digital Technology and Philological Disciplines}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/136435}, } @BOOK{SABA_2004_BOOK_S_136445, AUTHOR = {Saba, A.}, TITLE = {El léxico del Compendio de la arte de navegar de Rodrigo Zamorano}, YEAR = {2004}, ABSTRACT = {A dos años de distancia de la publicación del Léxico del Breve compendio de la Esfera y de la arte de navegar de Martín Cortés, aparece el Compendio de la arte de navegar de Rodrigo Zamorano, ambos a cargo de Antonina Saba*. Esta obra constituye una provechosa contribución al incremento de un corpus de textos españoles del siglo XVI, nacido con el intento de crear un Léxico náutico del Español del Siglo de Oro (LÉNESO**). Este proyecto se está llevando a cabo con la contribución del Istituto di Linguistica Computazionale del CNR e del Dipartimento di Lingue romanze dell'Università di Pisa, del Departamento de Lengua Española y Lingüística General y de la Sección de Medios Impresos de la UNED de Madrid. El objetivo final es contribuir a la realización del "Diccionario de la Navegación del Siglo de Oro", dirigido por María Lourdes Garcia-Macho, subvencionado por la Dirección General de Enseñanza Superior e Investigación Científica del Ministerio de la Educación y Cultura. El enorme desarrollo económico y cultural que tuvo España después del descubrimiento de América, debido principalmente al comercio y a los intercambios con el Nuevo Mundo, favoreció la expansión de disciplinas relacionadas con la cosmografía, la astronomía, la cartografía, la meteorología, dando un impulso notable a la técnica de la navegación. 
El mérito de Zamorano y de sus compañeros Cortés, Chaves, Guevara, Medina, Palacio, Poza, Siria y otros, fue el aliento que dieron al cultivo de estas disciplinas nuevas e innovadoras. Los libros náuticos y los manuales de navegación de la época son una fuente inestimable de neologismos y tecnicismos en varios campos científicos y sobre todo el relacionado con la navegación, sin olvidar el aporte de topónimos que surgieron de los derroteros y de las cartas náuticas. El volumen que presentamos contiene la concordancia lematizada, los índices de frecuencia de los lemas y de los nombres propios, el diccionario inverso del Compendio de la arte de navegar, y un sistema de consulta del texto contenido en un CD-ROM, que constituyen una novedad y un estímulo para toda investigación sobre la lengua de la navegación.}, KEYWORDS = {analisi linguistica, lessicografia, lessico specializzato}, PAGES = {ix-xxix}, URL = {https://publications.cnr.it/doc/136445}, VOLUME = {33067EU01A01}, ISBN = {8436206894}, EDITOR = {Saba, A.}, } @INCOLLECTION{AGOSTINIANI_2004_INCOLLECTION_AMPP_136438, AUTHOR = {Agostiniani, L. and Montemagni, S. and Paoli, M. and Picchi, E.}, TITLE = {Lessicografia dialettale e computer: questioni di rappresentazione e recupero dei dati}, YEAR = {2004}, KEYWORDS = {Lessicografia computazionale, Lessicografia Dialettale}, URL = {https://publications.cnr.it/doc/136438}, PUBLISHER = {Centro Interuniversitario di Studi Veneti (Venezia, ITA)}, } @INCOLLECTION{BARTOLINI_2004_INCOLLECTION_BLMPS_30867, AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V. and Soria, C.}, TITLE = {Automatic Classification and Analysis of Provisions in Italian Legal Texts: A Case Study}, YEAR = {2004}, ABSTRACT = {In this paper we address the problem of automatically enriching legal texts with semantic annotation, an essential pre–requisite to effective indexing and retrieval of legal documents. 
This is done through illustration of SALEM (Semantic Annotation for LEgal Management), a computational system developed for automated semantic annotation of (Italian) law texts. SALEM is an incremental system using Natural Language Processing techniques to perform two tasks: i) classify law paragraphs according to their regulatory content, and ii) extract relevant text fragments corresponding to specific semantic roles that are relevant for the different types of regulatory content. The paper sketches the overall architecture of SALEM and reports results of a preliminary case study on a sample of Italian law texts.}, KEYWORDS = {Annotazione semantica, Classificazione automatica}, PAGES = {593-604}, URL = {https://rdcu.be/dftjm}, VOLUME = {3292}, DOI = {10.1007/978-3-540-30470-8_72}, PUBLISHER = {Springer (Berlin, DEU)}, ISBN = {978-3-540-23664-1}, BOOKTITLE = {On the Move to Meaningful Internet Systems 2004: OTM 2004 Workshops. OTM 2004}, EDITOR = {Meersman, R. and Tari, Z. and Corsaro, A.}, } @INCOLLECTION{CALZOLARI_2004_INCOLLECTION_C_136442, AUTHOR = {Calzolari, N.}, TITLE = {Computational Lexicons and Corpora: Complementary Components in Human Language Technology}, YEAR = {2004}, ABSTRACT = {Language resources (LR) are unanimously recognised as a necessary preliminary platform for developing an adequate human language technology (HLT). In this paper I touch on a few issues related to computational lexicon and textual corpora, highlighting how evidence shows that they are not only two closely interrelated linguistic objects, but should be considered as complementary views on the lexical space.}, PAGES = {89-107}, URL = {https://publications.cnr.it/doc/136442}, PUBLISHER = {John Benjamins (Amsterdam, NLD)}, ISBN = {1588115372}, BOOKTITLE = {Linguistics Today-Facing a Greater Challenge}, EDITOR = {Van Sterkenburg, P.}, } @INCOLLECTION{CALZOLARI_2004_INCOLLECTION_CP_136440, AUTHOR = {Calzolari, N. 
and Paoloni, A.}, TITLE = {Il Trattamento Automatico della Lingua: definizione e aree tecnologiche}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/136440}, PUBLISHER = {Fondazione Ugo Bordoni (Roma, ITA)}, } @INCOLLECTION{CALZOLARI_2004_INCOLLECTION_CP_136441, AUTHOR = {Calzolari, N. and Paoloni, A.}, TITLE = {TAL: Definitions and Technical Areas}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/136441}, PUBLISHER = {Fondazione Ugo Bordoni (Roma, ITA)}, } @INCOLLECTION{PAOLI_2004_INCOLLECTION_PMP_136444, AUTHOR = {Paoli, M. and Montemagni, S. and Picchi, E.}, TITLE = {ALT Web: l'Atlante Lessicale Toscano in rete}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/136444}, PUBLISHER = {Centro Interuniversitario di Studi Veneti (Venezia, ITA)}, } @EDITORIAL{BOZZI_2004_EDITORIAL_BC_146071, AUTHOR = {Bozzi, A. and Cignoni, L.}, TITLE = {Digital Technology and Philological Disciplines}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/146071}, } @INPROCEEDINGS{ALBERTO_2004_INPROCEEDINGS_ACPP_84587, AUTHOR = {Alberto, P. F. and Cappelli, G. and Passarotti, M. and Pena, A.}, TITLE = {Strumenti informatici per l'analisi dei testi latini}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84587}, CONFERENCE_NAME = {Antiguidade Clássica: Que fazer com este património? Colóquio à memória de Victor Jabouille}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{BARTOLINI_2004_INPROCEEDINGS_BLMP_84570, AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Hybrid Constraints for Robust Parsing: First Experiments and Evaluation}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84570}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbon, Portugal}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{BARTOLINI_2004_INPROCEEDINGS_BLMPS_84571, AUTHOR = {Bartolini, R. and Lenci, A. and Montemagni, S. and Pirrelli, V. 
and Soria, C.}, TITLE = {Semantic Mark-up of Italian Legal Texts Through NLP-based Techniques}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84571}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbon, Portugal}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{BERTAGNA_2004_INPROCEEDINGS_B_84602, AUTHOR = {Bertagna, F.}, TITLE = {Using Semantic Language Resources to Support Textual Inference for Question Answering}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84602}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{BERTAGNA_2004_INPROCEEDINGS_BCMSU_84572, AUTHOR = {Bertagna, F. and Calzolari, N. and Monachini, M. and Soria, C. and Ulivieri, M.}, TITLE = {Report on the interlingual annotation experience at ILC-CNR}, YEAR = {2004}, URL = {http://www.google.com/url?sa=t\&rct=j\&q=\&esrc=s\&source=web\&cd=1\&ved=0CDQQFjAA\&url=http%3A%2F%2Fciteseerx.ist.psu.edu%2Fviewdoc%2Fdownload%3Fdoi%3D10.1.1.92.5078%26rep%3Drep1%26type%3Dpdf\&ei=ksa5UZOcJ8mjhgeziIGQBw\&usg=AFQjCNE1nXfd9hHiIstx1Lq_4VaiGDvkkA\&sig2=fIUJ3FE3BnBgDALu5T832w\&bvm=bv.47883778,d.ZG4}, CONFERENCE_NAME = {Seventh Interlingua Workshop on Determining Interlingua Utility for Machine Translation}, CONFERENCE_PLACE = {Washington DC}, CONFERENCE_DATE = {2 Ottobre 2004}, BOOKTITLE = {Biennial Conference of the AMTA-Determining Interlingua Utility for Machine Translation}, EDITOR = {Habash, N. and Dorr, B. and Hovy, E. and Reeder, F.}, } @INPROCEEDINGS{BERTAGNA_2004_INPROCEEDINGS_BCS_84603, AUTHOR = {Bertagna, F. and Chiran, L. 
and Simi, M.}, TITLE = {QA at ILC-UniPI: Description of the Prototype}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84603}, CONFERENCE_NAME = {CLEF 2004 Workshop}, CONFERENCE_PLACE = {Bath, UK}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{BERTAGNA_2004_INPROCEEDINGS_BLMC_84573, AUTHOR = {Bertagna, F. and Lenci, A. and Monachini, M. and Calzolari, N.}, TITLE = {The MILE Lexical Classes: Data Categories for Content Interoperability among Lexicons}, YEAR = {2004}, PAGES = {8}, URL = {https://publications.cnr.it/doc/84573}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation-Workshop: A Registry of Linguistic Data Categories within an Integrated Language Resources Repository Area (INTERA)}, CONFERENCE_PLACE = {Lisbon, Portugal}, CONFERENCE_DATE = {29-5-2004}, } @INPROCEEDINGS{BERTAGNA_2004_INPROCEEDINGS_BLMC_84574, AUTHOR = {Bertagna, F. and Lenci, A. and Monachini, M. and Calzolari, N.}, TITLE = {Content Interoperability of Lexical Resources: Open Issues and MILE Perspectives}, YEAR = {2004}, ABSTRACT = {The paper tackles the issue of content interoperability among lexical resources, by presenting an experiment of mapping differently conceived lexicons, FrameNet and NOMLEX, onto MILE (Multilingual ISLE Lexical Entry), a meta-entry for the encoding of multilingual lexical information, acting as a general schema of shared and common lexical objects. The aim is to (i) raise problems and (ii) test the expressive potentialities of MILE as a standard environment for Computational Lexicons.}, PAGES = {131-134}, URL = {https://publications.cnr.it/doc/84574}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation, held in Memory of Antonio Zampolli. 
Lisbon, Portugal, Proceedings, Volume I, Paris, The European Language Resources Association (ELRA)}, CONFERENCE_PLACE = {Lisbon, Portugal}, CONFERENCE_DATE = {26-27-28 May 2004}, } @INPROCEEDINGS{BOZZI_2004_INPROCEEDINGS_B_84622, AUTHOR = {Bozzi, A.}, TITLE = {Electronic Publishing and Computational Philology}, YEAR = {2004}, ABSTRACT = {This paper is concerned with the relationship between electronic publishing and digital scholarly textual criticism. Hypertextual techniques and computational tools are compared. These two different methodologies applied to modern and contemporary texts with respect to ancient manuscript tradition are highlighted. Particular attention is focussed on the general criteria employed in the development of a computer-assisted workstation for digital editions of Greek papyri and medieval manuscripts.}, KEYWORDS = {Electronic publishing, Computational philology, Digital Libraries, Textual criticism}, PAGES = {3-24}, URL = {https://publications.cnr.it/doc/84622}, VOLUME = {XXIV-XXV}, PUBLISHER = {Istituti Editoriali e Poligrafici Internazionali (Pisa-Roma, ITA)}, ISSN = {0392-6907}, ISBN = {978-88-8147-435-6}, CONFERENCE_NAME = {The Evolution of Texts: Confronting Stemmatological and Genetical Methods}, CONFERENCE_PLACE = {Louvain-la-Neuve}, CONFERENCE_DATE = {September 1-2, 2004}, BOOKTITLE = {The evolution of texts: confronting stemmatological and genetical methods}, EDITOR = {Macé, C. and Baret, P. and Bozzi, A. and Cignoni, L.}, } @INPROCEEDINGS{BOZZI_2004_INPROCEEDINGS_BR_84568, AUTHOR = {Bozzi, A. and Raggioli, A.}, TITLE = {DiPhiloS: un sistema di filologia computazionale applicato a referti medici greci su papiro}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84568}, CONFERENCE_NAME = {Seminario di Studi: Testi medici su papiro}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{BUSEMANN_2004_INPROCEEDINGS_BCCK_84569, AUTHOR = {Busemann, S. and Calzolari, N. and Choukri, K. 
and Krauwer, S.}, TITLE = {Building the LR\&E Roadmap: Joint COCOSDA and ICCWLRE Meeting}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84569}, CONFERENCE_NAME = {LREC 2004}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{CALZOLARI_2004_INPROCEEDINGS_C_84588, AUTHOR = {Calzolari, N.}, TITLE = {Introduction of the Conference Chair}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84588}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{CALZOLARI_2004_INPROCEEDINGS_C_84589, AUTHOR = {Calzolari, N.}, TITLE = {Computational Lexicons-Open and Distributed Lexical Infrastructure}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84589}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{CALZOLARI_2004_INPROCEEDINGS_C_84590, AUTHOR = {Calzolari, N.}, TITLE = {Computational Lexicons-Dynamic Lexicons: New types of resources which are Corpus and Lexicon together}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84590}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{CALZOLARI_2004_INPROCEEDINGS_C_84604, AUTHOR = {Calzolari, N.}, TITLE = {European Initiatives to Promote Cooperation between Speech and Text Communities}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84604}, CONFERENCE_NAME = {ICSLP, Interspeech 2004, 8th International Conference on Spoken Language Processing}, CONFERENCE_PLACE = {Korea}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{CALZOLARI_2004_INPROCEEDINGS_C_172460, AUTHOR = {Calzolari, N.}, TITLE = {A new vision for Language Resources: the role of International Cooperation}, YEAR = {2004}, URL = 
{https://publications.cnr.it/doc/172460}, CONFERENCE_NAME = {Multilingual Information Service System for the Beijing 2008 Olympics Forum}, CONFERENCE_PLACE = {Beijing, China}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{CALZOLARI_2004_INPROCEEDINGS_CCGMBFLMMP_84592, AUTHOR = {Calzolari, N. and Choukri, K. and Gavrilidou, M. and Maegaard, B. and Baroni, P. and Fersøe, H. and Lenci, A. and Mapelli, V. and Monachini, M. and Piperidis, S.}, TITLE = {ENABLER Thematic Network of National Projects: Technical, Strategic and Political Issues of LRs}, YEAR = {2004}, ABSTRACT = {In this paper we present general strategies concerning Language Resources (LRs) - Written, Spoken and, recently, Multimodal - as developed within the ENABLER Thematic Network. LRs are a central component of the so-called "linguistic infrastructure" (the other key element being Evaluation), necessary for the development of any Human Language Technology (HLT) application. They play a critical role, as horizontal technology, in different emerging areas of FP6, and have been recognized as a priority within a number of national projects around Europe and world-wide. The availability of LRs is also a "sensitive" issue, touching directly the sphere of linguistic and cultural identity, but also with economical, societal and political implications. 
This is going to be even more true in the new Europe with 25 languages on a par.}, KEYWORDS = {Language Resources, Strategic and Political Issues, Written and Spoken, Linguistic Infrastructure, Supranational Coordination}, PAGES = {937-940}, URL = {http://www.lrec-conf.org/proceedings/lrec2004/}, VOLUME = {III}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004-Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbon}, CONFERENCE_DATE = {24-30/05/2004}, BOOKTITLE = {Proceedings of the Fourth International Conference on Language Resources and Evaluation}, EDITOR = {Lino, M. T. and Xavier, M. F. and Ferreira, F. and Costa, R. and Silva, R.}, } @INPROCEEDINGS{CAPPELLI_2004_INPROCEEDINGS_CA_84605, AUTHOR = {Cappelli, G. and Alberto, P.}, TITLE = {The OLISSIPO and LECTIO Projects}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84605}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{DECLERCK_2004_INPROCEEDINGS_DBCL_84606, AUTHOR = {Declerck, T. and Buitelaar, P. and Calzolari, N. and Lenci, A.}, TITLE = {Towards a Language Infrastructure for the Semantic Web}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84606}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{FERDEGHINI_2004_INPROCEEDINGS_FMBPB_84623, AUTHOR = {Ferdeghini, E. M. and Marcheschi, P. and Bozzi, A. and Prediletto, R. 
and Benassi, A.}, TITLE = {Radiologic Image Library for Pathology Related Searches}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84623}, CONFERENCE_NAME = {Computers in Cardiology}, CONFERENCE_PLACE = {Chicago}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{FERSE_2004_INPROCEEDINGS_FM_84607, AUTHOR = {Fersøe, H. and Monachini, M.}, TITLE = {ELRA Validation Methodology and Standard Promotion for Linguistic Resources}, YEAR = {2004}, ABSTRACT = {This paper describes the results of work made for ELRA during 2003-2004. It describes the methodology for validation of written language resources (WLRs), specifically lexica, which has been developed for ELRA and tested on a few resources in the ELRA catalogue. It discusses the importance of key issues in lexicon creation and validation such as the adoption of standards for the coding of linguistic content and the importance of documentation. It reports on the experience gained from applying the methodology to lexical resources in the ELRA catalogue arguing that the checks must be reasonable, informative, on a suitable level of detail, and generic. It proposes a set of basic elements to be included in future discussions on establishing standards for lexicon resources. In conclusion it sketches the work to be undertaken in 2004 to promote validation and the adoption of standards.}, PAGES = {941-944}, URL = {https://publications.cnr.it/doc/84607}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {26-27-28/05/2004}, } @INPROCEEDINGS{HEPPLE_2004_INPROCEEDINGS_HIAMMG_84609, AUTHOR = {Hepple, M. and Ireson, N. and Allegrini, P. and Marchi, S. and Montemagni, S. and Gómez Hidalgo, J. 
M.}, TITLE = {NLP-enhanced Content filtering within the POESIA Project}, YEAR = {2004}, ABSTRACT = {This paper introduces the POESIA internet filtering system, which is open-source, and which combines standard filtering methods, such as positive/negative URL lists, with more advanced techniques, such as image processing and NLP-enhanced text filtering. The description here focusses on components providing textual content filtering for three European languages (English, Italian and Spanish), employing NLP methods to enhance performance. We address also the acquisition of language data needed to develop these filters, and the evaluation of the system and its components.}, KEYWORDS = {Image processing, Natural language processing systems, Open systems}, PAGES = {1967-1970}, URL = {https://www.aclweb.org/anthology/L04-1507/}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {26-28 May 2004}, BOOKTITLE = {Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC 2004)}, EDITOR = {Lino, M. T. and Xavier, M. F. and Ferreira, F. and Costa, R. and Silva, R.}, } @INPROCEEDINGS{MARINELLI_2004_INPROCEEDINGS_M_84610, AUTHOR = {Marinelli, R.}, TITLE = {Proper Names and Polysemy: From a Lexicographic Experience}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84610}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{MARINELLI_2004_INPROCEEDINGS_MRE_84611, AUTHOR = {Marinelli, R. and Roventini, A. 
and Enea, A.}, TITLE = {Building a Maritime Domain Lexicon: a Few Considerations on the Database Structure and the Semantic Coding}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84611}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{MONACHINI_2004_INPROCEEDINGS_MCMRU_84612, AUTHOR = {Monachini, M. and Calzolari, F. and Mammini, M. and Rossi, S. and Ulivieri, M.}, TITLE = {Unifying Lexicons in view of a Phonological and Morphological Lexical DB}, YEAR = {2004}, ABSTRACT = {The present work falls in the line of activities promoted by the European Language Resource Association (ELRA) Production Committee (PCom) and raises issues in methods, procedures and tools for the reusability, creation, and management of Language Resources. A two-fold purpose lies behind this experiment. The first aim is to investigate the feasibility, define methods and procedures for combining two Italian lexical resources that have incompatible formats and complementary information into a Unified Lexicon (UL). The adopted strategy and the procedures appointed are described together with the driving criterion of the merging task, where a balance between human and computational efforts is pursued. The coverage of the UL has been maximized, by making use of simple and fast matching procedures. The second aim is to exploit this newly obtained resource for implementing the phonological and morphological layers of the CLIPS lexical database. Implementing these new layers and linking them with the already existing syntactic and semantic layers is not a trivial task. The constraints imposed by the model, the impact at the architectural level and the solution adopted in order to make the whole database 'speak' efficiently are presented. Advantages vs. 
disadvantages are discussed.}, PAGES = {1107-1110}, URL = {https://publications.cnr.it/doc/84612}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {26-27-28 May 2004}, } @INPROCEEDINGS{PANUZZI_2004_INPROCEEDINGS_PPM_84613, AUTHOR = {Panuzzi, A. and Picchi, E. and Moneglia, M.}, TITLE = {Using PiTagger for Lemmatization and PoS Tagging of a Spontaneous Speech Corpus: C-Oral-Rom Italian}, YEAR = {2004}, ABSTRACT = {The automatic lemmatization and morpho-syntactic annotation of spoken language is a quite recent and complex task for Natural Language Processing. The state of the art on written corpora don't provide us with a satisfactory level of analysis regarding spontaneous spoken language (Uchimoto et al., 2002; Moreno \& Guirao, 2003). The spontaneous speech corpus Italian C-ORALROM has been tagged with Part of Speech (Pos) and morpho-syntactic information, using and adapting an already existing tool trained on Italian written resources (PiTagger, developed by Eugenio Picchi, ILC-CNR Pisa). The incidence of spoken domain on the performance is within a 10\% of errors detected in the manual evaluation procedure. Some issues concerning spoken language emerged. 
The definition of significant contexts for PoS statistics is to be provided by utterance boundaries; moreover, the relevance of a series of phenomena related to the prosodic parsing has been highlighted: fragmentation phenomena, a relative lack of information for all word adjacent to utterance boundaries; under-specification of PoS for words in connection to secondary prosodic breaks and one word utterances.}, KEYWORDS = {Lemmatization, Pos Tagging}, PAGES = {563-566}, URL = {http://www.lrec-conf.org/lrec2004/}, VOLUME = {2}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {26-27-28 May 2004}, BOOKTITLE = {Proceedings: in LREC 2004: Fourth International Conference on Language Resources and Evaluation}, } @INPROCEEDINGS{PARDELLI_2004_INPROCEEDINGS_PSG_84614, AUTHOR = {Pardelli, G. and Sassi, M. and Goggi, S.}, TITLE = {From Weaver to the ALPAC Report}, YEAR = {2004}, ABSTRACT = {This paper presents a sample pertaining to the creation and the use of words in the field of Natural Language Processing (NLP) in the years 1949-1966. These words have been statistically sorted and the results could be taken as a proof that electronic processing of linguistic data leads to the diffusion of clear and concise words for describing a complex concept which would need a circumlocution to be described instead. 
The aim of this article is to provide an evolutionary overview of these new lexical forms in the various languages for the period taken into account and, whereas possible, a data register and a tabular representation have been prepared as well.}, KEYWORDS = {Terminology, Natural Language Processing}, PAGES = {2005-2008}, URL = {https://publications.cnr.it/doc/84614}, PUBLISHER = {European Language Resources Association (ELRA)-Evaluations and Language resources Distribution Agency (ELDA) (Paris, FRA)}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {26th, 27th \& 28 May 2004}, EDITOR = {Lino, M. T. and Xavier, M. F. and Ferreira, F. and Costa, R. and Silva, R.}, } @INPROCEEDINGS{PICCHI_2004_INPROCEEDINGS_PCCSS_84615, AUTHOR = {Picchi, E. and Ceccotti, M. L. and Cucurullo, S. and Sassi, M. and Sassolini, E.}, TITLE = {Linguistic Miner. An Italian Linguistic Knowledge System}, YEAR = {2004}, ABSTRACT = {Linguistic Miner is a project carried out at ILC whose objective is the development of an integrated system to build, organise and manage a corpus of Italian texts (of various origins and formats), and to design and constantly add new tools for the automatic extraction of tiered linguistic knowledge to be made available for many teaching, publishing, and other cultural purposes. The project is based on a notion that is preliminary to all the systems for corpus-based linguistic analysis: a language represented by the largest possible collection of heterogeneous texts is the best source of linguistic information at any level of analysis considered. The first goals of such a system are the semi-automated construction of an Italian data mine for the extraction of linguistic information, the validation of linguistic patterns, the installation of useful tools and resources for a range of different categories of Italian language users. 
The main feature of the project is its purpose of building large language reference corpora allowing for the creation and use of effective tools for the handling and processing, as well as the automatic linguistic synthesis, of such corpora.}, KEYWORDS = {linguistic analysis, information extraction}, PAGES = {1811-1814}, URL = {http://www.lrec-conf.org/lrec2004/}, VOLUME = {V}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {26-27-28 Maggio 2004}, BOOKTITLE = {Proceedings of the 4th International Conference on Language Resources and Evaluation}, } @INPROCEEDINGS{QUOCHI_2004_INPROCEEDINGS_Q_84616, AUTHOR = {Quochi, V.}, TITLE = {Representing Italian Complex Nominals: A Pilot Study}, YEAR = {2004}, ABSTRACT = {A corpus-based investigation of Italian Complex Nominals (CNs), of the form N+PP, which aims at clarifying their syntactic and semantic constitution, is presented. The main goal is to find out useful parameters for their representation in a computational lexicon. As a reference model we have taken an implementation of Pustejovsky's Generative Lexicon Theory (1995), the SIMPLE Italian Lexicon, and in particular the Extended Qualia Structure. Italian CN formation mainly exploits post-modification; of particular interest here are CNs of the kind N+PP since this syntactic pattern is highly productive in Italian and such CNs very often translate compound nouns of other languages. One of the major problems posed by CNs for interpretation is the retrieval or identification of the semantic relation linking their components, which is (at least partially) implicit on the surface. Studying a small sample, we observed some interesting facts that could be useful when setting up a larger experiment to identify semantic relations and/or automatically learn the syntactic peculiarities of given semantic paradigms. 
Finally, a set of representational features exploiting the results from our corpus is proposed.}, KEYWORDS = {Multiword expressions Complex Nominals, Italian language}, PAGES = {1863-1866}, URL = {https://publications.cnr.it/doc/84616}, ISBN = {2-9517408-1-6}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona, Portogallo}, CONFERENCE_DATE = {26-28/05/2004}, BOOKTITLE = {Proceedings of the Fourth International Conference on Language Resources and Evaluation, LREC'04}, } @INPROCEEDINGS{ROVENTINI_2004_INPROCEEDINGS_RM_84617, AUTHOR = {Roventini, A. and Marinelli, R.}, TITLE = {Extending the Italian WordNet with the Specialized language of the Maritime Domain}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84617}, CONFERENCE_NAME = {Second International WordNet Conference, GWC 2004}, CONFERENCE_PLACE = {Brno, Czech Republic}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{RUIMY_2004_INPROCEEDINGS_RBC_84618, AUTHOR = {Ruimy, N. and Bouillon, P. and Cartoni, B.}, TITLE = {Semi-Automatic Derivation of a French Lexicon from CLIPS}, YEAR = {2004}, ABSTRACT = {In this paper we describe the methodology developed in the framework of a feasibility study for the derivation of a semantically annotated French lexicon from a monolingual Italian lexical resource. Firstly, an outline of the source lexicon is provided. Then, the two different and complementary strategies that have been experimented for pairing off the relevant monolingual Italian entries and their translational equivalents are described. 
Finally, the results achieved through each of the illustrated methodologies are presented, their viability is evaluated and a general assessment of the experiment performed is provided.}, KEYWORDS = {cognate, sense indicator, semantic lexicon, matching rules, multilingual morphology}, PAGES = {1099-1102}, URL = {https://publications.cnr.it/doc/84618}, CONFERENCE_NAME = {LREC 2004: Fourth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {24-30/05/2004}, } @INPROCEEDINGS{ULIVIERI_2004_INPROCEEDINGS_UGBC_84619, AUTHOR = {Ulivieri, M. and Guazzini, E. and Bertagna, F. and Calzolari, N.}, TITLE = {Senseval-3: The Italian All-words Task}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84619}, CONFERENCE_NAME = {SENSEVAL-3: Third International Workshop on the Evaluation of Systems for the Semantic Analysis of Text}, CONFERENCE_PLACE = {Barcellona}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{ZAMPOLLI_2004_INPROCEEDINGS_Z_84620, AUTHOR = {Zampolli, A.}, TITLE = {Due programmi di interesse nazionale per il trattamento automatico dell'italiano}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/84620}, CONFERENCE_NAME = {VI Convegno SILFI: Tradizione \& Innovazione}, CONFERENCE_PLACE = {Duisburg}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{BOZZI_2004_INPROCEEDINGS_B_112927, AUTHOR = {Bozzi, A.}, TITLE = {The DIPHILOS workstation for critical apparatus management: some experiments on medieval provençal texts}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/112927}, CONFERENCE_NAME = {Textual Criticism and Genetics Confronting Methods}, CONFERENCE_PLACE = {Louvain-la-Neuve}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{BOZZI_2004_INPROCEEDINGS_B_112928, AUTHOR = {Bozzi, A.}, TITLE = {Analisi linguistica e documenti digitali: un servizio innovativo per la fruizione dei beni librari}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/112928}, CONFERENCE_NAME = {TAL in biblioteca. 
Conferenza a cura del Forum per il Trattamento Automatico della Lingua}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{BOZZI_2004_INPROCEEDINGS_BC_112926, AUTHOR = {Bozzi, A. and Corradini, M. S.}, TITLE = {Aspetti di critica testuale assistita da calcolatore}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/112926}, CONFERENCE_NAME = {XXIV Congrès International de Linguistique et de Philologie Romanes (CILPR 2004)}, CONFERENCE_PLACE = {Aberystwyth}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{CALZOLARI_2004_INPROCEEDINGS_C_112898, AUTHOR = {Calzolari, N.}, TITLE = {Le risorse linguistiche: corpora, dizionari, ontologie, …}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/112898}, CONFERENCE_NAME = {Trattamento del Linguaggio Naturale: prospettive della ricerca e del mercato in Italia}, CONFERENCE_PLACE = {Perugia}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{CALZOLARI_2004_INPROCEEDINGS_C_112899, AUTHOR = {Calzolari, N.}, TITLE = {Il Trattamento Automatico della Lingua}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/112899}, CONFERENCE_NAME = {Conferenza stampa per la presentazione del Libro Bianco del Forum TAL}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{CALZOLARI_2004_INPROCEEDINGS_C_112900, AUTHOR = {Calzolari, N.}, TITLE = {Semantic tagging and semantic lexicons: towards content interoperability}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/112900}, CONFERENCE_NAME = {International Colloquium on Word structure and Lexical systems: description, models and applications}, CONFERENCE_PLACE = {Pavia}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{ENEA_2004_INPROCEEDINGS_E_112924, AUTHOR = {Enea, A.}, TITLE = {La banca dati Guida in rete: wwwisis e strategie di ricerca}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/112924}, CONFERENCE_NAME = {La Guida agli archivi della Resistenza}, CONFERENCE_PLACE = {Milano}, CONFERENCE_DATE = {2004}, } 
@INPROCEEDINGS{ENEA_2004_INPROCEEDINGS_E_112925, AUTHOR = {Enea, A.}, TITLE = {Prove di integrazione fra due applicativi: Isis e Guarini}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/112925}, CONFERENCE_NAME = {Accesso agli archivi informatici dell'Istituto: risultati e prospettive}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{PIRRELI_2004_INPROCEEDINGS_P_112919, AUTHOR = {Pirrelli, V.}, TITLE = {Probabilistic language modes and language universals}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/112919}, CONFERENCE_NAME = {Workshop LARL}, CONFERENCE_PLACE = {Pavia}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{PIRRELLI_2004_INPROCEEDINGS_PAM_112920, AUTHOR = {Pirrelli, V. and Allegrini, P. and Montemagni, S.}, TITLE = {Classifying text through time: a complexity science approach to dynamic web page filtering}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/112920}, CONFERENCE_NAME = {International Conference on Text Mining (CIFT)}, CONFERENCE_PLACE = {La Rochelle Francia}, CONFERENCE_DATE = {2004}, } @INPROCEEDINGS{PIRRELLI_2004_INPROCEEDINGS_PLM_112923, AUTHOR = {Pirrelli, V. and Lenci, A. and Montemagni, S.}, TITLE = {The lexicon in context: distributional evidence and representational issues}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/112923}, CONFERENCE_NAME = {International Colloquium: Word Structure and Lexical Systems: models and applications}, CONFERENCE_PLACE = {Pavia}, CONFERENCE_DATE = {2004}, } @TECHREPORT{BARONI_2004_TECHREPORT_BCLQU_157368, AUTHOR = {Baroni, P. and Calzolari, N. and Lenci, A. and Quochi, V. and Ulivieri, M.}, TITLE = {Final Resources Landscape}, YEAR = {2004}, ABSTRACT = {ELSNET-4 Deliverable D6.4}, KEYWORDS = {Language Resources, Landscapes}, PAGES = {11}, URL = {https://publications.cnr.it/doc/157368}, } @TECHREPORT{BARONI_2004_TECHREPORT_BCM_157371, AUTHOR = {Baroni, P. and Calzolari, N. 
and Mammini, M.}, TITLE = {Final Resources Roadmap}, YEAR = {2004}, ABSTRACT = {ELSNET-4 Deliverable D6.3}, KEYWORDS = {Language Resources, Roadmaps}, URL = {https://publications.cnr.it/doc/157371}, } @TECHREPORT{BARTOLINI_2004_TECHREPORT_BGLMP_157375, AUTHOR = {Bartolini, E. and Giorgetti, D. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Text-2-Knowledge: Acquisizione automatica di ontologie per l'indicizzazione semantica di documenti}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/157375}, } @TECHREPORT{CECCOTTI_2004_TECHREPORT_CS_157391, AUTHOR = {Ceccotti, M. L. and Sassi, M.}, TITLE = {Gadda in Abruzzo. Concordanze per lemma}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/157391}, } @TECHREPORT{GAVRILIDOU_2004_TECHREPORT_GGDLMSPRS_157392, AUTHOR = {Gavrilidou, M. and Giouli, V. and Desipri, E. and Labropoulou, P. and Monachini, M. and Soria, C. and Picchi, E. and Ruffolo, P. and Sassolini, E.}, TITLE = {Report on the multilingual resources production}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/157392}, } @TECHREPORT{GAVRILIDOU_2004_TECHREPORT_GGDMS_157393, AUTHOR = {Gavrilidou, M. and Giouli, V. and Desipri, E. and Monachini, M. and Soria, C.}, TITLE = {Report on the model of LRs production. INTERA}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/157393}, } @TECHREPORT{KRAUWER_2004_TECHREPORT_KCB_157394, AUTHOR = {Krauwer, S. and Calzolari, N. 
and Busemann, S.}, TITLE = {The ELSNET Roadmap for Language and Speech Technology}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/157394}, } @TECHREPORT{SABA_2004_TECHREPORT_S_157361, AUTHOR = {Saba, A.}, TITLE = {Lessico del testo “Libro de las longitudes y manera que hasta agora se ha tenido en el arte de navegar, con sus demonstraciones y exemplos”}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/157361}, } @TECHREPORT{SABA_2004_TECHREPORT_S_157362, AUTHOR = {Saba, A.}, TITLE = {Lessico del testo "Ytinerario de navegación de los mares y tierras occidentales"}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/157362}, } @MISC{BOZZI_2004_MISC_B_151527, AUTHOR = {Bozzi, A.}, TITLE = {CHLT-LEMLAT}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/151527}, } @MISC{ENEA_2004_MISC_E_220434, AUTHOR = {Enea, A.}, TITLE = {Gli ARCHIVI FOTOGRAFICI degli Istituti per la storia della Resistenza e della società contemporanea in Italia}, YEAR = {2004}, ABSTRACT = {Sono consultabili le descrizioni di archivi fotografici dell'Istituto nazionale e degli Istituti di Novara, Pavia, Sesto San Giovanni, Torino, e Udine, dell'archivio Albe e Lica Steiner (Politecnico di Milano), del Centro Studi e ricerca Silvio Trentin di Jesolo e del Comune di Corbetta. Le descrizioni dei fondi sono a livello di serie, eccezion fatta per l'Istituto di Torino dove sono descritti i singoli documenti con la riproduzione delle immagini.}, KEYWORDS = {archivistica, beni culturali}, URL = {http://www.reteparri.it/risorse-on-line/servizi-archivistici}, } @MISC{SABA_2004_MISC_S_157363, AUTHOR = {Saba, A.}, TITLE = {Ytinerario de navegación de los mares y tierras occidentales}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/157363}, } @MISC{SABA_2004_MISC_SC_157395, AUTHOR = {Saba, A. 
and Carpi, E.}, TITLE = {Suma de Geographía que trata de todas las partidas y provincias del mundo, en especial de las Indias}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/157395}, } @MISC{SABA_2004_MISC_SG_157396, AUTHOR = {Saba, A. and García Macho, M. L.}, TITLE = {Instrución náuthica para el buen uso y regimiento de las naos, [...]}, YEAR = {2004}, URL = {https://publications.cnr.it/doc/157396}, } @ARTICLE{ALLEGRINI_2003_ARTICLE_AAGPR_169343, AUTHOR = {Allegrini, P. and Aquino, G. and Grigolini, P. and Palatella, L. and Rosa, A.}, TITLE = {Generalized master equation via aging continuous-time random walks}, YEAR = {2003}, ABSTRACT = {We discuss the problem of the equivalence between continuous-time random walk (CTRW) and generalized master equation (GME). The walker, making instantaneous jumps from one site of the lattice to another, resides in each site for extended times. The sojourn times have a distribution density ψ(t) that is assumed to be an inverse power law with the power index µ. We assume that the Onsager principle is fulfilled, and we use this assumption to establish a complete equivalence between GME and the Montroll-Weiss CTRW. We prove that this equivalence is confined to the case where ψ(t) is an exponential. We argue that is so because the Montroll-Weiss CTRW, as recently proved by Barkai [E. Barkai, Phys. Rev. Lett. 90, 104101 (2003)], is nonstationary, thereby implying aging, while the Onsager principle is valid only in the case of fully aged systems. The case of a Poisson distribution of sojourn times is the only one with no aging associated to it, and consequently with no need to establish special initial conditions to fulfill the Onsager principle. We consider the case of a dichotomous fluctuation, and we prove that the Onsager principle is fulfilled for any form of regression to equilibrium provided that the stationary condition holds true. 
We set the stationary condition on both the CTRW and the GME, thereby creating a condition of total equivalence, regardless of the nature of the waiting-time distribution. As a consequence of this procedure we create a GME that is a bona fide master equation, in spite of being non-Markov. We note that the memory kernel of the GME affords information on the interaction between system of interest and its bath. The Poisson case yields a bath with infinitely fast fluctuations. We argue that departing from the Poisson form has the effect of creating a condition of infinite memory and that these results might be useful to shed light on the problem of how to unravel non-Markov quantum master equations. ©2003 The American Physical Society}, PAGES = {056123-056134}, URL = {https://publications.cnr.it/doc/169343}, VOLUME = {68}, } @ARTICLE{ALLEGRINI_2003_ARTICLE_ABCGHMPR_30858, AUTHOR = {Allegrini, P. and Balocchi, R. and Chillemi, S. and Grigolini, P. and Hamilton, P. and Maestri, R. and Palatella, L. and Raffaelli, G.}, TITLE = {Long-and short-term analysis of heartbeat sequences: Correlation with mortality risk in congestive heart failure patients}, YEAR = {2003}, ABSTRACT = {We analyze RR heartbeat sequences with a dynamic model that satisfactorily reproduces both the long- and the short-time statistical properties of heart beating. These properties are expressed quantitatively by means of two significant parameters, the scaling concerning the asymptotic effects of long-range correlation, and the quantity 1– establishing the amount of uncorrelated fluctuations. We find a correlation between the position in the phase space (,) of patients with congestive heart failure and their mortality risk.}, PAGES = {062901-062905}, URL = {https://publications.cnr.it/doc/30858}, VOLUME = {67}, } @ARTICLE{ALLEGRINI_2003_ARTICLE_ABGHIMPRSVY_171128, AUTHOR = {Allegrini, P. and Benci, V. and Grigolini, P. and Hamilton, P. and Ignaccolo, M. and Menconi, G. and Palatella, L. 
and Raffaelli, G. and Scafetta, N. and Virgilio, M. and Yang, J.}, TITLE = {Compression and diffusion: a joint approach to detect complexity}, YEAR = {2003}, ABSTRACT = {The adoption of the Kolmogorov–Sinai entropy is becoming a popular research tool among physicists, especially when applied to a dynamical system fitting the conditions of validity of the Pesin theorem. The study of time series that are a manifestation of system dynamics whose rules are either unknown or too complex for a mathematical treatment, is still a challenge since the KS entropy is not computable, in general, in that case. Here we present a plan of action based on the joint action of two procedures, both related to the KS entropy, but compatible with computer implementation through fast and efficient programs. The former procedure, called compression algorithm sensitive to regularity (CASToRE), establishes the amount of order by the numerical evaluation of algorithmic compressibility. The latter, called complex analysis of sequences via scaling and randomness assessment (CASSANDRA), establishes the complexity degree through the numerical evaluation of the strength of an anomalous effect. This is the departure, of the diffusion process generated by the observed fluctuations, from ordinary Brownian motion. The CASSANDRA algorithm shares with CASToRE a connection with the Kolmogorov complexity. This makes both algorithms especially suitable to study the transition from dynamics to thermodynamics, and the case of non-stationary time series as well. The benefit of the joint action of these two methods is proven by the analysis of artificial sequences with the same main properties as the real time series to which the joint use of these two methods will be applied in future research work}, PAGES = {517-535}, URL = {https://publications.cnr.it/doc/171128}, VOLUME = {15}, } @ARTICLE{ALLEGRINI_2003_ARTICLE_AMP_64466, AUTHOR = {Allegrini, P. and Montemagni, S. 
and Pirrelli, V.}, TITLE = {Example-based automatic induction of semantic classes through entropic scores}, YEAR = {2003}, ABSTRACT = {Abstract - The paper deals in some detail with the application of example-based machine learning techniques to the task of automatically acquiring semantic information from functionally annotated texts. Special emphasis is placed on the use of “analogical proportions” as a means of structuring the knowledge embodied in attested examples, and weighing up their contribution to a variety of lexico-semantic classification tasks. Careful quantitative analysis of automatically acquired information proves to shed considerable light on the semantic inter-connectivity of input data, their structure and organising principles.}, PAGES = {1-45}, URL = {https://publications.cnr.it/doc/64466}, VOLUME = {16-17}, } @ARTICLE{BOZZI_2003_ARTICLE_B_64465, AUTHOR = {Bozzi, A.}, TITLE = {Aspetti e problemi di spoglio elettronico di un archivio testuale: il caso dei Grammatici Latini antichi}, YEAR = {2003}, KEYWORDS = {Latino, Linguistica, Base di Dati, Filologia, Grammatica}, PAGES = {533-550}, URL = {https://publications.cnr.it/doc/64465}, VOLUME = {31}, } @ARTICLE{BOZZI_2003_ARTICLE_BC_64471, AUTHOR = {Bozzi, A. and Corradini, M. S.}, TITLE = {The Diphilos workstation: a computational system for digital philology}, YEAR = {2003}, ABSTRACT = {Abstract - Digital technology development and the conversion of ancient source documents in digital format allows to design software tools for philological disciplines. The Philological Workstation is able to: 1) manage images and texts; 2) perform an automatic link between each word of the manually transcribed manuscripts and the image-zones where the words are located; 3) associate annotations and variants to the text or to the image; 4) prepare indexes and concordances. 
A special module is also available to record the critical apparatus information allowing the user to evaluate the different typology each variant is dealing with. A multidimensional scaling algorithm shows the dependence between the collated sources in a 3D space. With regard to the ancient printed books, the workstation is able to train a specific neural system for automatic interpretation and transcription of the text, which, for the Latin language, is verified and corrected by a linguistic spelling checker.}, PAGES = {47-77}, URL = {https://publications.cnr.it/doc/64471}, VOLUME = {16-17}, } @ARTICLE{CALZOLARI_2003_ARTICLE_C_64472, AUTHOR = {Calzolari, N.}, TITLE = {Corpus-based lexicon building: an overview across projects, problems, approaches}, YEAR = {2003}, ABSTRACT = {Abstract - The paper aims at providing an overview of the current situation with respect to the interaction between two different but connected language resources, i.e. lexicons and corpora. The connection has become more and more evident in the field of natural language processing in the last years owing to a series of converging factors, such as the availability of increasingly larger on-line corpora, the trend to use corpus evidence in “printed” lexicography, the presence of more robust automatic tools for corpus analysis, annotation, and extraction of information, the need for computational lexicons to adhere to real usage of language as evidenced in corpora. I shall illustrate some issues regarding the corpus-lexicon relation as it emerges in particular from several representative European projects regarding both the construction of large-scale harmonised resources to be used for various applicative purposes, also of multilingual nature, and the acquisition of lexical information from corpora to enhance and tune existing lexicons. 
I conclude by hinting at a few issues, related to corpus-lexicon interaction, to be considered a priority in the near future.}, PAGES = {79-116}, URL = {https://publications.cnr.it/doc/64472}, VOLUME = {16-17}, } @ARTICLE{CALZOLARI_2003_ARTICLE_C_64482, AUTHOR = {Calzolari, N.}, TITLE = {Trattamento Automatico della Lingua e Risorse Linguistiche}, YEAR = {2003}, PAGES = {84-89}, URL = {https://publications.cnr.it/doc/64482}, VOLUME = {XXI}, } @ARTICLE{CALZOLARI_2003_ARTICLE_C_64498, AUTHOR = {Calzolari, N.}, TITLE = {"Antonio Zampolli. Una vita per la Linguistica Computazionale"}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/64498}, VOLUME = {2}, } @ARTICLE{CALZOLARI_2003_ARTICLE_CB_64459, AUTHOR = {Calzolari, N. and Bindi, R.}, TITLE = {Acquisition of lexical information from a large textual Italian corpus}, YEAR = {2003}, ABSTRACT = {information others than those usually found in machine readable dictionaries or manually encoded by lexicographers are urgently needed. Different sources must be exploited if we want to overcome the “lexical bottleneck” of Natural Language Processing. Very interesting data can be found by processing large textual corpora, where the actual usage of the language can be truly investigated. These data refer, typically, to various kinds of syntagmatic relations, which are particularly problematic in many NLP applications. The paper describes how this data can be at least partially extracted by processing and analysing large text corpora, with quantitative/statistic methods. We describe two types of quantitative analyses whose aim is to extract information on the strength of association between two words, and on fixed phrases and idioms. We observe how the measure of the association ratio provides quantitative evidence to a number of lexical, syntactic and semantic relationships between word-pairs. 
One of the claims is that the linguistic information embodied in all these quite different types of lexical collocations can be helpful for lexical disambiguation in analysis and crucial for lexical selection in generation. This is a step towards a more objective lexicography and a more “data-based” linguistics.}, PAGES = {117--131}, URL = {https://publications.cnr.it/doc/64459}, VOLUME = {16-17}, } @ARTICLE{CAMUGLIA_2003_ARTICLE_CCR_64457, AUTHOR = {Camuglia, G. and Camuglia, R. M. and Ribarov, K.}, TITLE = {Computer processing of a Clopen language system: old-church Slavonic}, YEAR = {2003}, ABSTRACT = {The aim of this work is to explain and reveal the mutual benefits of computational processing of a dead language and a contemporary language. The dead language considered is Old-Church Slavonic. We shall try to point out that processing of a dead language is important not only for a diachronic study of the language material, and that annotation is not the straightforward process that it may seem to be. We also describe briefly two frameworks for the processing of Old-Church Slavonic: DBT and STIN-O-SANCT.}, PAGES = {133--150}, URL = {http://www.torrossa.it/pages/ipplatform/itemDetails.faces}, VOLUME = {16-17}, DOI = {10.1400/18156}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{CAPPELLI_2003_ARTICLE_CMCC_64480, AUTHOR = {Cappelli, A. and Moretti, L. and Catarsi, M. N. 
and Caligaris, C.}, TITLE = {Strumenti per l'accesso multilingue al contenuto dell'informazione}, YEAR = {2003}, PAGES = {151--182}, URL = {https://publications.cnr.it/doc/64480}, VOLUME = {16-17}, } @ARTICLE{CAPPELLI_2003_ARTICLE_C_64467, AUTHOR = {Cappelli, G.}, TITLE = {OLISSIPO: strumento per l'estrazione automatica del vocabolario di base}, YEAR = {2003}, PAGES = {183--200}, URL = {https://publications.cnr.it/doc/64467}, VOLUME = {16-17}, } @ARTICLE{CAPPELLI_2003_ARTICLE_CP_64494, AUTHOR = {Cappelli, G. and Passarotti, M.}, TITLE = {LemLat: uno strumento computazionale per l'analisi linguistica del latino. Sviluppo e prospettive}, YEAR = {2003}, PAGES = {519--531}, URL = {https://publications.cnr.it/doc/64494}, VOLUME = {31}, } @ARTICLE{CAROTA_2003_ARTICLE_CP_64474, AUTHOR = {Carota, F. and Prodanof, I.}, TITLE = {A corpus-based account of suffix productivity in Italian}, YEAR = {2003}, PAGES = {201--220}, URL = {https://publications.cnr.it/doc/64474}, VOLUME = {16-17}, } @ARTICLE{CECCOTTI_2003_ARTICLE_CS_64460, AUTHOR = {Ceccotti, M. L. and Sassi, M.}, TITLE = {L'archivio elettronico delle opere di Carlo Emilio Gadda. Da redattori a fruitori di un data base testuale}, YEAR = {2003}, PAGES = {221--250}, URL = {https://publications.cnr.it/doc/64460}, VOLUME = {16-17}, } @ARTICLE{CECCOTTI_2003_ARTICLE_CS_64495, AUTHOR = {Ceccotti, M. L. and Sassi, M.}, TITLE = {L'Archivio elettronico delle Opere di Carlo Emilio Gadda in DBT 2000: risultati e prospettive (The Electronic Archive of Carlo Emilio Gadda's Works: results and prospects)}, YEAR = {2003}, KEYWORDS = {Strumenti lessicali, Database gaddiano, Letteratura italiana, Informatica uman, Carlo Emilio Gadda}, URL = {https://publications.cnr.it/doc/64495}, VOLUME = {SupII}, } @ARTICLE{CIGNONI_2003_ARTICLE_CC_64458, AUTHOR = {Cignoni, L. and Coffey, S.}, TITLE = {At the interface of onomastics and phraseology. 
Multiword units as proper names, proper names as 'common' phrasal units}, YEAR = {2003}, PAGES = {251--262}, URL = {https://publications.cnr.it/doc/64458}, VOLUME = {16-17}, } @ARTICLE{CIGNONI_2003_ARTICLE_CC_64483, AUTHOR = {Cignoni, L. and Coffey, S.}, TITLE = {Considerations emerging from a frequency study of multiword units in a corpus of contemporary written Italian}, YEAR = {2003}, PAGES = {263--283}, URL = {https://publications.cnr.it/doc/64483}, VOLUME = {16-17}, } @ARTICLE{FERRARI_2003_ARTICLE_FP_64488, AUTHOR = {Ferrari, G. and Prodanof, I.}, TITLE = {Computational modelling of tutorial dialogue}, YEAR = {2003}, PAGES = {285--322}, URL = {https://publications.cnr.it/doc/64488}, VOLUME = {16-17}, } @ARTICLE{GIORGETTI_2003_ARTICLE_GS_170365, AUTHOR = {Giorgetti, D. and Sebastiani, F.}, TITLE = {Automating survey coding by multiclass text categorization techniques}, YEAR = {2003}, ABSTRACT = {Survey coding is the task of assigning a symbolic code from a predefined set of such codes to the answer given in response to an open-ended question in a questionnaire (aka survey). This task is usually carried out to group respondents according to a predefined scheme based on their answers. Survey coding has several applications, especially in the social sciences, ranging from the simple classification of respondents to the extraction of statistics on political opinions, health and lifestyle habits, customer satisfaction, brand fidelity, and patient satisfaction. Survey coding is a difficult task, because the code that should be attributed to a respondent based on the answer she has given is a matter of subjective judgment, and thus requires expertise. It is thus unsurprising that this task has traditionally been performed manually, by trained coders. Some attempts have been made at automating this task, most of them based on detecting the similarity between the answer and textual descriptions of the meanings of the candidate codes. 
We take a radically new stand, and formulate the problem of automated survey coding as a text categorization problem, that is, as the problem of learning, by means of supervised machine learning techniques, a model of the association between answers and codes from a training set of precoded answers, and applying the resulting model to the classification of new answers. In this article we experiment with two different learning techniques: one based on naive Bayesian classification, and the other one based on multiclass support vector machines, and test the resulting framework on a corpus of social surveys. The results we have obtained significantly outperform the results achieved by previous automated survey coding approaches.}, KEYWORDS = {survey coding, text classification, machine learning, information retrieval}, PAGES = {1269--1277}, URL = {https://publications.cnr.it/doc/170365}, VOLUME = {54}, PUBLISHER = {John Wiley \& Sons (New York, N. Y, Stati Uniti d'America)}, ISSN = {1532-2882}, JOURNAL = {Journal of the American Society for Information Science and Technology (Print)}, } @ARTICLE{LENCI_2003_ARTICLE_LCZ_64475, AUTHOR = {Lenci, A. and Calzolari, N. and Zampolli, A.}, TITLE = {SIMPLE: plurilingual semantic lexicons for natural language processing}, YEAR = {2003}, PAGES = {323--352}, URL = {https://publications.cnr.it/doc/64475}, VOLUME = {16-17}, } @ARTICLE{LENCI_2003_ARTICLE_LMP_64476, AUTHOR = {Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Chunk-it. 
An Italian shallow parser for robust syntactic annotation}, YEAR = {2003}, PAGES = {353--386}, URL = {https://publications.cnr.it/doc/64476}, VOLUME = {16-17}, } @ARTICLE{MARINELLI_2003_ARTICLE_M_64461, AUTHOR = {Marinelli, R.}, TITLE = {Per una storia dell'archivio testuale dell'Istituto di Linguistica Computazionale: dati e gestione del catalogo informatizzato}, YEAR = {2003}, ABSTRACT = {Over the last few years there has been much discussion about the relation between types of data, storage devices and system requirements for the preservation of data. This work carried out for the reorganization of the magnetic Archive of the Institute of Computational Linguistics (ILC), is an example of interaction between data management and recovery methods, as well as between data recording system development and evolution of storage devices. Data structure has also been affected by the technological evolution. It has been necessary to rely on an efficient, well-structured and tested data-base management system. Thus a computerized system was implemented for the joint management of normalized cataloguing files and bibliographic data, using the Information Retrieval System CDS/ISIS. A suitable description corresponding to standard criteria is necessary for a reliable tracing of the documents. The correct use of international standards like ISO to process traditional bibliographic information assures the accessibility, readability and consistency of the data.}, KEYWORDS = {databases, information retrieval systems, textual archives}, PAGES = {387--399}, URL = {https://publications.cnr.it/doc/64461}, VOLUME = {16-17}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{MARINELLI_2003_ARTICLE_MBBGMOPRCZ_64468, AUTHOR = {Marinelli, R. and Biagini, L. and Bindi, R. and Goggi, S. and Monachini, M. and Orsolini, P. and Picchi, E. and Rossi, S. and Calzolari, N. 
and Zampolli, A.}, TITLE = {The Italian PAROLE corpus: an overview}, YEAR = {2003}, PAGES = {401--421}, URL = {https://publications.cnr.it/doc/64468}, VOLUME = {16-17}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{MEGA_2003_ARTICLE_MAGLPRV_168325, AUTHOR = {Mega, M. S. and Allegrini, P. and Grigolini, P. and Latora, V. and Palatella, L. and Rapisarda, A. and Vinciguerra, S.}, TITLE = {Power-Law Time Distribution of Large Earthquakes}, YEAR = {2003}, ABSTRACT = {We study the statistical properties of time distribution of seismicity in California by means of a new method of analysis, the diffusion entropy. We find that the distribution of time intervals between a large earthquake (the main shock of a given seismic sequence) and the next one does not obey Poisson statistics, as assumed by the current models. We prove that this distribution is an inverse power law with an exponent µ = 2.06±0.01. We propose the long-range model, reproducing the main properties of the diffusion entropy and describing the seismic triggering mechanisms induced by large earthquakes.}, KEYWORDS = {Scaling detection, main-shocks, diffusion entropy}, URL = {https://publications.cnr.it/doc/168325}, VOLUME = {90}, PUBLISHER = {American Physical Society ([Woodbury, N. Y., etc. ], Stati Uniti d'America)}, ISSN = {0031-9007}, JOURNAL = {Physical review letters (Print)}, } @ARTICLE{MONACHINI_2003_ARTICLE_MC_64489, AUTHOR = {Monachini, M. and Calzolari, N.}, TITLE = {Methods for standardization: the case of morphosyntax within the EAGLES project}, YEAR = {2003}, PAGES = {423--460}, URL = {https://publications.cnr.it/doc/64489}, VOLUME = {16-17}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{MONTEMAGNI_2003_ARTICLE_MBBCCLPZFMRBPSZMPD_64477, AUTHOR = {Montemagni, S. and Barsotti, F. and Battista, M. 
and Calzolari, N. and Corazzari, O. and Lenci, A. and Pirrelli, V. and Zampolli, A. and Fanciulli, F. and Massetani, M. and Raffaelli, R. and Basili, R. and Pazienza, M. T. and Saracino, D. and Zanzotto, F. and Mana, N. and Pianesi, F. and Delmonte, R.}, TITLE = {The syntactic-semantic Treebank of Italian. An Overview}, YEAR = {2003}, PAGES = {461--492}, URL = {https://publications.cnr.it/doc/64477}, VOLUME = {16-17}, } @ARTICLE{MONTEMAGNI_2003_ARTICLE_MPB_64478, AUTHOR = {Montemagni, S. and Picchi, E. and Biagini, L.}, TITLE = {DBT-ALT: a system for storing and querying the data of the 'Atlante Linguistico Toscano'}, YEAR = {2003}, ABSTRACT = {Computers can help dialectologists to make full use of the information they have so laboriously and painstakingly acquired: the basic dimensions of dialectal research can be enlarged and its possible outcomes can become more sophisticated. In this paper, we describe a lexical database for dialectal data, DBT-ALT, which has been designed and constructed to contain linguistic data collected for the Atlante Lessicale Toscano (ALT), a lexical atlas of Tuscany. DBT-ALT is illustrated in detail, with particular emphasis on its search functions which allow for complex queries taking into account a wide range of parameters interactively defined by the user on the basis of his/her research interests.}, PAGES = {493--517}, URL = {https://publications.cnr.it/doc/64478}, VOLUME = {18-19}, } @ARTICLE{PARDELLI_2003_ARTICLE_P_64490, AUTHOR = {Pardelli, G.}, TITLE = {BIBLOS: historical, philosophical and philological digital library of the Italian National Research Council}, YEAR = {2003}, ABSTRACT = {The BIBLOS project was established in 1996 for the purpose of creating an Internet site which would combine and organise all the information gathered by the various branches of the National Research Council (CNR) relating to the Humanities. 
The browsing system is based on a subject catalogue which represents the main access to the file, and which includes information on three different research topics: linguistics, philosophy and antiquities. The Institutes have already made available the information collected from their research activity and the databases which have been developed, usually in the form of bibliographic catalogues and specialised bibliographies.}, KEYWORDS = {IT for Library, Biblioteche virtuali, Documentazione, Catalogazione, CNR}, PAGES = {519--549}, URL = {https://publications.cnr.it/doc/64490}, VOLUME = {18-19}, NOTE = {Anno XVIII-XIX, 1998-1999}, DOI = {10.1400/18171}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{PASSAROTTI_2003_ARTICLE_P_64497, AUTHOR = {Passarotti, M.}, TITLE = {La lemmatizzazione. Cos'è, perché si deve fare, come io credo convenga farla}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/64497}, } @ARTICLE{PECCHIA_2003_ARTICLE_PG_64473, AUTHOR = {Pecchia, L. and Guazzini, E.}, TITLE = {An Italian children's corpus of spoken language}, YEAR = {2003}, ABSTRACT = {In this paper we describe the criteria adopted for the creation of a corpus of spoken language produced by six-to-eleven-year-old children in different communicative situations, the methodology used for the collection of the data, the transcription, coding and lemmatization phases. This work is to be included among the activities carried out within the framework of the “Corpus di Linguaggio Infantile” (CLI), a special project of the Italian National Research Council (CNR).}, PAGES = {547--572}, URL = {https://publications.cnr.it/doc/64473}, VOLUME = {18-19}, } @ARTICLE{PETERS_2003_ARTICLE_PP_168362, AUTHOR = {Peters, C. 
and Picchi, E.}, TITLE = {Bilingual lexicons, parallel and comparable corpora: creating the basis for cross language information retrieval}, YEAR = {2003}, ABSTRACT = {We summarise our work over the last decade aimed at the design and development of a series of tools studied for use in applications such as language learning, translation studies and bilingual lexicography. The different components of an integrated system for bilingual lexical and textual database management are outlined. Our final goal has been the implementation of a web-based system for cross-language information retrieval.}, KEYWORDS = {Cross-language information retrieval, Multilingual corpora, Bilingual lexicography, Translation studies, Second language learning}, PAGES = {573--596}, URL = {https://publications.cnr.it/doc/168362}, VOLUME = {18-19}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{PICCHI_2003_ARTICLE_P_64492, AUTHOR = {Picchi, E.}, TITLE = {PiSystem: sistemi integrati per l'analisi testuale}, YEAR = {2003}, ABSTRACT = {This paper provides an overview of the textual and lexical analysis tools implemented at the Institute of Computational Linguistics, which reflect the development of the studies and applications of the Institute from the pioneer stage of lexicography to its current state of progress. The analysis procedures coordinated and integrated in a system called PiSystem are presented, starting from the base element, DBT (Database Testuale), an analysis query system of textual material, with its correlated base functions. The procedures include the following: a) analysis of entire textual corpora; b) new international coding; d) text classification/lemmatization; computer-assisted lemmatization; automatic lemmatization; analysis, navigation and retrieval of linguistic information for lemmatized texts. 
DBT-DIG, a system specifically designed to deal with Digital Libraries (textual material in character and/or image format), with particular regard to the collection of periodicals available in libraries, is also presented. Other components of the Pi-System are illustrated in detail in articles in this volume: handling of multilingual environments; treatment of bilingual (Italian-Arabic) material; processing, analysis and navigation within the dialectal ALT (Atlante Lessicale Toscano) archive.}, PAGES = {597--627}, URL = {https://publications.cnr.it/doc/64492}, VOLUME = {18-19}, } @ARTICLE{PICCHI_2003_ARTICLE_PSNC_64493, AUTHOR = {Picchi, E. and Sassolini, E. and Nahli, O. and Cucurullo, S.}, TITLE = {Risorse monolingui e multilingui. Corpus bilingue italiano-arabo}, YEAR = {2003}, ABSTRACT = {The objective of the project is twofold: on the one hand, the creation and elaboration of software procedures for the Arabic language and, on the other hand, the creation of linguistic resources for the management of large Arabic corpora. The linguistic resources are substantially the following: a) Morphological engine for the Arabic language. The engine is constituted by a number of modules: the algorithms and modules for generation and analysis, an appropriate encoding system for the representation of lexical data and of morphological characteristics of Arabic, the so-called “lemmario”, i.e. 
the archive of lemmas; b) The automatic alignment of parallel texts in Italian and Arabic language; c) Automatic tagging of Arabic texts, performed by using the above morphological engine; d) Systems for accessing and querying (raw and/or tagged) Arabic texts and parallel Italian-Arabic corpora.}, KEYWORDS = {Morfologia araba, Corpora bilingui, Analisi testuale, Aligner, Tagger}, PAGES = {629--678}, URL = {https://publications.cnr.it/doc/64493}, VOLUME = {18-19}, PUBLISHER = {Istituti Editoriali e Poligrafici Internazionali (Ghezzano La Fontina, Italia)}, ISSN = {1824-1573}, JOURNAL = {Linguistica computazionale (Online)}, } @ARTICLE{PIRRELLI_2003_ARTICLE_PB_64462, AUTHOR = {Pirrelli, V. and Battista, M.}, TITLE = {Syntagmatic and paradigmatic issues in computational morphology}, YEAR = {2003}, ABSTRACT = {In this paper some germane theoretical issues in inflectional morphology will be addressed from a computational point of view. In particular we shall focus on the proper treatment of verb stem allomorphy in Italian conjugation and discuss several different formal solutions in some detail. To put our discussion on a more computational footing, all our examples are illustrated by using the DATR formalism as our metalanguage. This allows us to combine the advantages of the advanced expressive power and flexibility of DATR with the further bonus of offering a running piece of program code that actually works on the discussed examples. The upshot of the paper is that a computational treatment of Italian conjugation can considerably benefit from recent theoretical advances in word and paradigm morphology, as this level of description allows the rule writer to capture generalizations which would otherwise completely elude a purely syntagmatic approach to allomorphy.}, PAGES = {679--701}, URL = {https://publications.cnr.it/doc/64462}, VOLUME = {18-19}, } @ARTICLE{PRODANOF_2003_ARTICLE_PCM_64484, AUTHOR = {Prodanof, I. and Cappelli, A. 
and Moretti, L.}, TITLE = {Resources and tools: experiences in language engineering}, YEAR = {2003}, ABSTRACT = {In this paper notions such as resources, tools and reusability related to the design and implementation of NLP applications at low costs will be discussed through examples from projects carried out within the frame of European programmes. The components embedded (dictionary and parser) and the ways in which these are ‘reused’ will be presented.}, PAGES = {703--744}, URL = {https://publications.cnr.it/doc/64484}, VOLUME = {18-19}, } @ARTICLE{ROVENTINI_2003_ARTICLE_RABCCGMMSZ_64479, AUTHOR = {Roventini, A. and Alonge, A. and Bertagna, F. and Calzolari, N. and Cancila, J. and Girardi, C. and Magnini, B. and Marinelli, R. and Speranza, M. and Zampolli, A.}, TITLE = {ItalWordNet: building a large semantic database for the automatic treatment of Italian}, YEAR = {2003}, ABSTRACT = {This paper describes the main characteristics of the ItalWordNet semantic database, built in the context of the SI-TAL Italian National Project, within which a set of integrated resources and tools for the automatic treatment of the Italian language was realized. The database was created by extending the Italian wordnet developed within the EuroWordNet project, by adding: i) adjectives, adverbs and proper nouns (not dealt with in EuroWordNet); ii) a terminological subset related to the economic-financial domain. The relevant changes involved by these extensions both in the linguistic model and in the data structure are also illustrated. In particular, we discuss: i) the overall architecture of the database; ii) the semantic relations used to encode information on synsets; iii) the changes made to the EuroWordNet Top Ontology structure; iv) the specific characteristics of the terminological subset and the solutions adopted to link it to the generic wordnet. 
Keywords - synset, semantic database, wordnet, semantic}, KEYWORDS = {Database lessicale, Rete semantica, Relazioni semantiche, Risorse linguistiche}, PAGES = {745--791}, URL = {https://publications.cnr.it/doc/64479}, VOLUME = {18-19}, } @ARTICLE{RUIMY_2003_ARTICLE_RCGSCZ_64469, AUTHOR = {Ruimy, N. and Corazzari, O. and Gola, E. and Spanu, A. and Calzolari, N. and Zampolli, A.}, TITLE = {The PAROLE model and the Italian Syntactic lexicon}, YEAR = {2003}, ABSTRACT = {This paper presents an overview of a large scale Syntactic Computational Lexicon of Italian. This lexicon was elaborated in the framework of the EC funded LE-PAROLE project, which developed core, generic and re-usable written language resources in 12 EU languages. All monolingual lexica were built according to the same design principles, same linguistic specifications and representation format. The PAROLE Italian lexicon is representative of modern Italian language use. The entries were selected on a frequency basis from the ILC Corpus and the syntactic structures encoded were partly inferred from their contexts of occurrence. Both the general structure of a PAROLE lexicon and the specificity of its Italian instantiation are presented. Some language-specific linguistic and lexicographic options concerning crucial issues to a lexicon building process are illustrated. An overview of the syntactic structures encoded for verbs, nouns and adjectives allows lexicon syntactic coverage as well as description fine-grainedness to be estimated.}, PAGES = {793--820}, URL = {https://publications.cnr.it/doc/64469}, VOLUME = {18-19}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{RUIMY_2003_ARTICLE_RMGCDUR_64485, AUTHOR = {Ruimy, N. and Monachini, M. and Gola, E. and Calzolari, N. and Del Fiorentino, M. C. and Ulivieri, M. 
and Rossi, S.}, TITLE = {A computational semantic lexicon of Italian: SIMPLE}, YEAR = {2003}, ABSTRACT = {This paper describes the Italian Semantic Computational Lexicon elaborated in the framework of the European LE-SIMPLE Project. SIMPLE was aimed at adding a layer of semantic information to a subset of PAROLE lexica. The SIMPLE framework is based principally on the Generative Lexicon theory which allows to express the multidimensionality of meaning by means of ‘qualia structure’. Word senses are described according to their position within the SIMPLE ontology, which is based on the principle of orthogonal inheritance and consists of semantic types for characterizing simple nouns, event and property denoting lexical units. The encoding process is guided by templates, that are schematic structures containing clusters of structured information specific to each semantic type. Besides a high degree of granularity of meaning representation, the SIMPLE lexicon presents innovative aspects such as link between the syntactic and semantic levels of information, description of predicative representation and enforcement of selectional restrictions/preferences on arguments.}, PAGES = {821--864}, URL = {https://publications.cnr.it/doc/64485}, VOLUME = {18-19}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{SABA_2003_ARTICLE_SC_64463, AUTHOR = {Saba, A. and Cappelli, G.}, TITLE = {Morfsin and AyDA: two systems for analyzing modern and old Spanish}, YEAR = {2003}, ABSTRACT = {In this work we shall present two software systems for automatic text analysis developed at the Institute of Computational Linguistics (ILC) in Pisa. Although originally designed for modern Spanish texts, both can be applied, with appropriate modifications, to Old Spanish. The first one, a mainframe application called Morfsin©, was released in its final version in 1986. 
The second, developed for the IBM-compatible PC platform, stems from the earlier work on Morfsin and is known by the acronym AyDA (Analyzer and Automatic Disambiguator). AyDA represents a useful tool for linguistics specialists in that it reduces the need to manually tag words morphologically. Moreover, it can provide quite a high degree of automatic disambiguation of functional homographs in a text. The final outcome depends on the number of words recognized, which depends, in turn, on the lexis included in the lookup dictionaries and the type of text being analyzed - modern or old, literary prose or specialized language, etc. This paper provides a description of the basic structure of the two systems, with particular emphasis on AyDA, and reports the results obtained in applying AyDA to texts in Old Spanish.}, PAGES = {865--900}, URL = {https://publications.cnr.it/doc/64463}, VOLUME = {18-19}, } @ARTICLE{SASSI_2003_ARTICLE_SA_64464, AUTHOR = {Sassi, M. and Amoroso, Y.}, TITLE = {Letteratura, diritto e linguistica computazionale. Panorama delle collaborazioni Italia-Cuba}, YEAR = {2003}, ABSTRACT = {These notes offer an outline of the collaborations started in 1995 by the Institute of Computational Linguistics (ILC) of Pisa, with some Cuban Scientific Institutions and expanded in several research sectors in the subsequent years. As regards Automatized Lexicography, we propose here a brief description of the results and prospects of work carried out with CEM (Centro de Estudios Martianos), FAC (Fundación Alejo Carpentier) and ILL (Instituto de Literatura y Lingüística). In 1996, as a result of the cooperation of ILC with CEM with regard to methodology and textual codification, we started the creation of an electronic archive of the complete works of Jose Martí (27 tomes). In 1997, in collaboration with FAC, ILC put to practical use the previous experiences for the creation of an electronic archive of the complete works of Alejo Carpentier. 
In 2002, the collaboration between ILL and SCDI (Sociedad Cubana de Derecho e Informática de la Unión di Juristas de Cuba), resulted in the creation of the “Diccionario de Jurismática”. As far as Legal Information Science is concerned, we propose a presentation of the objectives which have been reached as well as the projects for the future in the area, relating to the study of languages and legal documents as formulated by ILC and SCDI.}, PAGES = {901--924}, URL = {https://publications.cnr.it/doc/64464}, VOLUME = {18-19}, } @ARTICLE{SORIA_2003_ARTICLE_SP_64470, AUTHOR = {Soria, C. and Pirrelli, V.}, TITLE = {A multi-level annotation meta-scheme for dialogue acts}, YEAR = {2003}, ABSTRACT = {This article describes a new principled framework for comparison, design and standardization of annotation schemes for dialogue acts. Previous attempts at comparing existing schemes in order to identify a common core of generally agreed-upon dialogue acts share the assumption that tags belonging to different schemes and describing the same general phenomena can always be related through hypo- or hyperonymy relationships. Consequently, general-purpose schemes have often been the result of a merger of different tag sets. In this article, we show the extent to which comparability of different annotation schemes is prevented by the very limited tag inter-translatability. We thus describe an alternative approach to the comparison of dialogue act taxonomies based on a compositional analysis of tags according to independent classificatory dimensions. The framework takes a recognition-based approach to dialogue tagging and defines four independent taxonomies of tags, one for each orthogonal dimension of linguistic and contextual analysis assumed to have a bearing on identification of dialogue acts. 
We also show how the same framework can be used to design a general-purpose annotation scheme which combines the features of generality and expressivity by exploiting a modular structure. The advantages and limitations of this proposal over other previous attempts are discussed and concretely exemplified.}, KEYWORDS = {dialogue acts, annotation scheme, pragmatics}, PAGES = {925--952}, URL = {https://publications.cnr.it/doc/64470}, VOLUME = {18-19}, PUBLISHER = {Giardini editori e stampatori (Pisa, Italia)}, ISSN = {0392-6907}, JOURNAL = {Linguistica computazionale (Testo stamp.)}, } @ARTICLE{TURRINI_2003_ARTICLE_TCP_64481, AUTHOR = {Turrini, G. and Cignoni, L. and Paccosi, A.}, TITLE = {From a children's dictionary to a hypermedia laboratory for language learning}, YEAR = {2003}, ABSTRACT = {This paper describes Addizionario, a software tool patented by the National Research Council (CNR.) of Italy, addressed to pre- and primary school children for the study of Italian as native or second language. The different stages of development of the system are described which range from a preliminary phase of collection of children’s data to the implementation of a multimedia Dictionary, to the final stage of a language laboratory in which an Activity Book interacting with the Dictionary has been added to the system. The multilingual version of Addizionario recently developed is used in European Socrates-Comenius projects to foster the learning of a native and foreign language. Furthermore, the particular features of the tool, which is child-centred, open, flexible and easy-to-use, make it suitable not only for normal users, but also for those presenting learning difficulties linked to physical or cognitive impairment. Keywords - hypermedia, children’s dictionaries, creativity, language learning, impairment 1. 
INTRODUCTION This paper describes the most interesting features of Addizionario, an integrated set of tools designed to support pre- and primary school children in the study of Italian as native or as second language at various levels of difficulty and from various points of view. The software, implemented at the Institute of Computational Linguistics (ILC) in Pisa, in collaboration with the Department of Computer Sciences of Turin University, and patented by the Italian National Research Council (CNR) of Italy, reflects the ideas that the ILC group Language Teaching and Information Technologies involved in the study and implementation of information tools for language teaching, has expressed over the last decade.}, PAGES = {953--969}, URL = {https://publications.cnr.it/doc/64481}, VOLUME = {18-19}, } @ARTICLE{ZAMPOLLI_2003_ARTICLE_Z_64486, AUTHOR = {Zampolli, A.}, TITLE = {Le principali attività dell'Istituto di Linguistica Computazionale}, YEAR = {2003}, PAGES = {xvii--lxx}, URL = {https://publications.cnr.it/doc/64486}, VOLUME = {16-17}, } @ARTICLE{ZAMPOLLI_2003_ARTICLE_ZCC_64496, AUTHOR = {Zampolli, A. and Calzolari, N. and Cignoni, L.}, TITLE = {Foreword}, YEAR = {2003}, PAGES = {xiii--xv}, URL = {https://publications.cnr.it/doc/64496}, VOLUME = {16-17}, } @BOOK{ZAMPOLLI_2003_BOOK_ZCC_136418, AUTHOR = {Zampolli, A. and Calzolari, N. and Cignoni, L.}, TITLE = {Computational Linguistics in Pisa-Linguistica Computazionale a Pisa}, YEAR = {2003}, KEYWORDS = {Linguistica Comput, Tecnologie linguist, NLP, Risorse Linguistiche, Applicazioni Ling}, URL = {https://publications.cnr.it/doc/136418}, } @INCOLLECTION{ALLEGRINI_2003_INCOLLECTION_ALMP_136427, AUTHOR = {Allegrini, P. and Lenci, A. and Montemagni, S. and Pirrelli, V.}, TITLE = {Le forme del significato. 
Acquisizione e rappresentazione dell'informazione semantica}, YEAR = {2003}, KEYWORDS = {Acquisizione, Semantica Lessicale, Ontologia, Machine Learning}, URL = {https://publications.cnr.it/doc/136427}, } @INCOLLECTION{BOZZI_2003_INCOLLECTION_B_136420, AUTHOR = {Bozzi, A.}, TITLE = {Digital documents and computational philology: the Digital Philology System DiPhiloS}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/136420}, PUBLISHER = {Olschki (Firenze, ITA)}, } @INCOLLECTION{BOZZI_2003_INCOLLECTION_BR_136419, AUTHOR = {Bozzi, A. and Raggioli, A.}, TITLE = {Tecnologia digitale negli Istituti Culturali: un case study}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/136419}, } @INCOLLECTION{CALZOLARI_2003_INCOLLECTION_CZ_136431, AUTHOR = {Calzolari, N. and Zampolli, A.}, TITLE = {The EAGLES/ISLE Initiative for Setting Standards: the Computational Lexicon Working Group for Multilingual Lexicons}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/136431}, } @INCOLLECTION{CALZOLARI_2003_INCOLLECTION_CZL_136421, AUTHOR = {Calzolari, N. and Zampolli, A. and Lenci, A.}, TITLE = {Risorse linguistiche per un accesso al 'contenuto'}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/136421}, PUBLISHER = {Angeli (Milano, ITA)}, } @INCOLLECTION{CECCOTTI_2003_INCOLLECTION_CS_157355, AUTHOR = {Ceccotti, M. L. and Sassi, M.}, TITLE = {Sistema}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157355}, } @INCOLLECTION{MONTEMAGNI_2003_INCOLLECTION_MBBCCLZRPMD_136422, AUTHOR = {Montemagni, S. and Barsotti, F. and Battista, M. and Calzolari, N. and Corazzari, O. and Lenci, A. and Zampolli, A. and Raffaelli, R. and Pazienza, M. T. and Mana, N. and Delmonte, R.}, TITLE = {Building the Italian Syntactic-Semantic Treebank}, YEAR = {2003}, KEYWORDS = {Corpora testuali, Annot. sintattica, Annot. 
semantica, Treebank}, URL = {https://publications.cnr.it/doc/136422}, } @INCOLLECTION{PICCHI_2003_INCOLLECTION_P_136423, AUTHOR = {Picchi, E.}, TITLE = {Esperienze nel settore dell'analisi di corpora testuali: software e strumenti linguistici}, YEAR = {2003}, KEYWORDS = {Analisi testuale, Digital Library, Disambiguazione, Corpora bilingui, Lemmatizzazione}, URL = {https://publications.cnr.it/doc/136423}, PUBLISHER = {Olschki (Firenze, ITA)}, } @INCOLLECTION{PIRRELLI_2003_INCOLLECTION_P_136424, AUTHOR = {Pirrelli, V.}, TITLE = {Machine language learning meets information technology}, YEAR = {2003}, KEYWORDS = {Apprendimento, Sistemi integrati, Semantic web, Machine Learning}, URL = {https://publications.cnr.it/doc/136424}, PUBLISHER = {Angeli (Milano, ITA)}, } @INCOLLECTION{SASSI_2003_INCOLLECTION_S_136433, AUTHOR = {Sassi, M.}, TITLE = {La consultazione dei corpora costituzionali con il DBT}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/136433}, PUBLISHER = {CNR, ITTIG (Firenze, ITA)}, } @INCOLLECTION{ZAMPOLLI_2003_INCOLLECTION_Z_136425, AUTHOR = {Zampolli, A.}, TITLE = {Standards for Language data processing: An Historical overview}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/136425}, PUBLISHER = {Bulzoni (Roma, ITA)}, } @EDITORIAL{CIGNONI_2003_EDITORIAL_CZ_146070, AUTHOR = {Cignoni, L. and Zamorani, N.}, TITLE = {Computational Linguistics in Pisa-Linguistica Computazionale a Pisa. Linguistica Computazionale}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/146070}, } @INPROCEEDINGS{BERTAGNA_2003_INPROCEEDINGS_B_84549, AUTHOR = {Bertagna, F.}, TITLE = {Italian Language Resources in a Question Answering Task}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84549}, CONFERENCE_NAME = {2nd CoLogNET-ElsNET Symposium dedicated to Questions and Answers: Theoretical and Applied Perspectives}, CONFERENCE_PLACE = {Amsterdam}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{BUITELAR_2003_INPROCEEDINGS_BDCL_84550, AUTHOR = {Buitelar, P. 
and Declerck, T. and Calzolari, N. and Lenci, A.}, TITLE = {Language Resources and the Semantic Web}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84550}, CONFERENCE_NAME = {ENABLER/ELSNET Workshop International Roadmap for Language Resources}, CONFERENCE_PLACE = {Utrecht}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{BUITELAR_2003_INPROCEEDINGS_BDCL_84551, AUTHOR = {Buitelar, P. and Declerck, T. and Calzolari, N. and Lenci, A.}, TITLE = {Towards a Language Infrastructure for the Semantic Web}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84551}, CONFERENCE_NAME = {ISWC2003 Workshop on Human Language Technology for the Semantic Web and Web Services}, CONFERENCE_PLACE = {Sanibel Island, Florida}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{CALZOLARI_2003_INPROCEEDINGS_C_84552, AUTHOR = {Calzolari, N.}, TITLE = {Towards a Gateway between Language Resources and Knowledge Resources}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84552}, CONFERENCE_NAME = {International Conference on Natural Language Processing and Knowledge Engineering}, CONFERENCE_PLACE = {Beijing (Cina)}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{CALZOLARI_2003_INPROCEEDINGS_C_84553, AUTHOR = {Calzolari, N.}, TITLE = {Language Resources in the Semantic Web Vision}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84553}, CONFERENCE_NAME = {International Conference on Natural Language Processing and Knowledge Engineering}, CONFERENCE_PLACE = {Beijing (Cina)}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{CALZOLARI_2003_INPROCEEDINGS_C_84554, AUTHOR = {Calzolari, N.}, TITLE = {Lexicons and Corpora: between Theory and Practice}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84554}, CONFERENCE_NAME = {VIII Simposio Internacional de Comunicacion Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{CALZOLARI_2003_INPROCEEDINGS_C_84555, AUTHOR = {Calzolari, N.}, TITLE = {Risorse Linguistiche per la lingua italiana scritta}, YEAR = 
{2003}, URL = {https://publications.cnr.it/doc/84555}, CONFERENCE_NAME = {Conferenza TIPI-Tecnologie Informatiche nella Promozione della Lingua Italiana}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{CALZOLARI_2003_INPROCEEDINGS_C_84557, AUTHOR = {Calzolari, N.}, TITLE = {A State of Art in Computational Linguistics and Language Resources for Language Technology}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84557}, CONFERENCE_NAME = {XVII International Congress of Linguists}, CONFERENCE_PLACE = {Prague}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{CALZOLARI_2003_INPROCEEDINGS_CBLM_84556, AUTHOR = {Calzolari, N. and Bertagna, F. and Lenci, A. and Monachini, M.}, TITLE = {New Perspectives for Lexical Resources in the Semantic Web Scenario}, YEAR = {2003}, PAGES = {10-19}, URL = {https://publications.cnr.it/doc/84556}, CONFERENCE_NAME = {GL 2003-Second International Workshop on Generative Approaches to the Lexicon}, CONFERENCE_PLACE = {Genève, Switzerland}, CONFERENCE_DATE = {15-17 May 2003}, EDITOR = {Bouillon, P. and Kanzaki, K.}, } @INPROCEEDINGS{CALZOLARI_2003_INPROCEEDINGS_CLQ_84547, AUTHOR = {Calzolari, N. and Lenci, A. and Quochi, V.}, TITLE = {Towards Multiword and Multilingual Lexicons: Between Theory and Practice}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84547}, CONFERENCE_NAME = {Linguistics and Phonetics 2002 Conference}, CONFERENCE_PLACE = {Urayasu (Giappone)}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{CUTUGNO_2003_INPROCEEDINGS_CMMRRR_84566, AUTHOR = {Cutugno, P. and Marconi, L. and Miyares, B. E. and Ratti, D. and Rolando, C. and Ruiz, M. 
L.}, TITLE = {Thesaurus electrónico Italiano-Español: Instrumento hipertextual para la enseñanza y la traducción automatizada asistida en ambas lenguas}, YEAR = {2003}, PAGES = {585-587}, URL = {https://publications.cnr.it/doc/84566}, CONFERENCE_NAME = {VIII Simposio Internacional de Comunicacion Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{CUTUGNO_2003_INPROCEEDINGS_CMRR_84558, AUTHOR = {Cutugno, P. and Marconi, L. and Ratti, D. and Rolando, C.}, TITLE = {VI e-book: gramática del verbo italiano}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84558}, CONFERENCE_NAME = {VIII Simposio Internacional de Comunicacion Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{CUTUGNO_2003_INPROCEEDINGS_CMRR_84565, AUTHOR = {Cutugno, P. and Marconi, L. and Ratti, D. and Rolando, C.}, TITLE = {The usability of e-book technology for the treatment of linguistic data}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84565}, CONFERENCE_NAME = {Advances in Technology-Based Education: Toward a Knowledge-Based Society II International Conference on Multimedia ICT's in Education}, CONFERENCE_PLACE = {Badajoz (Spagna)}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{GIORGETTI_2003_INPROCEEDINGS_GPS_84560, AUTHOR = {Giorgetti, D. and Prodanof, I. and Sebastiani, F.}, TITLE = {Open-ended survey coding using text categorization techniques}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84560}, CONFERENCE_NAME = {Impact of Technology on the Survey Process}, CONFERENCE_PLACE = {Warwick}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{GIORGETTI_2003_INPROCEEDINGS_GS_171489, AUTHOR = {Giorgetti, D. and Sebastiani, F.}, TITLE = {Multiclass text categorization for automated survey coding}, YEAR = {2003}, ABSTRACT = {Survey coding is the task of assigning a symbolic code from a predefined set of such codes to the answer given in response to an open-ended question in a questionnaire (aka survey). 
We formulate the problem of automated survey coding as a text categorization problem, i.e. as the problem of learning, by means of supervised machine learning techniques, a model of the association between answers and codes from a training set of pre-coded answers, and applying the resulting model to the classification of new answers. In this paper we experiment with two different learning techniques, one based on naïve Bayesian classification and the other one based on multiclass support vector machines, and test the resulting framework on a corpus of social surveys. The results we have obtained significantly outperform the results achieved by previous automated survey coding approaches.}, KEYWORDS = {Text categorization, Classifier Design and Evaluation, Learning, Information Search and Retrieval, Sociology}, PAGES = {798-802}, URL = {https://publications.cnr.it/doc/171489}, PUBLISHER = {ACM Press (New York, USA)}, CONFERENCE_NAME = {SAC-03, 18th ACM Symposium on Applied Computing}, CONFERENCE_PLACE = {Melbourne}, CONFERENCE_DATE = {9-12 March 2003}, } @INPROCEEDINGS{GIORGETTI_2003_INPROCEEDINGS_GSP_91138, AUTHOR = {Giorgetti, D. and Sebastiani, F. and Prodanof, I.}, TITLE = {Automatic coding of open-ended surveys using text categorization techniques}, YEAR = {2003}, ABSTRACT = {Open-ended questions do not limit respondents' answers in terms of linguistic form and semantic content, but bring about severe problems in terms of cost and speed, since their coding requires trained professionals to manually identify and tag meaningful text segments. To overcome these problems, a few automatic approaches have been proposed in the past, some based on matching the answer with textual descriptions of the codes, others based on manually building rules that check the answer for the presence or absence of code-revealing words. 
While the former approach is scarcely effective, the major drawback of the latter approach is that the rules need to be developed manually, and before the actual observation of text data. We propose a new approach, inspired by work in information retrieval (IR), that overcomes these drawbacks. In this approach survey coding is viewed as a task of multiclass text categorization (MTC), and is tackled through techniques originally developed in the field of supervised machine learning. In MTC each text belonging to a given corpus has to be classified into exactly one from a set of predefined categories. In the supervised machine learning approach to MTC, a set of categorization rules is built automatically by learning the characteristics that a text should have in order to be classified under a given category. Such characteristics are automatically learnt from a set of training examples, i.e. a set of texts whose category is known. For survey coding, we equate the set of codes with categories, and all the collected answers to a given question with texts. Giorgetti and Sebastiani have carried out automatic coding experiments with two different supervised learning techniques, one based on a naïve Bayesian method and the other based on multiclass support vector machines. Experiments have been run on a corpus of social surveys carried out by the National Opinion Research Center, University of Chicago (NORC). These experiments show that our methods outperform, in terms of accuracy, previous automated methods tested on the same corpus.}, KEYWORDS = {Automatic coding}, PAGES = {173-184}, URL = {https://publications.cnr.it/doc/91138}, CONFERENCE_NAME = {The Impact of Technology on the Survey Process. Fourth International Conference on Survey and Statistical Computing}, CONFERENCE_PLACE = {The University of Warwick, England, UK}, CONFERENCE_DATE = {17-19 September 2003}, } @INPROCEEDINGS{IDE_2003_INPROCEEDINGS_ILC_84561, AUTHOR = {Ide, N. and Lenci, A. 
and Calzolari, N.}, TITLE = {RDF Instantiation of ISLE/MILE Lexical Entries}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84561}, CONFERENCE_NAME = {ACL 2003 Workshop on Linguistic Annotation: Getting the Model Right}, CONFERENCE_PLACE = {Sapporo}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{LENCI_2003_INPROCEEDINGS_L_84567, AUTHOR = {Lenci, A.}, TITLE = {Computational Lexicons as Resources for Multilingual Content-Based Information Processing}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84567}, CONFERENCE_NAME = {Proceedings of the Workshop on Topics and Perspectives of Natural Language Processing in Italy}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{MARINELLI_2003_INPROCEEDINGS_MRS_84562, AUTHOR = {Marinelli, R. and Roventini, A. and Spadoni, G.}, TITLE = {Linking a subset of maritime terminology to the Italian Wordnet}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84562}, CONFERENCE_NAME = {Third International Conference on Maritime Terminology}, CONFERENCE_PLACE = {Lisbon}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{PICCHI_2003_INPROCEEDINGS_PCCCFSST_84548, AUTHOR = {Picchi, E. and Ceccotti, M. L. and Cignoni, L. and Cucurullo, N. and Fiorentini, G. and Sassi, M. and Sassolini, E. and Turrini, G.}, TITLE = {Linguistic Miner}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84548}, CONFERENCE_NAME = {Congresso annuale AICA 2003: I costi dell'ignoranza e il valore della conoscenza nella società dell'informazione}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{RUIMY_2003_INPROCEEDINGS_RMC_84563, AUTHOR = {Ruimy, N. and Monachini, M. and Calzolari, N.}, TITLE = {Un lexique électronique multi-niveaux de l'italien}, YEAR = {2003}, ABSTRACT = {CLIPS est la plus vaste ressource lexicale électronique de l'italien. Elle comprend 55.000 mots codés à 4 niveaux de description linguistique. 
La représentation lexicale est basée sur des standards internationaux: CLIPS utilise en effet le même modèle, le même langage de représentation et la même méthodologie que 11 autres lexiques développés au cours des projets européens PAROLE et SIMPLE. Les informations fournies, particulièrement utiles pour des applications de TLH, sont très structurées, granulaires et innovatrices, avec entre autres au niveau sémantique la Extended Qualia Structure, basée sur la théorie du Lexique Génératif, et la représentation prédicative. La description d'une unité lexicale est un continuum à travers les différents niveaux d'information. Les propriétés phonologiques, morphologiques et syntaxiques d'un lemme, ainsi que son/ses schéma(s) d'arguments sont décrits. Au niveau sémantique, chaque lexème/sens est associé à un vaste ensemble structuré d'informations, parmi lesquelles son type ontologique et l'expression - au moyen des relations qualia - des différentes facettes de sa sémantique. La représentation prédicative décrit, quant à elle, le scénario sémantique (dans lequel le mot s'insère) et ses participants auxquels sont attribués rôle thématique et contraintes sémantiques. La relation des niveaux syntaxique et sémantique est assurée par des liens permettant de projeter les structures argumentales sur leur(s) réalisation(s) syntaxique(s). 
Une telle richesse d'information, et en particulier celle fournie par la Extended Qualia, permet notamment 1) de constituer des réseaux sémantiques, en formulant une requête sur l'ensemble des relations qualia contenant un mot-clé ; 2) d'extraire des noyaux de vocabulaire de domaines spécifiques, en alternant requêtes sur qualia et sens ; 3) d'acquérir des collocations lexicales, en exploitant les liens syntagmatiques évènements/entités exprimés par les qualia ; 4) de désambiguïser la contribution sémantique du modificateur dans certains groupes nominaux complexes, en analysant la structure qualia de la tête.}, PAGES = {1-10}, URL = {https://publications.cnr.it/doc/84563}, ISBN = {80-86732-21-5}, CONFERENCE_NAME = {CIL XVII International Congress of Linguists}, CONFERENCE_PLACE = {Prague}, CONFERENCE_DATE = {24-29 Luglio 2003}, } @INPROCEEDINGS{SORIA_2003_INPROCEEDINGS_SBC_84564, AUTHOR = {Soria, C. and Bertagna, F. and Calzolari, N.}, TITLE = {ItalWordNet in an annotation task: a chance for discussion}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/84564}, CONFERENCE_NAME = {First International WordNet Conference, Global WordNet Association}, CONFERENCE_PLACE = {Mysore}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{CALZOLARI_2003_INPROCEEDINGS_C_112894, AUTHOR = {Calzolari, N.}, TITLE = {Tecnologie della lingua}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/112894}, CONFERENCE_NAME = {Tecnologie della Lingua: strategie, politica e mercato}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{CALZOLARI_2003_INPROCEEDINGS_C_112895, AUTHOR = {Calzolari, N.}, TITLE = {Verso una nuova generazione di risorse lessicali e la cooperazione internazionale}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/112895}, CONFERENCE_NAME = {Actas del Tercero Seminario de la Escuela Interlatina de Altos Estudios en Lingüística Aplicada}, CONFERENCE_PLACE = {San Millán de la Cogolla}, CONFERENCE_DATE = {2003}, } 
@INPROCEEDINGS{PICCHI_2003_INPROCEEDINGS_P_112896, AUTHOR = {Picchi, E.}, TITLE = {Lessicografia multilingue e trattamento dei testi}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/112896}, CONFERENCE_NAME = {Actas del Tercero Seminario de la Escuela Interlatina de Altos Estudios en Lingüística Aplicada}, CONFERENCE_PLACE = {San Millán de la Cogolla}, CONFERENCE_DATE = {2003}, } @INPROCEEDINGS{PIRRELLI_2003_INPROCEEDINGS_PL_112893, AUTHOR = {Pirrelli, V. and Lenci, A.}, TITLE = {Modelli computazionali dell'apprendimento del linguaggio}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/112893}, CONFERENCE_NAME = {XI Congresso della Società Italiana di Psicofisiologia}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {2003}, } @TECHREPORT{ALLEGRINI_2003_TECHREPORT_ACMMHIGCDP_157348, AUTHOR = {Allegrini, P. and Calzolari, N. and Marchi, S. and Montemagni, S. and Hepple, M. and Ireson, N. and Gomez Hidalgo, J. M. and Carrero Garcia, F. and De Buenaga Rodriguez, M. and Puera Sanz, E.}, TITLE = {POESIA Lexical Resources and Tools for Each Language}, YEAR = {2003}, ABSTRACT = {The aim of this report is to review the various resources that the different language processing sites expect to use in the development of their language-specific text filtering components. Some of the required resources are ones that were developed before Poesia, possibly by one the Poesia partners, or possibly elsewhere but being now in the public domain. Such resources may require adaptation to the Poesia task. Other resources required for Poesia will be developed as part of the project. In some cases, this development has already been done or is in progress, whilst in others, it is yet to be undertaken. 
In what follows, the status of each of the resources described will be made clear in terms of these alternatives.}, KEYWORDS = {Lexical Resources, nlp}, PAGES = {30}, URL = {https://publications.cnr.it/doc/157348}, } @TECHREPORT{AMOROSO_2003_TECHREPORT_ACFFIMMPS_157312, AUTHOR = {Amoroso, Y. and Cammelli, A. and Fameli, E. and Fameli, M. and Inghirami, B. and Mariani, P. and Marinai, M. and Parenti, L. and Sassi, M.}, TITLE = {Diritto alla vita e Diritto all'ambiente nel lessico costituzionale e nella dottrina giuridica. Strumenti e metodi per l'analisi linguistico-concettuale}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157312}, } @TECHREPORT{BARONI_2003_TECHREPORT_BCF_157346, AUTHOR = {Baroni, P. and Calzolari, N. and Fiorentini, G.}, TITLE = {Resources Roadmap (1st release)}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157346}, } @TECHREPORT{BARONI_2003_TECHREPORT_BCF_317098, AUTHOR = {Baroni, P. and Calzolari, N. and Fiorentini, G.}, TITLE = {Resources Roadmap (First Release)}, YEAR = {2003}, ABSTRACT = {ELSNET-4 Deliverable D6.1}, KEYWORDS = {Language Resources, Roadmaps}, URL = {https://publications.cnr.it/doc/317098}, } @TECHREPORT{BARONI_2003_TECHREPORT_BCFLM_157347, AUTHOR = {Baroni, P. and Calzolari, N. and Fiorentini, G. and Lenci, A. and Monachini, M.}, TITLE = {Resources Landscape Map (1st release)}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157347}, } @TECHREPORT{BARONI_2003_TECHREPORT_BCFLM_317185, AUTHOR = {Baroni, P. and Calzolari, N. and Fiorentini, G. and Lenci, A. and Monachini, M.}, TITLE = {Resources Landscape (First Release)}, YEAR = {2003}, ABSTRACT = {ELSNET-4 Deliverable D6.2}, KEYWORDS = {Language Resources, Landscapes}, URL = {https://publications.cnr.it/doc/317185}, } @TECHREPORT{BARONI_2003_TECHREPORT_BCL_157313, AUTHOR = {Baroni, P. and Calzolari, N. 
and Lenci, A.}, TITLE = {Extended Configuration of the Network and Final Report}, YEAR = {2003}, ABSTRACT = {ENABLER Deliverable D1.2}, KEYWORDS = {Language Resources, Networks}, PAGES = {21}, URL = {https://publications.cnr.it/doc/157313}, } @TECHREPORT{BERTAGNA_2003_TECHREPORT_BCLM_157314, AUTHOR = {Bertagna, F. and Calzolari, N. and Lenci, A. and Monachini, M.}, TITLE = {Report on the Feasibility and the Organisational Requirements for the Construction of Multilingual LRs}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157314}, } @TECHREPORT{BIANCO_2003_TECHREPORT_BGMU_157335, AUTHOR = {Bianco, I. and Guazzini, E. and Molino, S. and Ulivieri, M.}, TITLE = {Some Aspects of the Clips Semantic Layer: Uses and Advantages}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157335}, } @TECHREPORT{BIRD_2003_TECHREPORT_BHNRCA_157315, AUTHOR = {Bird, S. and Hovy, E. and Nerbonne, J. and Rosner, M. and Calzolari, N. and Arnold, D.}, TITLE = {ACL Anthology-A Digital Archive of Research Papers in Computational Linguistics}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157315}, } @TECHREPORT{CALZOLARI_2003_TECHREPORT_CBLM_157316, AUTHOR = {Calzolari, N. and Bertagna, F. and Lenci, A. and Monachini, M.}, TITLE = {Standards and Best Practice for Multilingual Computational Lexicons-MILE (the Multilingual ISLE Lexical Entry)}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157316}, } @TECHREPORT{CALZOLARI_2003_TECHREPORT_CBLM_157337, AUTHOR = {Calzolari, N. and Bertagna, F. and Lenci, A. and Monachini, M.}, TITLE = {MILE Users? Evaluation and Feedback}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157337}, } @TECHREPORT{CALZOLARI_2003_TECHREPORT_CGP_157339, AUTHOR = {Calzolari, N. and Grishman, R. and Palmer, M.}, TITLE = {Introduction to the CLWG Guidelines}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157339}, } @TECHREPORT{CALZOLARI_2003_TECHREPORT_CMPZ_157344, AUTHOR = {Calzolari, N. and McNaught, J. and Palmer, M. 
and Zampolli, A.}, TITLE = {ISLE Final Report}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157344}, } @TECHREPORT{CALZOLARI_2003_TECHREPORT_CW_157317, AUTHOR = {Calzolari, N. and Wittenburg, P.}, TITLE = {Report of the ISO Preparation Workshop on Lexicons}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157317}, } @TECHREPORT{CAMMELLI_2003_TECHREPORT_CS_172092, AUTHOR = {Cammelli, A. and Sassi, M.}, TITLE = {Strumenti e metodi per uno studio lessicale della Costituzione della Repubblica Bolivariana del Venezuela}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/172092}, } @TECHREPORT{CECCOTTI_2003_TECHREPORT_CS_157356, AUTHOR = {Ceccotti, M. L. and Sassi, M.}, TITLE = {Iterazioni gaddiane}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157356}, } @TECHREPORT{CECCOTTI_2003_TECHREPORT_CS_157357, AUTHOR = {Ceccotti, M. L. and Sassi, M.}, TITLE = {Annotazioni su composti in-cola. Da Dante a Gadda}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157357}, } @TECHREPORT{CECCOTTI_2003_TECHREPORT_CS_157358, AUTHOR = {Ceccotti, M. L. and Sassi, M.}, TITLE = {Concordanze del Pasticciaccio}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157358}, } @TECHREPORT{ENEA_2003_TECHREPORT_E_157318, AUTHOR = {Enea, A.}, TITLE = {Webmail: un'interfaccia Web per la posta elettronica}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157318}, } @TECHREPORT{ENEA_2003_TECHREPORT_E_157319, AUTHOR = {Enea, A.}, TITLE = {Una soluzione AntiVirus e AntiSpam}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157319}, } @TECHREPORT{ENEA_2003_TECHREPORT_EF_157320, AUTHOR = {Enea, A. and Fiorentini, G.}, TITLE = {Il nuovo sito Internet dell'Istituto di Linguistica Computazionale}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157320}, } @TECHREPORT{ENEA_2003_TECHREPORT_EPOF_157340, AUTHOR = {Enea, A. and Pardelli, G. and Orsolini, P. 
and Fiorentini, G.}, TITLE = {Pubblicazioni ILC sul WEB}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157340}, } @TECHREPORT{GAVRILIDOU_2003_TECHREPORT_GDLCMS_157321, AUTHOR = {Gavrilidou, M. and Desipri, E. and Labropoulo, P. and Calzolari, N. and Monachini, M. and Soria, C.}, TITLE = {Technical Specifications for the Selection and Encoding of Multilingual Resources}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157321}, } @TECHREPORT{HEID_2003_TECHREPORT_HMPS_157341, AUTHOR = {Heid, U. and Maci, E. and Pirrelli, V. and Soria, C.}, TITLE = {NITE Interim Evaluation Report}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157341}, } @TECHREPORT{LENCI_2003_TECHREPORT_LCM_157345, AUTHOR = {Lenci, A. and Calzolari, N. and Monachini, M.}, TITLE = {Report on LR Related Activities to Be Promoted}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157345}, } @TECHREPORT{MARZI_2003_TECHREPORT_MPA_157342, AUTHOR = {Marzi, C. and Petrongolo, C. and Aprile, N.}, TITLE = {GLARS: Contratti dei servizi comuni di Area}, YEAR = {2003}, ABSTRACT = {Revisione dei contratti di appalto dei servizi di Mensa, vigilanza, e pulizie, per una ottimizzazione dei servizi e risparmio dei costi}, KEYWORDS = {contratti servizi Area della Ricerca Pisa}, URL = {https://publications.cnr.it/doc/157342}, } @TECHREPORT{MONACHINI_2003_TECHREPORT_MBCL_157322, AUTHOR = {Monachini, M. and Bertagna, F. and Calzolari, N. and Lenci, A.}, TITLE = {Improving Harmonisation between Resources: Divergence/Convergence between Specifications and de-facto Standards}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157322}, } @TECHREPORT{MONACHINI_2003_TECHREPORT_MBCUN_157323, AUTHOR = {Monachini, M. and Bertagna, F. and Calzolari, N. and Underwood, N. and Navarretta, C.}, TITLE = {Towards a Standard for the Creation of Lexica}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157323}, } @TECHREPORT{MONACHINI_2003_TECHREPORT_MS_157324, AUTHOR = {Monachini, M. 
and Soria, C.}, TITLE = {Testing Scenario and Quality Assessment Strategy}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157324}, } @TECHREPORT{QUOCHI_2003_TECHREPORT_QO_157349, AUTHOR = {Quochi, V. and Odjik, J.}, TITLE = {"Appendix F: Representing noun compounds and support verbs in MILE (PISA & XMELLT)"}, YEAR = {2003}, URL = {http://www.ilc.cnr.it/EAGLES96/isle/clwg_doc/ISLE_D2.2-D3.2.zip}, } @TECHREPORT{RUIMY_2003_TECHREPORT_RMC_157325, AUTHOR = {Ruimy, N. and Monachini, M. and Calzolari, N.}, TITLE = {Progetto CLIPS: Specifiche Linguistiche e Manuale di Codifica, Livello sintattico}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157325}, } @TECHREPORT{RUIMY_2003_TECHREPORT_RMC_157326, AUTHOR = {Ruimy, N. and Monachini, M. and Calzolari, N.}, TITLE = {Progetto CLIPS: Specifiche Linguistiche e Manuale di Codifica, Livello semantico}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157326}, } @TECHREPORT{SABA_2003_TECHREPORT_S_157327, AUTHOR = {Saba, A.}, TITLE = {"Arte de la verdadera navegación en que se trata de la máchina del mundo" di Pedro de Siria, (ed. 1602, pp. 167)}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157327}, } @TECHREPORT{SABA_2003_TECHREPORT_S_157328, AUTHOR = {Saba, A.}, TITLE = {Lessico del testo "Hidrografía: la más curiosa que hasta aquí ha salido a luz [. ]" di Andrés de Poza, (ed. 1585, pp. 363). Trascrizione digitale: A. Saba}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157328}, } @TECHREPORT{SABA_2003_TECHREPORT_S_157329, AUTHOR = {Saba, A.}, TITLE = {Lessico del testo "Arte de navegar en que se contienen las reglas, declaraciones, secretos y avisos que a la navegación son necessarios [. ]" di Pedro de Medina, (ed. 1545, pp. 211). Trascrizione digitale: B. 
Periñán}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157329}, } @TECHREPORT{SABA_2003_TECHREPORT_S_157330, AUTHOR = {Saba, A.}, TITLE = {Lessico del testo "Arte de la verdadera navegación en que se trata de la máchina del mundo" di Pedro de Siria, (ed. 1602, pp. 167). Trascrizione digitale: A. Saba}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157330}, } @TECHREPORT{SABA_2003_TECHREPORT_S_157331, AUTHOR = {Saba, A.}, TITLE = {"Hidrografía: la más curiosa que hasta aquí ha salido a luz [. ]" di Andrés de Poza, (ed. 1585, pp. 363)}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157331}, } @TECHREPORT{SABA_2003_TECHREPORT_S_157332, AUTHOR = {Saba, A.}, TITLE = {"Arte de navegar en que se contienen las reglas, declaraciones, secretos y avisos que a la navegación son necessarios [. ]" di Pedro de Medina, (ed. 1545, pp. 211)}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157332}, } @TECHREPORT{SASSI_2003_TECHREPORT_S_157333, AUTHOR = {Sassi, M.}, TITLE = {La consultazione dei corpora costituzionali con DBT}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157333}, } @TECHREPORT{SASSI_2003_TECHREPORT_SA_157334, AUTHOR = {Sassi, M. and Amoroso, Y.}, TITLE = {Il corpus elettronico delle costituzioni iberoamericane}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157334}, } @TECHREPORT{SASSI_2003_TECHREPORT_SA_157352, AUTHOR = {Sassi, M. and Amoroso, Y.}, TITLE = {Il corpus elettronico delle costituzioni ibero-americane}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157352}, } @TECHREPORT{SASSI_2003_TECHREPORT_SC_157351, AUTHOR = {Sassi, M. and Ceccotti, M. L.}, TITLE = {Documentazione dell'attività di consulenza svolta sul database Gadda per studiosi in Italia e all'estero (2002-2003)}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157351}, } @TECHREPORT{SASSI_2003_TECHREPORT_SC_157359, AUTHOR = {Sassi, M. and Ceccotti, M. 
L.}, TITLE = {Concordanze della Cognizione del dolore}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157359}, } @TECHREPORT{SORIA_2003_TECHREPORT_S_157350, AUTHOR = {Soria, C.}, TITLE = {Advice and recommendations about AsAnAngel's linguistic components}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157350}, } @MISC{BERTAGNA_2003_MISC_BBKRT_157338, AUTHOR = {Bertagna, F. and Bouillon, P. and Kaplanidou, M. and Reeves, R. and Trecci, A.}, TITLE = {Creating MILEs: 100 lexical entries for 3 languages cross-linked}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157338}, } @MISC{CAPPELLI_2003_MISC_C_151525, AUTHOR = {Cappelli, G.}, TITLE = {Studio Morfosintattico del LSM (Linguaggio dei Segni Messicano)}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/151525}, } @MISC{CAPPELLI_2003_MISC_C_151526, AUTHOR = {Cappelli, G.}, TITLE = {"OLISIPPO: estrazione automatica di Vocabolario Basico del Latino"}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/151526}, } @MISC{CUTUGNO_2003_MISC_CMRR_157360, AUTHOR = {Cutugno, P. and Marconi, L. and Ratti, D. and Rolando, C.}, TITLE = {Il linguaggio burocratico e l'uso del computer}, YEAR = {2003}, KEYWORDS = {linguaggio, semplificazione, lessico}, URL = {https://publications.cnr.it/doc/157360}, } @MISC{ENEA_2003_MISC_E_220432, AUTHOR = {Enea, A.}, TITLE = {Gli ARCHIVI CARTACEI degli Istituti per la storia della Resistenza e della società contemporanea in Italia}, YEAR = {2003}, URL = {http://www.italia-resistenza.it/archivi-insmli-ricerca-semplice}, } @MISC{ENEA_2003_MISC_EPOF_157336, AUTHOR = {Enea, A. and Pardelli, G. and Orsolini, P. 
and Fiorentini, G.}, TITLE = {Banca dati delle Pubblicazioni ILC sul WEB}, YEAR = {2003}, ABSTRACT = {Il repertorio delle pubblicazioni scientifiche dell'Istituto di Linguistica Computazionale disponibile in Internet è strutturato per censire i prodotti dell'attività di ricerca: Libri e loro capitoli; Articoli su riviste scientifiche nazionali e internazionali; Articoli in atti di convegni nazionali e internazionali; Deliverable di progetti comunitari; Rapporti tecnici e CD-Rom. Da pochi anni si sono aggiunte anche le pubblicazioni fruibili da Internet, in particolare dalle riviste elettroniche. A questa nuova tipologia appartengono anche alcuni riferimenti bibliografici delle pubblicazioni ILC. I riferimenti bibliografici si riconducono al: 1) Trattamento Automatico della Lingua, comprendente: risorse linguistiche (corpora testuali, lessici, thesauri), reti concettuali, strumenti di analisi linguistica del testo, strumenti di estrazione e rappresentazione del contenuto testuale; 2) Analisi filologica, letteraria e linguistica del testo: sistemi di gestione di basi di dati testuali, lessicali e dialettali, strumenti multimediali per lo studio della tradizione del testo, strumenti per la documentazione, conservazione e fruizione del patrimonio linguistico-culturale; 3) Applicazioni di varia natura: Traduzione automatica; Strumenti multimediali per la didattica e la disabilità; Tecnologie per il filtraggio di contenuti internet a tutela dei minori; Strumenti multilingue; Generazione automatica; Ontologia; Web Semantico. 
La descrizione delle scelte tecnologiche e del software utilizzato è descritta nella pubblicazione allegata.}, KEYWORDS = {Bibliografia, Linguistica Computazionale}, URL = {http://www.ilc.cnr.it/viewpage.php/sez=ricerca/id=58/vers=ita}, } @MISC{MARCONI_2003_MISC_M_151524, AUTHOR = {Marconi, L.}, TITLE = {Thesaurus elettronico Italiano-Spagnolo: Strumento per l'insegnamento e la traduzione automatica assistita nelle due lingue}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/151524}, } @MISC{MARZI_2003_MISC_M_157343, AUTHOR = {Marzi, C.}, TITLE = {FONDI STRUTTURALI: linee direttrici}, YEAR = {2003}, ABSTRACT = {Scopo del presente documento è la diffusione delle indicazioni ed istruzione ricevute in occasione del corso di formazione sulla "Preparazione, gestione e rendicontazione dei progetti di ricerca e sviluppo nell'ambito dei finanziamenti nazionali alla ricerca industriale (D. lgs. 297/99)"1. L'idea del corso nasce dall'importanza crescente annessa al ruolo delle Istituzioni Scientifiche per la crescita della competitività delle imprese, il che investe il CNR di nuovi compiti per lo sviluppo economico dell'Italia. In seguito alla riforma del sistema di incentivi alla ricerca industriale attuata dal MURST (Ministero dell'Università e della Ricerca Scientifica e Tecnologica - oggi MIUR) con l'emanazione di due provvedimenti che hanno riunito la precedente normativa - il Decreto Legislativo n. 297 del 27 luglio 1999 "Riordino della disciplina e snellimento delle procedure per il sostegno della ricerca scientifica e tecnologica, per la diffusione delle tecnologie, per la mobilità dei ricercatori" ed il Decreto Ministeriale n. 
593 dell'8 agosto 2000 "Modalità procedurali per la concessione delle agevolazioni previste dal decreto legislativo 297/99" - il CNR, così come le Università e gli altri Enti Pubblici di Ricerca, trovano l'opportunità di collaborazione con le imprese e la possibilità di fruire dei finanziamenti agevolati per la ricerca nelle seguenti forme: presentazione di progetti di ricerca co-intestati tra imprese ed istituti di ricerca; partecipazione a Società e Consorzi per lo sviluppo di attività imprenditoriali; partecipazione ad attività di formazione di ricercatori e tecnici delle imprese.}, KEYWORDS = {gestione rendicontazione progetti ricerca e sviluppo}, URL = {https://publications.cnr.it/doc/157343}, } @MISC{SASSI_2003_MISC_S_157354, AUTHOR = {Sassi, M.}, TITLE = {Costituzione della Repubblica Bolivariana del Venezuela}, YEAR = {2003}, URL = {https://publications.cnr.it/doc/157354}, } @ARTICLE{ALLEGRINI_2002_ARTICLE_ABBIGY_168728, AUTHOR = {Allegrini, P. and Bellazzini, J. and Bramanti, G. and Ignaccolo, M. and Grigolini, P. and Yang, J.}, TITLE = {Scaling breakdown: A Signature of aging}, YEAR = {2002}, ABSTRACT = {We prove that the Lévy walk is characterized by bilinear scaling. This effect mirrors the existence of a form of aging that does not require the adoption of nonstationary conditions.}, PAGES = {1--4}, URL = {https://publications.cnr.it/doc/168728}, VOLUME = {66}, } @ARTICLE{ALLEGRINI_2002_ARTICLE_AGHPR_30857, AUTHOR = {Allegrini, P. and Grigolini, P. and Hamilton, P. and Palatella, L. and Raffaelli, G.}, TITLE = {Memory beyond memory in heart beating, a sign of a healthy physiological condition}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/30857}, VOLUME = {65}, } @ARTICLE{BIORCI_2002_ARTICLE_BMRR_70703, AUTHOR = {Biorci, G. and Marconi, L. and Ratti, D. 
and Rolando, C.}, TITLE = {La "composante animale" dans les expressions figées italiennes}, YEAR = {2002}, ABSTRACT = {In questo lavoro sono state raccolte e esaminate le frasi fatte, i modi di dire italiani che hanno un animale come fulcro caratterizzante della sequenza linguistica. I dati sono stati estratti da diversi sottoinsiemi linguistici automatizzati e sono stati aggiornati attraverso l’inserimento di polirematiche, di modi di dire, di proverbi e di locuzioni di uso frequente nella lingua italiana. Si sono studiate alcune metodologie e alcune regole morfo-sintattiche per la classificazione verificate successivamente nella pratica. Si sono raffrontati, inoltre, i risultati ottenuti con i dati linguistici del corpus di riferimento dell’italiano. Le espressioni idiomatiche qui raccolte, circa 600, sono state suddivise per tipologia formale. Ne sono stati esaminati nel dettaglio la morfologia e la sintassi. Esse si presentano nel periodo come nucleo indivisibile ma non cristallizzato: i verbi possono essere coniugati, alcuni elementi della polirematica possono essere sostituiti mantenendo intatti il significato e la pregnanza del modo di dire nel contesto.}, KEYWORDS = {fraseologia idiomatica}, PAGES = {141--186}, URL = {https://publications.cnr.it/doc/70703}, VOLUME = {81}, PUBLISHER = {Didier-Larousse; [poi] Honoré Champion, 2001-2004; [poi] Garnier (Paris, Francia)}, ISSN = {0007-9871}, JOURNAL = {Cahiers de lexicologie}, } @ARTICLE{BOZZI_2002_ARTICLE_BC_64456, AUTHOR = {Bozzi, A. and Corradini, M. S.}, TITLE = {New Trends in Philology: a Computational Application for Textual Criticism}, YEAR = {2002}, PAGES = {267--285}, URL = {https://publications.cnr.it/doc/64456}, VOLUME = {XXX}, } @ARTICLE{CALZOLARI_2002_ARTICLE_C_64455, AUTHOR = {Calzolari, N.}, TITLE = {Written Language Resources at LREC in Las Palmas: Closing Session}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/64455}, } @ARTICLE{CALZOLARI_2002_ARTICLE_CSBB_64500, AUTHOR = {Calzolari, N. 
and Soria, C. and Bertagna, F. and Barsotti, F.}, TITLE = {Evaluating Lexical Resources Using Senseval}, YEAR = {2002}, ABSTRACT = {The aim of our paper is twofold: to introduce some general reflections on the task of lexical semantic annotation and the adequacy of existing lexical-semantic reference resources, while giving an overall description of the Italian lexical sample task for the SENSEVAL-2 experiment. We suggest how the SENSEVAL exercise (and comparison between the two editions of the experiment) can be employed to evaluate the lexical reference resources used for annotation. We conclude with a few general remarks on the gap between the lexicon, a partially decontextualised object, and the corpus, where context plays a significant role.}, KEYWORDS = {Risorse Lessicali, Disambiguazione, Semantica, Annotazione, Wordnet}, PAGES = {375--390}, URL = {https://publications.cnr.it/doc/64500}, VOLUME = {8}, DOI = {10.1017/S1351324902003017}, PUBLISHER = {Cambridge University Press (Cambridge, Regno Unito)}, ISSN = {1469-8110}, JOURNAL = {Natural language engineering (Online)}, } @ARTICLE{CALZOLARI_2002_ARTICLE_CZL_30862, AUTHOR = {Calzolari, N. and Zampolli, A. and Lenci, A.}, TITLE = {Towards a Standard for a Multilingual Lexical Entry: The EAGLES/ISLE Initiative}, YEAR = {2002}, ABSTRACT = {ISLE, a transatlantic standard oriented initiative supported by EC and NSF under the Human Language Technology (HLT) programme, is a continuation of the long standing EAGLES initiative. The objective is to develop widely agreed and urgently demanded standards and guidelines for infrastructural language resources, tools, and HLT products. ISLE targets the areas of multilingual computational lexicons, natural interaction and multimodality, and evaluation. We describe the preliminary guidelines of a standard framework for multilingual computational lexicons, based on a general schema for the "Multilingual ISLE Lexical Entry" (MILE). 
The needs and features of existing Machine Translation systems provide the main reference points for the process of consensual definition of the MILE. We also provide a brief description of the EU SIMPLE semantic lexicons, built on the basis of previous EAGLES recommendations and now enlarged to real-size lexicons within national projects, thus creating a large infrastructural platform of harmonised lexicons in Europe. EAGLES previous results have already become de facto widely adopted standards, and EAGLES itself is a well-known trademark and point of reference for HLT projects and products.}, KEYWORDS = {Lessici multilingui, standard internazion, Semantic Web, Modello lessicale, Standard lessicali}, PAGES = {264--279}, URL = {https://publications.cnr.it/doc/30862}, VOLUME = {2276}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, JOURNAL = {Lecture notes in computer science}, } @ARTICLE{CIGNONI_2002_ARTICLE_CCM_64501, AUTHOR = {Cignoni, L. and Coffey, S. and Moon, R.}, TITLE = {Idiom Variation in Italian and English: Two Corpus-based Studies}, YEAR = {2002}, KEYWORDS = {Idioms, Variability, Corpora}, PAGES = {279--300}, URL = {https://publications.cnr.it/doc/64501}, VOLUME = {2}, } @ARTICLE{GIORGETTI_2002_ARTICLE_GPS_30861, AUTHOR = {Giorgetti, D. and Prodanof, I. and Sebastiani, F.}, TITLE = {Mapping an Automated Survey Coding Task into a Probabilistic Text categorization Framework}, YEAR = {2002}, ABSTRACT = {This paper describes how to apply a probabilistic Text Categorization method to a different and new domain where documents are answers to open end questionnaires and codes viewed as categories consist of a hierarchical model. A reduced size training set may be used taking advantage of the hierarchical organization of categories. 
The system developed in this framework aims at helping psychologists in the evaluation of open end surveys inquiring about job candidates' competencies.}, KEYWORDS = {Codifica interviste, Apprendimento, Estrazione info, Machine Learning}, PAGES = {115--124}, URL = {https://publications.cnr.it/doc/30861}, VOLUME = {2389}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, JOURNAL = {Lecture notes in computer science}, } @ARTICLE{TURRINI_2002_ARTICLE_TCP_30860, AUTHOR = {Turrini, G. and Cignoni, L. and Paccosi, A.}, TITLE = {Addizionario: words in your pocket}, YEAR = {2002}, ABSTRACT = {This paper describes Addizionario, a multilingual hypermedia laboratory for native language learning, addressed to children of nursery and primary schools. The features of the tool, which is child-centred, flexible, easy-to-use and interactive, make it suitable not only for normally skilled users, but also for those presenting more or less severe learning difficulties linked to both physical and cognitive impairment. The software, patented by the Italian National Research Council (C.N.R.), is used in numerous schools in Italy and abroad.}, KEYWORDS = {Bisogni speciali, Apprendimento, Lingua, Multimedialità, E-learning}, PAGES = {136--139}, URL = {https://publications.cnr.it/doc/30860}, VOLUME = {2398}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, JOURNAL = {Lecture notes in computer science}, } @BOOK{ALLEGRINI_2002_BOOK_AGW_136410, AUTHOR = {Allegrini, P. and Grigolini, P. and West, B. 
J.}, TITLE = {Formal Tools for Exploring Complexity}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/136410}, PUBLISHER = {EOLSS Publishers (Oxford, GBR)}, } @BOOK{CALZOLARI_2002_BOOK_C_136411, AUTHOR = {Calzolari, N.}, TITLE = {Computational Lexicons and Corpus Analysis: Between Theory and Practice}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/136411}, } @BOOK{CALZOLARI_2002_BOOK_C_136415, AUTHOR = {Calzolari, N.}, TITLE = {Computational Lexicons: Towards a New Paradigm of an Open Lexical Infrastructure?}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/136415}, } @BOOK{CALZOLARI_2002_BOOK_C_136416, AUTHOR = {Calzolari, N.}, TITLE = {Language Resources in a Multilingual Setting: The European Perspective}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/136416}, } @BOOK{LENCI_2002_BOOK_LD_136413, AUTHOR = {Lenci, A. and Di Tomaso, V.}, TITLE = {Introduction}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/136413}, PUBLISHER = {Edizioni dell'Orso (Alessandria, ITA)}, } @BOOK{LENCI_2002_BOOK_LD_136414, AUTHOR = {Lenci, A. and Di Tomaso, V.}, TITLE = {Exploring the Lexicon: Theory and Computation}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/136414}, PUBLISHER = {Edizioni dell'Orso (Alessandria, ITA)}, } @BOOK{PARDELLI_2002_BOOK_POSEG_231725, AUTHOR = {Pardelli, G. and Orsolini, P. and Sassi, M. and Enea, A. and Gazzetti, S.}, TITLE = {TAL Bibliography (1951-2002). Parte I}, YEAR = {2002}, ABSTRACT = {Il presente catalogo contiene molte delle bibliografie del Trattamento Automatico della Lingua TAL a partire dal secondo dopoguerra ad oggi e diverse bibliografie di opere di linguistica generale variamente collegate alla linguistica Computazionale CL, in varie formulazioni: glossematica, grammatica trasformazionale-generativa, fonetica, stilistica linguistica, psicolinguistica, sociolinguistica, didattica delle lingue, filosofia del linguaggio, storia della lingua, funzionalismo praghese, prosodismo inglese, ecc.. 
Sono compresi, inoltre, alcuni riferimenti alla documentazione di alcuni linguaggi di programmazione evoluti adatti alla elaborazione di dati linguistici. La tipologia delle opere presenti nel seguente catalogo sinteticamente può ricondursi a: - Le prime testimonianze del trattamento automatico del linguaggio: Busa (1951); - Atti di Congressi e Conferenze di varie Associazioni Internazionali (ACL, ALLC, COLING, TAL, ACLA, AILA, ecc.), tra i principali possiamo citare quelli dei congressi di CL tenuti a Yorktown Heights (IBM-64), a Grenoble (CITAL-67), a Praga (1968) e a Bergen (COLING-78); - Opere generali o introduttive delle applicazioni del calcolatore alle ricerche umanistiche e letterarie. Un esempio è dato dalla pubblicazione di F. De Tollenaere: i lavori presentati in questo survey del 1962 si riferiscono a ricerche nel settore Humanities di vari paesi: Stati Uniti, Inghilterra, Francia, Italia, Belgio, Olanda, Unione Sovietica, Cecoslovacchia, ecc. In questo gruppo, per la lessicografia, ricordiamo i due colloqui di Praga (1967) e di Pisa (1970); - Opere sull'uso di modelli matematici nella linguistica (Garvin, Maegaard). Gli argomenti trattati in queste opere nella maggior parte dei casi fanno riferimento a: 1. Valutazione statistica: compilazione di liste, dizionari, indici e di ricerche statistiche in genere (livello distribuzionale); 2. Elaborazione algoritmica dei sistemi sintattici e di vari modelli di acquisizione del linguaggio (livello sintattico); 3. Elaborazione automatica del contenuto del linguaggio (livello semantico); 4. Traduzione automatica (per le varie lingue); 5. Lessicografia (classica, romanza, slava, germanica, biblica, concordanze, indici e studi dialettologici, ecc.). E' stato possibile individuare le tematiche principali e gli argomenti più ricorrenti della soggettazione attraverso l'analisi di circa 5000 documenti nel settore del TAL. 
Mantenere una terminologia comune (normalizzazione) della soggettazione non è stato sempre possibile. L'interdisciplinarietà, sempre più praticata dopo gli anni '50, dovuta all'incontro dei metodi della linguistica con altre discipline e la terminologia dei primi anni in questo settore di indagine, ci avrebbero condotto alla dispersione dei soggetti, che sono stati rivisti in funzione di una maggiore omogeneità. I testi che studiano il linguaggio e i sistemi di automazione nelle ricerche e nelle analisi linguistiche sono stati descritti nel database principalmente in inglese e occasionalmente in francese per casi particolari (v. nell'indice dei Soggetti: Traduction Mécanique /Traduction Automatique) per rispettarne le prime testimonianze. Abbiamo preferito mantenere descrittori simili per evidenziare l'evoluzione della terminologia usata nei testi nel corso degli anni (v.: Automatic ... /Automated ...). Abbiamo evitato, per quanto possibile, l'uso di termini di eccessivo tecnicismo: ciò per rendere più agevole la ricerca attraverso Internet da parte di un'utenza non sempre specialistica. Il lavoro di soggettazione, analisi e selezione dei documenti per la costruzione di questo archivio è stato svolto da Gabriella Pardelli. Il software CDS-ISIS dell'Unesco è stato utilizzato per gli standard catalografici e, allo scopo, è stato attivato un server per l'accesso simultaneo al database. Il supporto informatico, anche per ciò che concerne il rilevamento dei dati per le indagini statistiche come il recupero automatico dei tag di sottocampo dei record bibliografici (lingua, paese, argomento, ecc.) è stato fornito interamente da Alessandro Enea. Paola Orsolini e Silvia Gazzetti si sono occupate del lavoro di catalogazione. L'elaborazione del catalogo generato dal database ISIS è stata effettuata da Manuela Sassi.}, KEYWORDS = {Bibliografia, Linguistica Computazionale}, PAGES = {1--187}, URL = {http://www.biblos.cnr.it/04_2_TALb.html}, PUBLISHER = {S. T. A. R. 
Servizio Tecnografico Area Ricerca CNR (Pisa, ITA)}, } @BOOK{PIRRELLI_2002_BOOK_P_136412, AUTHOR = {Pirrelli, V.}, TITLE = {Per un superamento della dicotomia Lessico-Grammatica. Aspetti di composizionalità "debole" del linguaggio}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/136412}, } @INPROCEEDINGS{GIORGETTI_2002_INPROCEEDINGS_GPS_91537, AUTHOR = {Giorgetti, D. and Prodanof, I. and Sebastiani, F.}, TITLE = {Automated Coding of Open-ended Surveys: Technical and Ethical Issues}, YEAR = {2002}, ABSTRACT = {This paper presents some technical and ethical issues arising from the use of automated methods to solve a typical social science problem: the coding of surveys including answers to open-ended questions. Coding an open-ended survey, which may include thousands of interviews, means to assign symbolic predefined labels to its answers according to their meaning. The increasing amount of information available from surveys carried out also on the Web, makes it viable the use of (semi)automated systems both to reduce time and human resources cost to analyze and manage it, and to produce results independent from coders' subjective impressions, but on the other hand poses both technical and ethical challenges to be carefully evaluated before being adopted}, KEYWORDS = {Text categorization}, PAGES = {9}, URL = {https://publications.cnr.it/doc/91537}, CONFERENCE_NAME = {International Conference on Universal Knowledge and Language. ICUKL-2002}, CONFERENCE_PLACE = {Goa, India}, CONFERENCE_DATE = {25-29 November 2002}, } @INPROCEEDINGS{GIORGETTI_2002_INPROCEEDINGS_GPS_91568, AUTHOR = {Giorgetti, D. and Prodanof, I. 
and Sebastiani, F.}, TITLE = {Mapping an automated survey coding task into a probabilistic text categorization framework}, YEAR = {2002}, ABSTRACT = {This paper describes how to apply a probabilistic Text Categorization method to a different and new domain where documents are answers to open end questionnaires and codes viewed as categories consist of a hierarchical model. A reduced size training set may be used taking advantage of the hierarchical organization of categories. The system developed in this framework aims at helping psychologists in the evaluation of open end surveys inquiring about job candidates' competencies.}, KEYWORDS = {Text categorization}, PAGES = {115--124}, URL = {https://publications.cnr.it/doc/91568}, VOLUME = {2389}, CONFERENCE_NAME = {Advances in Natural Language Processing-Third International Conference-PorTAL 2002}, CONFERENCE_PLACE = {Faro, Portugal}, CONFERENCE_DATE = {23-26 June 2002}, } @INPROCEEDINGS{PICCHI_2002_INPROCEEDINGS_PSNCV_288585, AUTHOR = {Picchi, E. and Sassolini, E. and Nahli, O. and Cucurullo, S. and Vargas, I. M.}, TITLE = {Italian Arabic Linguistic Tools}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/288585}, VOLUME = {II}, CONFERENCE_NAME = {LREC 2002}, CONFERENCE_PLACE = {Las Palmas de Gran Canaria, Spain}, CONFERENCE_DATE = {30th \& 31 May 2002}, BOOKTITLE = {Third International Conference on Language Resources and Evaluation}, } @TECHREPORT{SASSOLINI_2002_TECHREPORT_SN_288591, AUTHOR = {Sassolini, E. and Nahli, O.}, TITLE = {Motore morfologico della lingua araba}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/288591}, } @TECHREPORT{STARYNKEVITCH_2002_TECHREPORT_SDTZHIGACMMG_430635, AUTHOR = {Starynkevitch, B. and Daoudi, M. and Tombelle, C. and Zheng, H. and Hepple, M. and Ireson, N. and Gomez Hidalgo, J. and Allegrini, P. and Calzolari, N. and Marchi, S. and Montemagni, S. 
and Guerra, S.}, TITLE = {POESIA Software Architecture Definition Document}, YEAR = {2002}, ABSTRACT = {Software Architecture Definition Document}, KEYWORDS = {NLP, Software Engineering}, PAGES = {68--80}, URL = {https://publications.cnr.it/doc/430635}, } @MISC{CECCOTTI_2002_MISC_CPS_242344, AUTHOR = {Ceccotti, M. L. and Pardelli, G. and Sassi, M.}, TITLE = {Per un'analisi del lessico linguistico-computazionale: da Weaver all'ALPAC Report}, YEAR = {2002}, ABSTRACT = {In questo report proponiamo una breve presentazione della 'preistoria' della linguistica computazionale, del periodo compreso tra la pubblicazione del memorandum di Warren Weaver del 1949 e del Report dell'Alpac del 1966. La preistoria della CL vuol dire essenzialmente traduzione automatica, dizionari di macchina, etc., attività di ricerca svolte da matematici, fisici. L'inglese è la lingua veicolo di tutto ciò e continuerà ad esserlo ancora per anni prima che anche in Europa - eccezione è l'Inghilterra - e nel resto del mondo siano tentate traduzioni, adattamenti, proposte, che guidano i primi passi della ricerca in questi ambiti.}, KEYWORDS = {Lessico linguistico-computazionale}, URL = {https://publications.cnr.it/doc/242344}, } @ARTICLE{CALZOLARI_2001_ARTICLE_CCZ_30863, AUTHOR = {Calzolari, N. and Corazzari, O. and Zampolli, A.}, TITLE = {Lexical-Semantic Tagging of an Italian Corpus}, YEAR = {2001}, ABSTRACT = {Semantically tagged corpora are becoming an urgent need for training and evaluation within many applications. They are also the natural accompaniment of semantic lexicons, for which they constitute both a useful testbed to evaluate their adequacy and a repository of corpus examples for the attested senses. It is essential that sound criteria are defined for their construction and a specific methodology is set up for the treatment of various semantic phenomena. 
We present some observations and results concerning the lexical-semantic tagging of an Italian corpus within the framework of two projects: the ELSNET feasibility study, part of a preparatory phase started with Senseval/Romanseval, and an Italian National Project (TAL), where one of the components is the lexical-semantic annotation of larger quantities of texts for an Italian syntactic-semantic Treebank. The results of the ELSNET experiment have been of utmost importance for the definition of the technical guidelines for the lexical-semantic level of annotation of the Treebank.}, KEYWORDS = {Annotazione semantic, Corpus annotato, Treebank, Lessico semantico, Semantica}, PAGES = {291--304}, URL = {https://publications.cnr.it/doc/30863}, VOLUME = {2004}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, JOURNAL = {Lecture notes in computer science}, } @ARTICLE{PICCHI_2001_ARTICLE_PMB_64487, AUTHOR = {Picchi, E. and Montemagni, S. and Biagini, L.}, TITLE = {DBT-ALT: a System for Storing and Querying the Data of the Atlante Lessicale Toscano (ALT)}, YEAR = {2001}, KEYWORDS = {Atlanti linguistici, Dialettologia comput, Lessicografia dialet, Geolinguistica, Sociolinguistica}, PAGES = {85--103}, URL = {https://publications.cnr.it/doc/64487}, VOLUME = {9}, } @BOOK{PICCHI_2001_BOOK_PS_136429, AUTHOR = {Picchi, E. and Stoppelli, P.}, TITLE = {LIZ 4.0 Letteratura Italiana Zanichelli}, YEAR = {2001}, KEYWORDS = {Analisi testuale, Editoria Elettronica, Letteratura Italiana, Digital Library}, URL = {https://publications.cnr.it/doc/136429}, PUBLISHER = {Zanichelli (Bologna, ITA)}, } @BOOK{SABA_2001_BOOK_S_136428, AUTHOR = {Saba, A.}, TITLE = {El léxico del Breve Compendio de la sphera y de la arte de navegar de Martín Cortés}, YEAR = {2001}, KEYWORDS = {Lessicografia, Morfosintassi, Lessico nautico, Text processing}, URL = {https://publications.cnr.it/doc/136428}, } @INCOLLECTION{BUSA_2001_INCOLLECTION_BCLP_136430, AUTHOR = {Busa, F. and Calzolari, N. and Lenci, A. 
and Pustejovsky, J.}, TITLE = {Building a Semantic Lexicon: Structuring and Generating Concepts}, YEAR = {2001}, KEYWORDS = {Semantica Lessicale, Lessico Generativo, Lessici computaziona, Ontologie, Conoscenza semantica}, URL = {https://publications.cnr.it/doc/136430}, } @INCOLLECTION{RUIMY_2001_INCOLLECTION_RGM_136426, AUTHOR = {Ruimy, N. and Gola, E. and Monachini, M.}, TITLE = {Lexicography Informs Lexical Semantics: the SIMPLE Experience}, YEAR = {2001}, ABSTRACT = {Gli autori presentano un approccio innovativo alla costruzione di un lessico semantico che coniuga teoria linguistica e pratica lessicografica, dimostrando che la visione della lessicografia quale disciplina ortogonale alla linguistica teorica è ingannevole. L'articolo valuta l'adeguatezza del Lessico Generativo, una teoria innovativa nel settore della semantica lessicale utilizzata per lo sviluppo dei lessici SIMPLE (coordinato da Pisa). Tali risorse computazionali su vasta scala, costruite sotto l'egida della CE per 12 lingue europee, condividono modello teorico, formato di rappresentazione, un medesimo nucleo di entrate lessicali e sono divenute di fatto uno standard. Il potenziale dei ruoli qualia del Lessico Generativo viene esaminato attraverso la rappresentazione dei nomi astratti che, per loro intrinseca complessità, costituiscono un significativo banco di prova per ogni teoria semantica. I qualia forniscono le dimensioni semantiche lungo cui strutturare un'ontologia dei nomi astratti e costruire parallelamente, per la loro descrizione, una ‘libreria’ di ‘templates’, ovvero nuclei strutturati di informazioni associate ad ogni tipo semantico. 
La metodologia ‘template-driven’ conferisce rilevanza e valore competitivo ai lessici così sviluppati, in quanto costituisce non solo un originale strumento di codifica ma anche l'implementazione della teoria ed assicura, inoltre, coerenza sia all'interno di un lessico che tra lessici di lingue diverse.}, KEYWORDS = {semantica lessicale, ontologia, templates, lessico elettronico, lessico generativo}, PAGES = {350--362}, URL = {https://publications.cnr.it/doc/136426}, PUBLISHER = {Cambridge University Press (Cambridge, GBR)}, ISBN = {0521780489}, BOOKTITLE = {The Language of Word Meaning}, EDITOR = {Bouillon, P. and Busa, F. and Bogouraev, B.}, } @PATENT{BOZZI_2001_PATENT_BEF_144813, AUTHOR = {Bozzi, A. and Eisinberg, A. and Fedele, G.}, TITLE = {Metodo e apparato per il riconoscimento automatico di caratteri}, YEAR = {2001}, KEYWORDS = {OCR, Reti Neurali, Biblioteche Digitali, Archivi Linguistici, Beni Librari}, URL = {https://publications.cnr.it/doc/144813}, } @TECHREPORT{PARDELLI_2001_TECHREPORT_PC_241747, AUTHOR = {Pardelli, G. and Cignoni, L.}, TITLE = {Entrate Lessicali per il trattamento automatico del linguaggio (TAL)}, YEAR = {2001}, KEYWORDS = {Strumenti informatici, didattiche disciplinari}, PAGES = {1--9}, URL = {https://publications.cnr.it/doc/241747}, } @ARTICLE{LANZA_2000_ARTICLE_LP_229315, AUTHOR = {Lanza, C. and Pardelli, G.}, TITLE = {Una soggettazione automatica di letteratura grigia con algoritmi di rete neurale artificiale. Due esperimenti ICAS e ILC}, YEAR = {2000}, ABSTRACT = {The aim of this work is to create an automatic subject classification of grey literature documents using an artificial neural network. In particular, a software simulator of neural network with back-propagation learning scheme was used; training of the network was carried out on around 300 documents. The prototype developed follows the steps which were performed during the learning, the processing and the network querying phase. 
The analysis of the final tests provides targets to be referred to the percentage of document classification error for each subject. From this data it is possible to evince possible document-subject correlations and/or subject-subject correlations in order to construct a relational Database of the scientific documents available at the Institute of Computational Linguistics and at the Institute of Instrumental Analytical Chemistry.}, KEYWORDS = {Artificial Neural Network, IT for Library, Data Mining}, PAGES = {52--56}, URL = {https://publications.cnr.it/doc/229315}, VOLUME = {67}, PUBLISHER = {Istituto superiore di sanità (Roma, Italia)}, ISSN = {0393-5620}, JOURNAL = {ISTISAN congressi}, } @ARTICLE{PIRRELLI_2000_ARTICLE_PB_274338, AUTHOR = {Pirrelli, V. and Battista, M.}, TITLE = {The Paradigmatic Dimension of Stem Allomorphy in Italian Verb Inflection}, YEAR = {2000}, ABSTRACT = {This paper is concerned with a detailed analysis of stem allomorphy in Italian Conjugation, carried out from a phonological and paradigmatic perspective. In theory, one would expect these two complementary viewpoints to take care of neatly separable classes of phenomena. In fact, the two dimensions turn out to be interlocked in a complex way, to define a grammatical continuum ranging from minor phonological processes to full suppletion. A formal descriptive framework is proposed here, whereby several insights into the structure of inflectional paradigms (Matthews 1974, Carstairs 1987, Wurzel 1989, Stump 1991, Aronoff 1994) are dealt with from a unifying, purely morphological perspective. In this framework, the structure of a verb paradigm is characterised in terms of a distribution of slots into a number of equivalence classes, or set partition, where each equivalence class is associated with a morphologically distinct stem root. It is shown that, in Italian, a few set partitions account for the structure of all Italian verb paradigms, whether regular or less regular. 
Moreover, all these partitions are mutually related homomorphically. This well-behaved family of distributions tightly constrains stem allomorphy at an appropriate level of abstraction, independently of whether the origin of allomorphy is morpho-phonological or purely morphological, showing the superiority of the obtained generalisations over more traditional syntagmatic accounts.}, KEYWORDS = {Morfologia, allomorfia, paradigmi flessionali}, PAGES = {307--379}, URL = {https://publications.cnr.it/doc/274338}, VOLUME = {12}, PUBLISHER = {Pacini (Ospedaletto, Italia)}, ISSN = {1120-2726}, JOURNAL = {Rivista di Linguistica}, } @BOOK{MARCONI_2000_BOOK_MBRRM_264796, AUTHOR = {Marconi, L. and Bermúdez, E. M. and Ratti, D. and Rolando, C. and Miyares, L. R.}, TITLE = {Diccionario Ortográfico del Español, basado en el léxico del escolar cubano}, YEAR = {2000}, ABSTRACT = {Edizione a cura dell'Ufficio del Presidente del Consiglio Nazionale delle Ricerche. Realizzato dalla base di un corpus di testi prodotti dai bambini, raccolto in Cuba da un'equipe di Linguisti, il "Diccionario Ortográfico del Español basado en el léxico del escolar cubano" contiene 8.403 lemmi con la descrizione delle possibili forme, della categoria grammaticale per sostantivi, aggettivi e per le altre categorie grammaticali fatta eccezione per i verbi. Come per l'italiano i verbi sono dotati di un codice di rimando al modello di coniugazione. I modelli di riferimento per questo dizionario sono 77 e rappresentano un consistente insieme dei modelli del verbo spagnolo. Il Dizionario Ortografico dello Spagnolo (ed. CNR, Genova, 2000) basato sul lessico dello scolaro cubano è formato da tre parti distinte. - Una prima parte con le regole più comuni dell'ortografia e dell'accentuazione; alcune irregolarità grafiche dei verbi e delle altre categorie verbali; una breve descrizione della struttura del dizionario. 
- La seconda parte è formata dai modelli verbali, cioè da una serie di tavole dove ogni verbo coniugato serve di riferimento per tutti i verbi che si comportano nella coniugazione nello stesso modo. - Forma la terza parte la lista delle parole usate dallo scolaro cubano con le relative terminazioni nel caso dei nominali e con il numero che indica la tavola di riferimento nel caso dei verbi .}, KEYWORDS = {Diccionario, Español}, PAGES = {i--334}, URL = {http://www.ge.ilc.cnr.it/page.php?ID=dic-cubano\&lingua=it}, ISBN = {8890031824}, } @BOOK{MARCONI_2000_BOOK_MBRRM_264800, AUTHOR = {Marconi, L. and Bermudez, E. M. and Ratti, D. and Rolando, C. and Miyares, L. R.}, TITLE = {Vocabulario inverso y anagramas del Español, basado en el léxico del escolar cubano}, YEAR = {2000}, ABSTRACT = {A partire dal "Diccionario Ortográfico del Español basado en el léxico del escolar cubano" il "Vocabulario Inverso y Anagramas del Español basado en el léxico del escolar cubano" contiene tutte le forme ordinate in modalità inversa e i relativi anagrammi del lessico scolare cubano. Il "Vocabolario Inverso e Anagrammi dello Spagnolo" (ed. CNR, Genova, 2000) è un complemento del Diccionario Ortográfico del Español, dal momento che contiene le 80305 forme generate a partire dal Diccionario Ortográfico del Español, ordinate in ordine inverso e i 10182 anagrammi.}, KEYWORDS = {Vocabulario Inverso, Anagramas, Español}, PAGES = {i--417}, URL = {http://www.ge.ilc.cnr.it/page.php?ID=inverso\&lingua=it}, ISBN = {8890031832}, EDITOR = {Marconi, L. and Bermúdez, E. M. and Ratti, D. and Rolando, C. and Miyares, L. R.}, } @INCOLLECTION{PETERS_2000_INCOLLECTION_PPB_406496, AUTHOR = {Peters, C. and Picchi, E. and Biagini, L.}, TITLE = {Parallel and comparable bilingual corpora in language teaching and learning}, YEAR = {2000}, ABSTRACT = {An abstract is not available.}, KEYWORDS = {Linguistics, Content analysis and indexing. 
Linguistic processes, Information search and retrieval}, PAGES = {73--85}, URL = {https://publications.cnr.it/doc/406496}, VOLUME = {4}, BOOKTITLE = {Multilingual corpora in teaching and research}, EDITOR = {Botley, S. P. and McEnery, A. M. and Wilson, A.}, } @INPROCEEDINGS{CECCOTTI_2000_INPROCEEDINGS_CSP_228142, AUTHOR = {Ceccotti, M. L. and Sassi, M. and Pardelli, G.}, TITLE = {Un laboratorio multimediale dedicato a Carlo Emilio Gadda: il modello e i primi dati implementati in formato XML}, YEAR = {2000}, ABSTRACT = {In this paper we present of the Italian National Council of Research titled "Gadda's Electronic Archive: Lexicographical and bibliographical Tools in XML". The text is made of two sections: in the first, we present Gadda's Electronic Archive, implemented at the ILC, and in the second, we show the project's objectives and the results achieved in the first months of work.}, KEYWORDS = {Gadda's Electronic Archive}, PAGES = {267--271}, URL = {https://publications.cnr.it/doc/228142}, PUBLISHER = {Associazione Italiana per l'Informatica ed il Calcolo Automatico (AICA) (Milano, ITA)}, CONFERENCE_NAME = {XXXVIII Congresso Annuale AICA: Le tecnologie dell'Informazione e della Comunicazione come sviluppo del Paese}, CONFERENCE_PLACE = {Taormina}, CONFERENCE_DATE = {27-30 Settembre}, } @INPROCEEDINGS{CECCOTTI_2000_INPROCEEDINGS_CSP_231335, AUTHOR = {Ceccotti, M. L. and Sassi, M. and Pardelli, G.}, TITLE = {Il soccorso informatico per lo studio di un autore difficile, C. E. Gadda}, YEAR = {2000}, ABSTRACT = {Nella prima parte di questo contributo si illustreranno le caratteristiche dell'Archivio Gadda in DBT, frutto di un lungo lavoro redazionale di transcodifica e di codifica. 
Nella seconda sarà brevemente descritta e motivata la realizzabilità di un sito web su Gadda, un modello di 'laboratorio culturale' che costituito inizialmente da alcuni brani gaddiani, da nostre recenti pubblicazioni, da dati bibliografici, potrebbe essere arricchito dall'apporto del lettore di Gadda, studioso, studente, curioso...}, KEYWORDS = {Gadda C. E}, PAGES = {149--154}, URL = {https://publications.cnr.it/doc/231335}, PUBLISHER = {Associazione Italiana per l'Informatica ed il Calcolo Automatico (AICA) (Milano, ITA)}, CONFERENCE_NAME = {DIDAMATICA 2000, Informatica per la Didattica}, CONFERENCE_PLACE = {Cesena}, CONFERENCE_DATE = {4-5-6 maggio 2000}, BOOKTITLE = {Atti 1. Lavori Scientifici}, EDITOR = {Andronico, A. and Casadei, G. and Sacerdoti, G.}, } @INPROCEEDINGS{LANZA_2000_INPROCEEDINGS_LP_226416, AUTHOR = {Lanza, C. and Pardelli, G.}, TITLE = {Una soggettazione automatica di letteratura grigia con algoritmi di rete neurale artificiale. Due esperimenti ICAS e ILC}, YEAR = {2000}, ABSTRACT = {The aim of this work is to create an automatic subject classification of grey literature documents using an artificial neural network. In particular, a software simulator of neural network with back-propagation learning scheme was used; training of the network was carried out on around 300 documents. The prototype developed follows the steps which were performed during the learning, the processing and the network querying phase. The analysis of the final tests provides targets to be referred to the percentage of document classification error for each subject. 
From this data it is possible to evince possible document-subject correlations and/or subject-subject correlations in order to construct a relational Database of the scientific documents available at the Institute of Computational Linguistics and at the Institute of Instrumental Analytical Chemistry.}, KEYWORDS = {Artificial Neural Network, IT for Library, Data Mining}, PAGES = {52--56}, URL = {https://publications.cnr.it/doc/226416}, VOLUME = {67}, PUBLISHER = {Istituto Superiore di Sanità (Roma, ITA)}, CONFERENCE_NAME = {La letteratura grigia: politica e pratica. 3° Convegno Nazionale}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {25-26 novembre 1999}, BOOKTITLE = {La letteratura grigia: politica e pratica}, EDITOR = {Alberani, V. and De Castro, P.}, } @ARTICLE{PIRRELLI_1999_ARTICLE_PY_273631, AUTHOR = {Pirrelli, V. and Yvon, F.}, TITLE = {The hidden dimension: a paradigmatic view of data-driven NLP}, YEAR = {1999}, ABSTRACT = {Many tasks in language analysis are described as the maximally economic mapping of one level of linguistic representation onto another such level. Over the past decade, many different machine-learning strategies have been developed to automatically induce such mappings directly from data. In this paper, we contend that the way most learning algorithms have been applied to problems of language analysis reflects a strong bias towards a compositional (or biunique) model of interlevel mapping. Although this is justified in some cases, we contend that biunique inter-level mapping is not a jack of all trades. A model of analogical learning, based on a paradigmatic reanalysis of memorized data, is presented here. The methodological pros and cons of this approach are discussed in relation to a number of germane linguistic issues and illustrated in the context of three case studies: word pronunciation, word analysis, and word sense disambiguation. 
The evidence produced here seems to suggest that the brain is not designed to carry out the logically simplest and maximally economic way of relating form and function in language. Rather we propose a radical shift of emphasis in language learning from syntagmatic inter-level mapping to paradigmatically-constrained intra-level mapping.}, KEYWORDS = {data-driven NLP, memory-based machine learning, analogical language learning}, PAGES = {391--408}, URL = {https://publications.cnr.it/doc/273631}, VOLUME = {11}, PUBLISHER = {Taylor \& Francis (London, Regno Unito)}, ISSN = {1362-3079}, JOURNAL = {Journal of experimental and theoretical artificial intelligence (Online)}, } @INPROCEEDINGS{BEDINI_1999_INPROCEEDINGS_BBT_407631, AUTHOR = {Bedini, L. and Bozzi, A. and Tonazzini, A.}, TITLE = {Digital techniques for character recognition in old printed books and in modern damaged documents}, YEAR = {1999}, ABSTRACT = {An abstract is not available.}, KEYWORDS = {Document analysis, Blind image restoration, Optical character recognition, Computational philology, Image processing and computer vision}, PAGES = {959--962}, URL = {https://publications.cnr.it/doc/407631}, CONFERENCE_NAME = {Second International Congress on Science and Technology for the Safeguard of Cultural Heritage in the Mediterranean Basin}, CONFERENCE_PLACE = {Paris, France}, CONFERENCE_DATE = {July 5-9 1999}, } @INPROCEEDINGS{PARDELLI_1999_INPROCEEDINGS_P_241865, AUTHOR = {Pardelli, G.}, TITLE = {I risultati del Progetto BIBLOS}, YEAR = {1999}, ABSTRACT = {Il sito di BIBLOS e' entrato ufficialmente in servizio con questa Conferenza quando e' stato presentato presso la Sede Centrale del CNR (Aula Marconi). In tale occasione i responsabili delle Unità Operative degli Istituti del Comitato 08 hanno presentato i risultati raggiunti, in particolare il catalogo a soggetto. 
In tale occasione è stato presentato il soggettario e il prototipo bibliografico dell'unità operativa ILC.}, KEYWORDS = {Biblos Project}, URL = {https://publications.cnr.it/doc/241865}, CONFERENCE_NAME = {BIBLOS Biblioteca virtuale del settore Storico Filosofico e Filologico del Consiglio Nazionale delle Ricerche: I risultati del Progetto BIBLOS: la telematica al servizio della cultura umanistica}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {16 novembre 1999}, } @BOOK{PARDELLI_1998_BOOK_PS_255948, AUTHOR = {Pardelli, G. and Sassi, M.}, TITLE = {I. L. C. Library}, YEAR = {1998}, ABSTRACT = {La stampa del catalogo dell'archivio librario dell'Istituto di Linguistica Computazionale I.L.C. è tratta dall'archivio elettronico. La registrazione del materiale bibliografico viene effettuata con il software CDS/ISIS dell'UNESCO. Tale sistema ci permette di descrivere il documento secondo gli standard internazionali. I dati bibliografici dell'archivio sono stati curati da Gabriella Pardelli, responsabile della Biblioteca dell'Istituto, le procedure informatiche relative all'indicizzazione e alla stampa complessiva del catalogo sono state effettuate da Manuela Sassi, Tecnologo dell'Istituto.}, KEYWORDS = {Catalogo, Linguistica Computazionale}, PAGES = {i--253}, URL = {https://publications.cnr.it/doc/255948}, PUBLISHER = {S. T. A. R. Servizio Tecnografico Area Ricerca CNR (Pisa, ITA)}, } @INPROCEEDINGS{LANZA_1998_INPROCEEDINGS_LP_241761, AUTHOR = {Lanza, C. and Pardelli, G.}, TITLE = {Sviluppo delle raccolte e procedure di gestione nelle Biblioteche dell'ICAS e dell'ILC}, YEAR = {1998}, ABSTRACT = {La gestione del materiale bibliografico è rappresentata da una serie di procedure che vanno dall'acquisizione all'accessibilità dei documenti. 
Nel presente articolo abbiamo cercato di illustrare la situazione attuale delle nostre Biblioteche, il contenuto argomentativo, il servizio informativo, l'importanza della cooperazione tra Biblioteche e la necessità di aggiornamento professionale costante per un settore, l'IT for Library, in continuo divenire.}, KEYWORDS = {Linguistica Computazionale, Chimica Analitica e Strumentale, Biblioteche}, URL = {https://publications.cnr.it/doc/241761}, CONFERENCE_NAME = {La Biblioteca, un Servizio in Rete}, CONFERENCE_PLACE = {Follonica}, CONFERENCE_DATE = {1-2 ottobre 1998}, } @INPROCEEDINGS{MARCONI_1998_INPROCEEDINGS_MRR_270217, AUTHOR = {Marconi, L. and Ratti, D. and Rolando, C.}, TITLE = {Evaluacion cuantitava del lexico periodistico}, YEAR = {1998}, ABSTRACT = {Our corpus comprises all editions of the Repubblica newspaper published in 1994. These have been analysed automatically to establish the newspaper's language richness and variety. Since the data have not been lemmatized, our analysis is based on all forms encountered in the corpus. The words in the corpus have been classified according to the following types: news, business and finance, politics, culture entertainment and sport. The classification follows the scheme developed for the Italian corpora project. Various statistical analyses will be presented and discussed.}, PAGES = {679--683}, URL = {https://publications.cnr.it/doc/270217}, CONFERENCE_NAME = {JADT 1998}, CONFERENCE_PLACE = {Nice}, CONFERENCE_DATE = {1998}, BOOKTITLE = {JADT 1998}, EDITOR = {Mellet, S. and La Collaboration De Brunet, A. E. and Juillard, M. and Lebart, L. 
and Salem, A.}, } @TECHREPORT{PARDELLI_1998_TECHREPORT_P_241822, AUTHOR = {Pardelli, G.}, TITLE = {Verso la costruzione di un soggettario per il Progetto BIBLOS all'Istituto di Linguistica Computazionale}, YEAR = {1998}, ABSTRACT = {This document describes a methodology aimed at building a subject catalogue at the Institute for Computational Linguistics within the framework of the Biblos Project (Virtual Humanities Library of the National Research Council).}, KEYWORDS = {Subject Indexing, Cataloguing Documentation, Progetto BIBLOS}, PAGES = {1--6}, URL = {https://publications.cnr.it/doc/241822}, } @TECHREPORT{PARDELLI_1998_TECHREPORT_PR_241842, AUTHOR = {Pardelli, G. and Rini, S.}, TITLE = {Progetto BIBLOS: realizzazione del prototipo per la messa in linea con INTERNET della banca dati bibliografica dell'Istituto di Linguistica Computazionale}, YEAR = {1998}, ABSTRACT = {INTRODUZIONE Nel presente lavoro abbiamo cercato di illustrare la tipologia e il contenuto argomentativo del materiale documentario, le relative procedure di selezione, di registrazione e di scarto per arrivare alla costruzione del prototipo bibliografico del progetto BIBLOS. L'Istituto di Linguistica Computazionale (ILC) del Consiglio Nazionale delle Ricerche (CNR) afferisce al Comitato per le Scienze Storiche, Filosofiche e Filologiche. L' Istituto, nell'ambito della propria area disciplinare, gestisce il patrimonio documentario ed eroga i propri servizi in conformità a quanto previsto nel DPR n.475/87 1 e nel DPR n.417/95 2. La biblioteca dell'Istituto raccoglie e conserva: a) le pubblicazioni di consultazione generale o specializzate acquistate nell'ambito dell'attività scientifica del CNR, indispensabili per studi e ricerche; b) le pubblicazioni che riguardano la ricerca scientifica e tecnologica edita dal CNR o con il suo contributo o sotto il suo patrocinio. 
La biblioteca dell' Istituto ha quindi il compito di curare la raccolta e la catalogazione di tutto il materiale in suo possesso nonché quello di coordinare il servizio di informazione, mediante l'accesso alle banche dati, per rendere disponibile il recupero di documenti anche attraverso il ricorso alle procedure di prestito interbibliotecario. L'assolvimento di tali compiti e la necessità di soddisfare un'utenza esigente comportano una continua e attenta valutazione nell'acquisizione delle raccolte, di fatto spesso condizionata dal dinamismo dell'attività scientifica. Tale attività genera una letteratura spesso caratterizzata da rapida obsolescenza, benchè certi documenti possano essere richiesti anche dopo molti anni dalla loro pubblicazione, per il valore culturale e storico. Tutti questi fattori, non disgiunti dalla razionalizzazione dei fondi destinati alla biblioteca e dallo spazio fisico disponibile, determinano un'attenta valutazione dell'intera attività di gestione.}, KEYWORDS = {Biblos Project}, PAGES = {1--11}, URL = {https://publications.cnr.it/doc/241842}, } @ARTICLE{MARCONI_1997_ARTICLE_MRR_264811, AUTHOR = {Marconi, L. and Ratti, D. and Rolando, C.}, TITLE = {Il lessico scritto dei bambini}, YEAR = {1997}, ABSTRACT = {Il lessico dei bambini è più ricco e fantasioso di quanto normalmente si pensi. Certamente più ricco e vario del lessico usato dagli scrittori dei testi destinati all'infanzia, che sembrano temere di indirizzare la fantasia dei bambini verso coinvolgimenti conflittuali.}, PAGES = {22--24}, URL = {https://publications.cnr.it/doc/264811}, VOLUME = {19}, PUBLISHER = {Giunti-Marzocco (Firenze, Italia)}, ISSN = {0042-7349}, JOURNAL = {La Vita scolastica}, } @BOOK{RATTI_1997_EDITORIAL_RB_265128, AUTHOR = {Ratti, D. and Biorci, G.}, TITLE = {Sinonimi e contrari. 
Dizionario compatto dei sinonimi e dei contrari di Daniela Ratti e Grazia Biorci}, YEAR = {1997}, PAGES = {1--719}, URL = {https://publications.cnr.it/doc/265128}, PUBLISHER = {Zanichelli SpA (Bologna, ITA)}, ISBN = {9788808165206}, } @ARTICLE{MARCONI_1996_ARTICLE_MR_264809, AUTHOR = {Marconi, L. and Ratti, D.}, TITLE = {Se il bambino scrive luovo}, YEAR = {1996}, ABSTRACT = {In questo lavoro viene analizzata la distribuzione degli errori riferita a 5000 testi liberamente prodotti dai bambini di età compresa tra i 6 e 11 anni. La casistica degli errori è estremamente ampia e variegata: accanto ad errori di ortografia abbiamo trovato errori di concordanza tra le varie parti del discorso, errori sintattici e errori di tipo morfologico.}, PAGES = {14--18}, URL = {https://publications.cnr.it/doc/264809}, VOLUME = {3}, PUBLISHER = {Giunti-Marzocco (Firenze, Italia)}, ISSN = {0042-7349}, JOURNAL = {La Vita scolastica}, } @INPROCEEDINGS{LAUDANNA_1995_INPROCEEDINGS_LTBBM_265175, AUTHOR = {Laudanna, A. and Thornton, A. M. and Brown, G. and Burani, C. and Marconi, L.}, TITLE = {Un corpus dell'italiano scritto contemporaneo dalla parte del ricevente}, YEAR = {1995}, ABSTRACT = {In this paper we describe the criteria we adopted for the selection of a corpus composed of 3,000,000 words from Italian contemporary written texts. 
The corpus will give rise to a frequency dictionary, which should have two main characteristics: i) representativeness of the Italian texts which are actually read, rather than of all possible written texts, ii) usefulness for psycholinguistic research.}, KEYWORDS = {Corpora, Frequency, Frequency Dictionary, Psycholinguistics}, PAGES = {103--109}, URL = {https://publications.cnr.it/doc/265175}, VOLUME = {I}, PUBLISHER = {CISU (Roma, ITA)}, ISBN = {8879751603}, CONFERENCE_NAME = {JADT 1995 III Giornate Internazionali di ANALISI STATISTICA dei DATI TESTUALI}, CONFERENCE_PLACE = {CNR-Roma}, CONFERENCE_DATE = {11-13 Dicembre 1995}, BOOKTITLE = {JADT 1995 III Giornate Internazionali di ANALISI STATISTICA dei DATI TESTUALI}, EDITOR = {Bolasco, S. and Lebart, L. and Salem, A.}, } @TECHREPORT{PARDELLI_1995_TECHREPORT_P_242262, AUTHOR = {Pardelli, G.}, TITLE = {Verso una catalogazione dei testi della Biblioteca dell'ILC}, YEAR = {1995}, ABSTRACT = {Il report focalizza la tipologia del materiale bibliografico e la scelta dei criteri di soggettazione per la LC.}, KEYWORDS = {Linguistica Computazionale, soggetti argomentativi}, PAGES = {1--12}, URL = {https://publications.cnr.it/doc/242262}, } @ARTICLE{CALZOLARI_1994_ARTICLE_CP_409444, AUTHOR = {Calzolari, Z. N. and Peters, C.}, TITLE = {Lexical knowledge bases}, YEAR = {1994}, ABSTRACT = {One of the major current requirements in real world natural language processing applications is the construction of suitably structured and sufficiently exhaustive computational lexicons or lexical knowledge bases (LKBs). However, this is an extremely difficult, time consuming and expensive task, especially if it is necessary to start from scratch. 
For this reason, in recent years, researchers have begun to examine the potential of already existing resources, such as machine readable dictionaries and text archives, to see whether it is possible to develop procedures that can capture different kinds of lexical information from them, representing it in a computationally tractable and reusable formal framework.}, KEYWORDS = {Lexical databases, Information storage and retrieval. Dictionaries}, PAGES = {10--11}, URL = {https://publications.cnr.it/doc/409444}, VOLUME = {18}, PUBLISHER = {ERCIM (Le Chesnay)}, ISSN = {0926-4981}, JOURNAL = {ERCIM news}, } @ARTICLE{MARINAI_1994_ARTICLE_MPP_409603, AUTHOR = {Marinai, E. and Peters, C. and Picchi, E.}, TITLE = {A prototype system for the semi-automatic sense linking and merging of mono-and bilingual LDBs}, YEAR = {1994}, ABSTRACT = {This paper describes a method for the semi-automatic linking of lexical databases (LDBs) based on different source dictionaries and their partial merging. Equivalent entries from different dictionaries are mapped together and links are created between them at the sense level. The results can be modified interactively by the user and saved to form part of a new merged LDB whose entries will contain all the information included in the separate source LDBs. The aim is to provide a tool which makes it easier to compare and study lexical data derived from different sources and also to permit linguistic and lexical analyses of much richer data.}, KEYWORDS = {Lexical databases, Databases, Database management, Information search and retrieval}, PAGES = {97--108}, URL = {https://publications.cnr.it/doc/409603}, VOLUME = {3}, PUBLISHER = {Clarendon (Oxford, Regno Unito)}, ISSN = {0964-7090}, JOURNAL = {Research in humanities computing}, } @INPROCEEDINGS{PETERS_1994_INPROCEEDINGS_PFMZ_409402, AUTHOR = {Peters, C. and Federici, S. and Montemagni, S. and Zamorani, C. 
N.}, TITLE = {From machine readable dictionaries to lexicons for NLP: the cobuild dictionaries-a different approach}, YEAR = {1994}, ABSTRACT = {We describe the results of a syntactic-semantic parser for Cobuild dictionary definitions. Unlike previous work on the automatic analysis of machine readable dictionaries, the particular structure of the Cobuild definition allows us to derive information that classifies the lexical item mainly in terms of the selectional restrictions or preferences encoded on its arguments. The resulting formalized lexical entries contain data that has generally been lacking in other lexical representations but which is expected to be very useful in a wide range of NLP purposes. We show how this information can be used in dictionary sense disambiguation by creating links throughout the lexicon both on the paradigmatic and the syntagmatic axes.}, KEYWORDS = {Lexical databases, Information storage and retrieval. Dictionaries}, PAGES = {147--157}, URL = {https://publications.cnr.it/doc/409402}, CONFERENCE_NAME = {6th International Congress on Lexicography}, CONFERENCE_PLACE = {Amsterdam, The Netherlands}, CONFERENCE_DATE = {1994}, BOOKTITLE = {Euralex 1994 Proceedings}, EDITOR = {Martin, W.}, } @INPROCEEDINGS{PICCHI_1994_INPROCEEDINGS_PPM_409108, AUTHOR = {Picchi, E. and Peters, C. and Marinai, E.}, TITLE = {Strumenti computazionali per l'apprendimento delle lingue: una stazione di lavoro integrata}, YEAR = {1994}, ABSTRACT = {Presentiamo un prototipo di stazione di lavoro progettata per essere uno strumento efficiente e facile da usare per coloro che apprendono una seconda lingua. La stazione di lavoro è costituita da due componenti principali: una base di dati lessicali mono e bilingui e un sistema testuale per l'interrogazione di corpora testuali mono e bilingui. 
Il componente lessicale è corredato di un sistema di interrogazione per l'accesso ai dati dei dizionari monolingui e bilingui (nel prototipo presentato sono utilizzate come lingue di riferimento l'italiano e l'inglese). L'altro componente permette l'interrogazione degli archivi testuali mono e bilingui. Una procedura automatica è in grado di rendere "paralleli" due testi, uno traduzione dell'altro, permettendo una interrogazione simultanea di tali testi. La stazione di lavoro funziona su personal computer operativo MS/DOS e con un'interfaccia user friendly di tipo Windows.}, KEYWORDS = {Language, Arts and humanities}, PAGES = {189--200}, URL = {https://publications.cnr.it/doc/409108}, CONFERENCE_NAME = {Didamatica '94. Informatica per la didattica}, CONFERENCE_DATE = {Marzo 1994}, BOOKTITLE = {Atti-Lavori scientifici}, EDITOR = {Andronico, A. and Casadei, G. and Sacerdoti, G.}, } @TECHREPORT{BARNBROOK_1994_TECHREPORT_BCFHMPSS_446186, AUTHOR = {Barnbrook, G. and Calzolari, N. and Federici, S. and Hoelter, M. and Montemagni, S. and Peters, C. and Schnelle, H. and Sinclair, J.}, TITLE = {ET10/51-Deliverable 8: Evaluation Report}, YEAR = {1994}, ABSTRACT = {The objective of the work in Pisa has been to translate and produce instantiations of the syntactically parsed definitions of the Cobuild dictionary: provided by Birmingham in a Typed Feature Structure formalism. However, as described in Methodology above, our results have been produced at two different levels: intermediate results; final results in the form of TFS entries. In the following, we will discuss briefly the possible applications of these different results for the three user types recognized in the introduction to this section: i. Human user ii. Human user-assisted by the machine iii. 
The machine Obviously, the discussion here below refers entirely to the results that would be obtained once the parser has been applied to the whole dictionary.}, KEYWORDS = {Language, Computational linguistics, Formal Definitions and Theory}, PAGES = {38}, URL = {https://publications.cnr.it/doc/446186}, } @TECHREPORT{CALZOLARI_1994_TECHREPORT_CFMP_446200, AUTHOR = {Calzolari, N. and Federici, S. and Montemagni, S. and Peters, C.}, TITLE = {ET-10/51-Final Report: Par. 3-Extracting, representing and using syntactic-semantic information from cobuild definitions}, YEAR = {1994}, ABSTRACT = {In May 1992 a new research project brought together the authors of this report. With the help and support of several other people and institutions, they worked steadily for two years, trying to improve the design and building of machine-usable lexicons, for automatic translation and many other applications. The starting point was clear. Around 1989 Helmut Schnelle of the Ruhr-Universitat Bochum became interested in the way in which words were defined in a new kind of dictionary called Cobuild. He thought that since they were couched in sentences of apparently ordinary English, and had distinctive and repetitive shapes according to their meanings, it should be possible to represent them in logical form by means of regular rules.}, KEYWORDS = {Language, Computational linguistics, Formal Definitions and Theory, Semantics}, PAGES = {162}, URL = {https://publications.cnr.it/doc/446200}, } @BOOK{PARDELLI_1992_BOOK_PSM_241726, AUTHOR = {Pardelli, G. and Sassi, M. and Marinai, E.}, TITLE = {L'Archivio librario dell'I. L. C}, YEAR = {1992}, ABSTRACT = {Questo lavoro nasce dalla richiesta di colleghi e studenti di avere a disposizione un catalogo cartaceo per il reperimento di dati bibliografici del patrimonio librario dell'Istituto di Linguistica Computazionale. 
La Biblioteca dell'ILC fino a questo momento, infatti, si è avvalsa soltanto del catalogo on-line, attualmente gestito da un sistema informativo basato sulla procedura automatica di spoglio elettronico, DBT (Data Base Testuale), che permette il recupero delle informazioni contenute nell'archivio librario in tempo reale. Il catalogo segue questa strutturazione :a) Introduzione b) Premessa c) Criteri di catalogazione, d)I campi del database e) Esempi di interrogazione f) Indice per argomenti g) Indice per autori .}, KEYWORDS = {Linguistica Computazionale, Cataloghi, Sistema DBT}, PAGES = {1--131}, URL = {https://publications.cnr.it/doc/241726}, PUBLISHER = {S. T. A. R. Servizio Tecnografico Area Ricerca CNR (Pisa, ITA)}, } @INPROCEEDINGS{MARINAI_1992_INPROCEEDINGS_MPP_453170, AUTHOR = {Marinai, E. and Peters, C. and Picchi, E.}, TITLE = {Bilingual reference corpora: creation, querying, applications}, YEAR = {1992}, ABSTRACT = {The paper discusses the importance of bilingual reference corpora as valid sources of real-world renderings of texts written in one language (L1) in a second (L2) and illustrates their potential for exploitation in various kinds of crosslanguage studies. A system that has been developed for the creation management and interrogation of such corpora is presented and the integration of this system in a Workstation providing facilities to query and extract information from both mono- and bilingual text archives and lexical databases is described.}, KEYWORDS = {Bilingual reference corpora}, PAGES = {221--228}, URL = {https://publications.cnr.it/doc/453170}, CONFERENCE_NAME = {Complex '92}, CONFERENCE_PLACE = {Budapest, Hungary}, CONFERENCE_DATE = {4-8/10 1992}, } @INPROCEEDINGS{PICCHI_1992_INPROCEEDINGS_PPM_453168, AUTHOR = {Picchi, E. and Peters, C. 
and Marinai, E.}, TITLE = {A translator's workstation}, YEAR = {1992}, ABSTRACT = {A description is given of the present state of development of a workstation that has been designed to provide the translator with efficient and easy-to-use computational tools. The aim is to offer translators fast and flexible on-line access to existing dictionary databases and bilingual text archives and also to supply them with facilities for updating, adding to and personalizing the system data archives with their own material.}, KEYWORDS = {translator's workstation}, PAGES = {972--976}, URL = {https://publications.cnr.it/doc/453168}, CONFERENCE_NAME = {COLING-92}, CONFERENCE_PLACE = {Nantes, France}, CONFERENCE_DATE = {23-28/8 1992}, } @INPROCEEDINGS{PICCHI_1992_INPROCEEDINGS_PPM_453174, AUTHOR = {Picchi, E. and Peters, C. and Marinai, E.}, TITLE = {The Pisa lexicographic workstation: the bilingual components}, YEAR = {1992}, ABSTRACT = {The main components of the Pisa Lexicographic Workstation are a full text retrieval system and a lexical database system: each system incorporates procedures that have been implemented to meet the specific needs of the lexicographer. The paper describes the recent tailoring of existing modules and the development of new ones with bilingual lexicography in mind. The aim is to provide a flexible, user friendly system that can be employed in all stages of dictionary compilation, from the acquisition of citation material to the formatting of the entry for printing.}, KEYWORDS = {Computational lexicography, Computer-aided lexicography, Bilingual lexicography, Bilingual lexical databases, Parallel text retrieval acm: J. 5 Arts and Humanities}, PAGES = {277--285}, URL = {https://publications.cnr.it/doc/453174}, VOLUME = {A 2}, CONFERENCE_NAME = {EURALEX '92}, CONFERENCE_PLACE = {Tampere, Finland}, CONFERENCE_DATE = {1992}, } @INPROCEEDINGS{MARCONI_1991_INPROCEEDINGS_MR_270201, AUTHOR = {Marconi, L. 
and Ratti, D.}, TITLE = {Aplicaciones de redes neuronales al lenguaje natural}, YEAR = {1991}, ABSTRACT = {In the context of current connectivist theories we present a neural network simulating some language learning phenomena, i.e. synonyms, plural, recognition of semantic field. The performance of back-propagation algorithm for such net is computed for specific data. Finally we propose a coding scheme to language application.}, PAGES = {699--710}, URL = {https://publications.cnr.it/doc/270201}, VOLUME = {VI. 2}, PUBLISHER = {PPU, S. A (Barcelona, ESP)}, ISBN = {84-7665-867-2}, CONFERENCE_NAME = {VI Congreso de Lenguajes Naturales y Lenguajes Formales}, CONFERENCE_PLACE = {Tarragona, Spagna}, CONFERENCE_DATE = {17-21 settembre 1990}, BOOKTITLE = {Lenguajes Naturales y Lenguajes Formales}, EDITOR = {Martín Vide, C.}, } @INPROCEEDINGS{MARINAI_1991_INPROCEEDINGS_MPP_447130, AUTHOR = {Marinai, E. and Peters, C. and Picchi, E.}, TITLE = {A first prototype of a system for the semi-automatic sense linking and merging of mono-and bilingual ldbs}, YEAR = {1991}, ABSTRACT = {A method for the (semi-automatic) linking of lexical data bases based on different source dictionaries and their partial merging is presented. The results can be modified interactively by the user and saved to form part of a new merged LDB whose entries will contain all the information included in the separate source LDBs. The aim is to provide not only a tool which makes it easier to compare and study lexical data derived from different sources but also to permit linguistic and lexical analyses on much richer data.}, KEYWORDS = {prototype, system, semi-automatic sense linking mono ldbs, bilingual ldbs}, PAGES = {293--298}, URL = {https://publications.cnr.it/doc/447130}, CONFERENCE_NAME = {Making connections}, CONFERENCE_PLACE = {Tempe, Arizona}, CONFERENCE_DATE = {March 17-21 1991}, } @INPROCEEDINGS{MARINAI_1991_INPROCEEDINGS_MPP_447148, AUTHOR = {Marinai, E. and Peters, C. 
and Picchi, E.}, TITLE = {BILINGUAL REFERENCE CORPORA-A SYSTEM FOR PARALLEL TEXT RETRIEVAL}, YEAR = {1991}, ABSTRACT = {A system for the automatic creation and retrieval of parallel concordances from a Bilingual Reference Corpus is described. At present, the system runs on a test set of ltalian/English 'bilingual' texts. A description is given of the different stages of the procedure which aligns and links equivalent texts in the two languages and of the way in which the query system uses these links to construct parallel contexts. The procedure uses a number of the components of the PiSystem, an integrated set of tools for text processing and analysis, including morphological analyzers and generators, and monolingual and bilingual lexical database systems. The user can use the system to query either of the two sets of texts (Italian and English) and, for any form or cooccurrences of forms found in the set of texts for one language, can retrieve parallel contrastive contexts from the other. The system should be of particular interest to bilingual lexicographers, translators and linguists, and should also find applications in the office automation area and in the language industry in general.}, KEYWORDS = {BILINGUAL REFERENCE CORPORA, SYSTEM, PARALLEL TEXT RETRIEVAL}, PAGES = {63-70}, URL = {https://publications.cnr.it/doc/447148}, CONFERENCE_NAME = {Seventh Annual Conference of the UW Centre for the New Oed and Text Research. Using Corpora}, CONFERENCE_PLACE = {Oxford, UK}, CONFERENCE_DATE = {29 September-1 October 1991}, } @INPROCEEDINGS{RATTI_1991_INPROCEEDINGS_RM_270196, AUTHOR = {Ratti, D. and Marconi, L.}, TITLE = {Criterios y Metodos para la creacion de un lexico de frecuencia de la lengua escrita conocida y usada por los ninos}, YEAR = {1991}, ABSTRACT = {The aim of this project is to identify the active and passive lexicon possesed, in the average, by elementary school children. 
We use the computer to create and explore, with appropriate methodologies, a reference-corpus of such lexicon.}, PAGES = {861-867}, URL = {https://publications.cnr.it/doc/270196}, VOLUME = {VI. 2}, PUBLISHER = {PPU, S. A (Barcelona, ESP)}, ISBN = {84-7665-867-2}, CONFERENCE_NAME = {VI Congreso de Lenguajes Naturales y Lenguajes Formales}, CONFERENCE_PLACE = {Taragona, Spagna}, CONFERENCE_DATE = {17-21 settembre 1990}, BOOKTITLE = {Lenguajes Naturales y Lenguajes Formales}, EDITOR = {Martín Vide, C.}, } @INPROCEEDINGS{PICCHI_1990_INPROCEEDINGS_PPC_452773, AUTHOR = {Picchi, E. and Peters, C. and Calzolari, N.}, TITLE = {A tool for the second language learner organizing bilingual dictionary data in an interactive workstation}, YEAR = {1990}, ABSTRACT = {The data contained in a conventional English/ ltalian, ltalian/ English dictionary, recorded in machine-readable form and coded for computer typesetting, have been parsed and organized in a database type structure on a persona! computer. The aim is to implement an interactive bilingual lexical workstation in order to offer the second language learner and user a tool which overcomes many of the restrictions traditionally imposed by the norma! organization of the printed bi/inguai dictionary and allows him to consult and exploit the lexical materia! 1n new ways.}, KEYWORDS = {Second language learner, Bilingual dictionary, Workstation}, PAGES = {334-344}, URL = {https://publications.cnr.it/doc/452773}, VOLUME = {44}, ISBN = {2-05-101079-X}, CONFERENCE_NAME = {15th International Conference A. L. L. C}, CONFERENCE_PLACE = {Jerusalem, Israel}, CONFERENCE_DATE = {5-9/06/1988}, EDITOR = {Choueka, Y.}, } @MISC{ZAMPOLLI_1989_MISC_ZMP_242294, AUTHOR = {Zampolli, A. and Marinelli, R. 
and Pardelli, G.}, TITLE = {I sistemi di documentazione dell'ILC}, YEAR = {1989}, ABSTRACT = {Il documento si concentra sui punti seguenti: a) Recupero e trasformazione dei materiali codificati su archivi magnetici e costruzione di un sistema informativo per la gestione mista di archivi con dati catalografici normalizzati e dati bibliografici; b) Sistema di gestione dell' d'archivio scientifico per le varie lingue; c) Recupero e trasformazione di informazioni bibliografiche da VM a altro sistema informativo.}, KEYWORDS = {Documentazione, Linguistica Computazionale}, PAGES = {1-13}, URL = {https://publications.cnr.it/doc/242294}, } @MISC{BIANCHIBANDINELLI_1986_MISC_BBBFFMSV_420087, AUTHOR = {Bianchi Bandinelli, R. and Beltrame, R. and Bindi, R. and Faconti, G. and Ferrini, R. and Masserotti, M. V. and Severino, G. and Vasarelli, G.}, TITLE = {Relazione tecnica per la revisione del parco stampanti del CNUCE e dell'Istituto di Linguistica Computazionale}, YEAR = {1986}, ABSTRACT = {No abstract available}, KEYWORDS = {Park Printers}, PAGES = {20}, URL = {https://publications.cnr.it/doc/420087}, }