@INCOLLECTION{MALLIA_2024_INCOLLECTION_MBQ_485252, AUTHOR = {Mallia, M. and Bandini, M. and Quochi, V.}, TITLE = {An interface for linking ancient languages}, YEAR = {2024}, ABSTRACT = {The paper focuses on the linking potentials offered by the EpiLexO web-based front-end for the creation and editing of an ecosystem of digital resources for ancient languages, developed in the context of a project on the languages of fragmentary attestation of ancient Italy. The focus is particularly on mechanisms introduced for linking lexical information to other information bits either internally or externally, e.g. for creating attestations by linking lexical forms to their variants in relevant inscriptions, as well as for linking lexical data to external independent LOD datasets available on a remote endpoint. Finally, in the conclusions, we briefly introduce some future planned or desired enhancements as well as the final platform component, a parallel interface that constitutes the fruition application, which will be open to anyone on the web and will allow for browsing, searching, cross-querying and visualising the created set of interlinked resources.}, KEYWORDS = {eLexicography, Ancient languages, Linguistic Linked Open Data, Digital historical linguistics}, PAGES = {1-12}, URL = {https://publications.cnr.it/doc/485252}, PUBLISHER = {CEUR-WS. org (Aachen, DEU)}, BOOKTITLE = {Proceedings of the CLaDA-BG 2023 Conference: Language Technologies and Digital Humanities: Resources and Applications (LTaDH-RA), Sofia, Bulgaria, 10-12 May 2023}, EDITOR = {Simov, K. and Osenova, P.}, } @MISC{BANDINI_2024_MISC_BQ_491319, AUTHOR = {Bandini, M. 
and Quochi, V.}, TITLE = {EpiLexO-User Manual}, YEAR = {2024}, ABSTRACT = {This document contains a user manual for EpiLexO, a specialized web platform designed for the creation and editing of lexical resources, associated evidence, references, and relevant bibliography of fragmentary languages of ancient Italy such as Oscan, Faliscan, and Venetic in the context of the project: Languages and Cultures of Ancient Italy. Historical Linguistics and Digital Models (PRIN 2017XJLE8J). The platform, a single-page web application, includes several sections, each of which provides functionality for generating or editing lexical resources and establishing connections and links between their elements and different sets of internal and external resources. This User Manual aims to help users understand the interface by illustrating its functions with step-by-step instructions, examples, and troubleshooting guidance. Its target audience includes historical linguists, digital humanists and epigraphists whose research is based on linguistics and philology.}, KEYWORDS = {Digital Epigraphy, Restsprachen, Lexicon Linking, Ancient Languages, eLexicography, User Manual, Interface}, PAGES = {1-77}, URL = {https://doi.org/10.5281/zenodo.10475219}, } @ARTICLE{ALZETTA_2023_ARTICLE_ADMPV_488202, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Miaschi, A. and Prat, E. and Venturi, G.}, TITLE = {Tell me how you write and I'll tell you what you read: a study on the writing style of book reviews}, YEAR = {2023}, ABSTRACT = {Purpose: The authors' goal is to investigate variations in the writing style of book reviews published on different social reading platforms and referring to books of different genres, which enables acquiring insights into communication strategies adopted by readers to share their reading experiences. 
Design/methodology/approach: The authors propose a corpus-based study focused on the analysis of A Good Review, a novel corpus of online book reviews written in Italian, posted on Amazon and Goodreads, and covering six literary fiction genres. The authors rely on stylometric analysis to explore the linguistic properties and lexicon of reviews and the authors conducted automatic classification experiments using multiple approaches and feature configurations to predict either the review's platform or the literary genre. Findings: The analysis of user-generated reviews demonstrates that language is a quite variable dimension across reading platforms, but not as much across book genres. The classification experiments revealed that features modelling the syntactic structure of the sentence are reliable proxies for discerning Amazon and Goodreads reviews, whereas lexical information showed a higher predictive role for automatically discriminating the genre. Originality/value: The high availability of cultural products makes information services necessary to help users navigate these resources and acquire information from unstructured data. This study contributes to a better understanding of the linguistic characteristics of user-generated book reviews, which can support the development of linguistically-informed recommendation services. Additionally, the authors release a novel corpus of online book reviews meant to support the reproducibility and advancements of the research.}, KEYWORDS = {Stylometric analysis, Genre detection, Natural language processing, Book reviews}, PAGES = {23}, URL = {https://www.emerald.com/insight/content/doi/10.1108/JD-04-2023-0073/full/html}, VOLUME = {79}, DOI = {10.1108/JD-04-2023-0073}, PUBLISHER = {Emerald (Bingley, Regno Unito)}, ISSN = {0022-0418}, JOURNAL = {Journal of documentation}, } @ARTICLE{BACCO_2023_ARTICLE_BDLMN_488201, AUTHOR = {Bacco, L. and Dell'Orletta, F. and Lai, H. and Merone, M. 
and Nissim, M.}, TITLE = {A text style transfer system for reducing the physician-patient expertise gap: An analysis with automatic and human evaluations}, YEAR = {2023}, ABSTRACT = {Physicians and patients often come from different backgrounds and have varying levels of education, which can result in communication difficulties in the healthcare process. To address this expertise gap, we present a "Text Style Transfer" system. Our system uses Semantic Textual Similarity techniques based on Sentence Transformers models to create pseudo-parallel datasets from a large, non-parallel corpus of lay and expert texts. This approach allowed us to train a denoising autoencoder model (BART), overcoming the limitations of previous systems. Our extensive analysis, which includes both automatic metrics and human evaluations from both lay (patients) and expert (physicians) individuals, shows that our system outperforms state-of-the-art models and is comparable to human-provided gold references in some cases.}, KEYWORDS = {Healthcare, Natural language processing, Text style transfer, Text simplification}, PAGES = {1-18}, URL = {https://www.sciencedirect.com/science/article/pii/S0957417423013763}, VOLUME = {233}, DOI = {10.1016/j.eswa.2023.120874}, PUBLISHER = {Pergamon (Oxford, Regno Unito)}, ISSN = {0957-4174}, JOURNAL = {Expert systems with applications}, } @ARTICLE{BIFFI_2023_ARTICLE_BGMS_490948, AUTHOR = {Biffi, M. and Guadagnini, E. and Montemagni, S. and Sassolini, E.}, TITLE = {Il lemmario del «GDLI»: dati quantitativi e prime osservazioni}, YEAR = {2023}, ABSTRACT = {Dopo la realizzazione della versione elettronica del solo testo del "Grande dizionario della lingua italiana" (GDLI), si è avviato un progetto di graduale informatizzazione della sua struttura. Questo articolo ne presenta il primo risultato, vale a dire l'estrazione automatica del lemmario che è così per la prima volta quantificabile e individuabile. 
Una prima parte del testo è dedicata all'illustrazione della strutturazione dei contenuti del dizionario e la loro rappresentazione secondo standard internazionalmente riconosciuti (XML-TEI); la seconda presenta una prima elaborazione dei dati del lemmario estratto; la terza propone una prima analisi comparativa con i lemmari di altri dizionari della lingua italiana.}, KEYWORDS = {Lessicografia, Lessicografia digitale, Lessicografia storica}, PAGES = {331-351}, URL = {https://accademiadellacrusca.it/it/riviste/articoli/slei-xl-2023/8679}, VOLUME = {40}, PUBLISHER = {Le Lettere (Firenze, Italia)}, ISSN = {0392-5218}, JOURNAL = {Studi di lessicografia italiana}, } @ARTICLE{BURGASSI_2023_ARTICLE_BG_478887, AUTHOR = {Burgassi, C. and Guadagnini, E.}, TITLE = {Per studiare il vocabolario del passato. La posizione delle parole in epoca storica}, YEAR = {2023}, ABSTRACT = {This paper aims to propose a new method for describing the lexicon of a language in a specific period of its history. The first paragraph outlines the two main ideas to be found in the studies concerning both synchronic and diachronic lexicology. In the second para-graph our method for lexical inquiry is presented along with its core concepts, such as textual Corpus Representativeness, Connotation, Connotation Rate (Quoziente Connota-tivo, QC) and word Position in the Center-Periphery Vocabulary Model. The third para-graph sketches two possible research lines, the first one regarding the lexicon of a given historical period (Old Italian), the second dealing with the comparison between two differ-ent linguistic historical phases (Old Italian vs. 
Contemporary Italian).}, KEYWORDS = {Historical Lexicology, Corpus Linguistics, Word Connotation, Word Position, Center-Periphery Vocabulary Model}, PAGES = {1-18}, URL = {https://revistas.uam.es/chimera/article/view/15698}, VOLUME = {10}, DOI = {10.15366/chimera2023.10.001}, PUBLISHER = {UAM ([Madrid], Spagna)}, ISSN = {2386-2629}, JOURNAL = {Chimera (Madrid)}, } @ARTICLE{CERULLI_2023_ARTICLE_CBD_491082, AUTHOR = {Cerulli, A. and Brunato, D. and Dell'Orletta, F.}, TITLE = {Linguistic Profile of a Text and Human Ratings of Writing Quality: a Case Study on Italian L1 Learner Essays}, YEAR = {2023}, ABSTRACT = {This paper presents a study based on the linguistic profiling methodology to explore the relationship between the linguistic structure of a text and how it is perceived in terms of writing quality by humans. The approach is tested on a selection of Italian L1 learners essays, which were taken from a larger longitudinal corpus of essays written by Italian L1 students enrolled in the first and second year of lower secondary school. Human ratings of writing quality by Italian native speakers were collected through a crowdsourcing task, in which annotators were asked to read pairs of essays and rated which one they believed to be better written. 
By analyzing these ratings, the study identifies a variety of linguistic phenomena spanning across distinct levels of linguistic description that distinguish the essays considered as 'winners' and evaluates the impact of students' errors on the human perception of writing quality.}, KEYWORDS = {human ratings, text quality, Natural Language Processing, learner corpus}, PAGES = {7-34}, URL = {https://www.ai-lc.it/wp-content/uploads/2023/09/IJCOL_9_1_1_cerulli_et_al.pdf}, VOLUME = {1}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{CHIARELLA_2023_ARTICLE_C_485365, AUTHOR = {Chiarella, D.}, TITLE = {Towards Multi-AUV Collaboration and Coordination: A Gesture-Based Multi-AUV Hierarchical Language and a Language Framework Comparison System}, YEAR = {2023}, ABSTRACT = {The underwater environment is a harmful environment, yet one of the richest and least exploited. For these reasons the idea of a robotic companion with the task of supporting and monitoring divers during their activities and operations has been proposed. However, the idea of a platoon of robots at the diver's disposal has never been fully addressed in these proposals due to the high cost of implementation and the usability, weight and bulk of the robots. Nevertheless, recent advancements in swarm robotics, materials engineering, deep learning, and the decreasing cost of autonomous underwater vehicles (AUVs), have rendered this concept increasingly viable. Therefore, this paper introduces, in the first part, a novel framework that integrates a revised version of a gesture-based language for underwater human-robot interaction (Caddian) based on insights gained from extensive field trials. 
The newly introduced objective of this framework is to enable the cooperation and coordination of an AUV team by one or more human operators, while allowing a human operator to delegate a robot leader to instruct the other robotic team members. The work, in the second part, provides an evaluation of the new language proposed thanks to a fifty million sentence corpus and describes a comparison framework, which is used to estimate it with respect to other existing underwater human-robot interaction languages.}, KEYWORDS = {gesture-based language, underwater human-robot interaction, multi-AUV collaboration, language corpora and resources}, PAGES = {28}, URL = {https://publications.cnr.it/doc/485365}, VOLUME = {11}, DOI = {10.3390/jmse11061208}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2077-1312}, JOURNAL = {Journal of marine science and engineering}, } @ARTICLE{ERJAVEC_2023_ARTICLE_EOOLSPRKBSCDDAVPDNLCRMKDRVMF_470080, AUTHOR = {Erjavec, T. and Ogrodniczuk, M. and Osenova, P. and Ljubesic, N. and Simov, K. and Pancur, A. and Rudolf, M. and Kopp, M. and Barkarson, S. and Steingrimsson, S. and Coltekin, C. and De Does, J. and Depuydt, K. and Agnoloni, T. and Venturi, G. and Perez, M. C. and De Macedo, L. D. and Navarretta, C. and Luxardo, G. and Coole, M. and Rayson, P. and Morkevicius, V. and Krilavicius, T. and Dargis, R. and Ring, O. and Van Heusden, R. and Marx, M. and Fiser, D.}, TITLE = {The ParlaMint corpora of parliamentary proceedings}, YEAR = {2023}, ABSTRACT = {This paper presents the ParlaMint corpora containing transcriptions of the sessions of the 17 European national parliaments with half a billion words. The corpora are uniformly encoded, contain rich meta-data about 11 thousand speakers, and are linguistically annotated following the Universal Dependencies formalism and with named entities. 
Samples of the corpora and conversion scripts are available from the project's GitHub repository, and the complete corpora are openly available via the CLARIN.SI repository for download, as well as through the NoSketch Engine and KonText concordancers and the Parlameter interface for on-line exploration and analysis.}, KEYWORDS = {Parlamentary proceedings, Linguistic annotation, Universal Dependencies}, PAGES = {1-34}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85124105199\&origin=inward}, DOI = {10.1007/s10579-021-09574-0}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @ARTICLE{FOLESANI_2023_ARTICLE_FBPTMTZNCBRDCG_482226, AUTHOR = {Folesani, F. and Belvederi, M. M. and Puggioni, C. and Tiberto, E. and Marella, M. and Toffanin, T. and Zerbinati, L. and Nanni, M. G. and Caruso, R. and Brunato, D. and Ravelli, A. A. and Dell'Orletta, F. and Chochinov, H. M. and Grassi, L.}, TITLE = {Linguistic markers of demoralization improvement in schizophrenia: A pilot study}, YEAR = {2023}, ABSTRACT = {Background and objectives: Individuals with schizophrenia display language impairments involving pragmatics, semantics and syntax. Language impairments may show diagnostic specificity and could relate to the ability of engaging in psychotherapy. This pilot study sought to: (1) identify linguistic features that might differentiate individuals with schizophrenia from distressed controls without psychotic symptoms; and (2) examine the association between linguistic abilities and clinical changes during psychotherapy. Methods: We recruited patients with schizophrenia and a comparison group of individuals with demoralization and distress due to cancer. Participants underwent Dignity Therapy (DT), an existentially-oriented brief psychotherapy focused on legacy and subjective dignity. Verbatim transcripts of the DT sessions were analysed using Natural Language Processing (NLP). 
In addition, we measured changes in levels of demoralization and dignity-related distress before and after DT, exploring the association with linguistic variables with network analysis. Results: Patients with schizophrenia could be differentiated from those with cancer-related distress using only three out of 141 linguistic variables: total number of words, number of prepositional chains and conversational elements. Across groups, better levels of discourse coherence and higher number of arguments controlled by a predicate (verb "arity") were associated with larger improvements in demoralization and, indirectly, dignity-related distress. Conclusions: Reproducible linguistic markers may be able to differentiate individuals with schizophrenia from those with less severe psychopathology, and to predict better uptake of psychotherapy independent from diagnosis. Future studies should explore whether linguistic features derived from NLP may be exploited as accessible diagnostic or prognostic markers to tailor psychotherapy and other interventions in schizophrenia.}, KEYWORDS = {Schizophrenia, Dignity Therapy, Natural Language Processing, Linguistic Profiling, Psychotherapy}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85153800425\&origin=inward}, DOI = {10.1016/j.ejpsy.2023.03.001}, PUBLISHER = {European Journal of Psychiatry (Saragosse, Spagna)}, ISSN = {0213-6163}, JOURNAL = {The European journal of psychiatry}, } @ARTICLE{GUADAGNINI_2023_ARTICLE_G_490947, AUTHOR = {Guadagnini, E.}, TITLE = {Una breve storia del 'cadavere': caduti latini, corpi morti romanzi e una postilla dantesca}, YEAR = {2023}, ABSTRACT = {The designations of death, dying, and the dead have been extensively studied, especially since they are often subject to linguistic taboo and are therefore named through euphemisms and dysphemisms. 
This contribution will reconstruct the history of the lexical type cadaver, in parallel with corpus (mortuum), from ancient Latin to modern Romance languages: the 'X-phemic' model will be discussed, but the study will adopt a semasiological perspective.}, KEYWORDS = {Lexicology, Romance Linguistics, Corpse, Dante Alighieri, Corpo morto}, PAGES = {129-152}, URL = {https://edizionicafoscari.unive.it/en/edizioni4/riviste/transcript/2023/2/una-breve-storia-del-cadavere-caduti-latini-corpi/}, VOLUME = {2}, DOI = {10.30687/TranScript/2785-5708/2023/04/001}, PUBLISHER = {Edizioni Ca' Foscari (Venezia, Italia)}, ISSN = {2785-5708}, JOURNAL = {TranScript (Venezia)}, } @ARTICLE{GUADAGNINI_2023_ARTICLE_G_490949, AUTHOR = {Guadagnini, E.}, TITLE = {[recensione] Toscana bilingue (1260 ca.-1430 ca.). Per una storia sociale del tradurre medievale}, YEAR = {2023}, KEYWORDS = {Volgarizzamenti, Traduttologia, Storia medievale}, PAGES = {239-243}, URL = {https://publications.cnr.it/doc/490949}, VOLUME = {87}, ISSN = {0035-1458}, JOURNAL = {Revue de linguistique romane}, } @ARTICLE{MARZI_2023_ARTICLE_MMV_490328, AUTHOR = {Marzi, C. and Melloni, C. and Vender, M.}, TITLE = {Finger-tracking reading profiles in monolingual and bilingual early graders}, YEAR = {2023}, ABSTRACT = {In this paper we propose an analysis of the reading behaviour of a group of Italian monolingual (n= 24) and bilingual (n= 35) 2nd schoolgraders, engaged in the tasks of reading aloud lists of isolated words and nonwords (from the DDE-2 test battery), as well as narrative connected texts displayed on the touch-screen of a common tablet, to be read either aloud or silently. A finger-tracking technique is illustrated, which provides detailed information about the reading behaviour and attention focus of early graders. Our results reveal various differences between groups. 
In particular, a different tracking pattern emerged in reading long, morphologically-complex word forms, correlating with a higher decoding error rate and comprehension difficulties in bilingual children compared with their monolingual peers. We suggest that the unsteady, discontinuous reading pattern for long noun and verb forms may be due to a (proto)-morphological reading strategy, with monolingual children being more successful in benefiting from a morpheme-based reading route. We also discuss the potentials of the finger-tracking technique as a tool to offer a more profound and comprehensive analysis of the reading profiles of both monolingual and bilingual readers.}, KEYWORDS = {developing readers, bilingualism, L2 literacy, connected text reading, morphological processing, finger-tracking}, PAGES = {327-361}, URL = {https://www.rivisteweb.it/doi/10.1418/109051}, VOLUME = {XXII}, DOI = {10.1418/109051}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{MARZI_2023_ARTICLE_MP_485504, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {A discriminative information-theoretical analysis of the regularity gradient in inflectional morphology}, YEAR = {2023}, ABSTRACT = {Over the last decades, several independent lines of research in morphology have questioned the hypothesis of a direct correspondence between sublexical units and their mental correlates. Word and paradigm models of morphology shifted the fundamental part-whole relation in an inflection system onto the relation between individual inflected word forms and inflectional paradigms. In turn, the use of artificial neural networks of densely interconnected parallel processing nodes for morphology learning marked a radical departure from a morpheme-based view of the mental lexicon. 
Lately, in computational models of Discriminative Learning, a network architecture has been combined with an uncertainty reducing mechanism that dispenses with the need for a one-to-one association between formal contrasts and meanings, leading to the dissolution of a discrete notion of the morpheme.The paper capitalises on these converging lines of development to offer a unifying information-theoretical, simulation-based analysis of the costs incurred in processing (ir)regularly inflected forms belonging to the verb systems of English, German, French, Spanish and Italian. Using Temporal Self-Organising Maps as a computational model of lexical storage and access, we show that a discriminative, recurrent neural network, based on Rescorla-Wagner's equations, can replicate speakers' exquisite sensitivity to widespread effects of word frequency, paradigm entropy and morphological (ir)regularity in lexical processing. The evidence suggests an explanatory hypothesis linking Word and paradigm morphology with principles of information theory and human perception of morphological structure. According to this hypothesis, the ways more or less regularly inflected words are structured in the mental lexicon are more related to a reduction in processing uncertainty and maximisation of predictive efficiency than to economy of storage.}, KEYWORDS = {Morphological inflection, Morphological regularity, Prediction-driven processing, Discriminative learning, Lexical self-organisation, Gradient structure, Information theory, Non-linear modelling}, PAGES = {1-51}, URL = {https://doi.org/10.1007/s11525-023-09415-6}, DOI = {10.1007/s11525-023-09415-6}, PUBLISHER = {Springer (Heidelberg, Paesi Bassi)}, ISSN = {1871-5621}, JOURNAL = {Morphology (Dordrecht)}, } @ARTICLE{MAZZARINO_2023_ARTICLE_MM_483114, AUTHOR = {Mazzarino, S. 
and Marzi, C.}, TITLE = {Morphological processing in Italian L2 developing readers: a pilot study}, YEAR = {2023}, ABSTRACT = {In this paper we focus on the morphological competence and awareness of 23 Italian second-language (L2) school children, by comparing the reading profiles of Italian L1 and L2 children attending primary school from 2nd to 5th grades. Reading data were collected through the experimental finger-tracking protocol developed within the ReadLet project, which supports collecting and structuring behavioural reading data of short narrative texts displayed on a tablet touch-screen. The analyses reproduced the main effects that are well-attested in the developmental literature, and pointed out some differences in the behavioural profile of L2 versus L1 children, with the former being more affected by word length and frequency effects, as well as by the aloud reading task than the latter. Interestingly, however, a functional morphological segmentation strategy emerges in L2 readers processing complex inflected forms during the aloud reading task. We interpret it as a possible strategy to alleviate the extra cognitive load associated with the overt articulation of morphologically complex words within the context of a connected text.}, KEYWORDS = {reading, bilingualism, morphological awareness, developing readers, word processing}, PAGES = {143-166}, URL = {https://publications.cnr.it/doc/483114}, VOLUME = {XXII}, DOI = {10.1418/107679}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{MIASCHI_2023_ARTICLE_MABDV_488203, AUTHOR = {Miaschi, A. and Alzetta, C. and Brunato, D. and Dell'Orletta, F. 
and Venturi, G.}, TITLE = {Testing the Effectiveness of the Diagnostic Probing Paradigm on Italian Treebanks}, YEAR = {2023}, ABSTRACT = {The outstanding performance recently reached by neural language models (NLMs) across many natural language processing (NLP) tasks has steered the debate towards understanding whether NLMs implicitly learn linguistic competence. Probes, i.e., supervised models trained using NLM representations to predict linguistic properties, are frequently adopted to investigate this issue. However, it is still questioned if probing classification tasks really enable such investigation or if they simply hint at surface patterns in the data. This work contributes to this debate by presenting an approach to assessing the effectiveness of a suite of probing tasks aimed at testing the linguistic knowledge implicitly encoded by one of the most prominent NLMs, BERT. To this aim, we compared the performance of probes when predicting gold and automatically altered values of a set of linguistic features. Our experiments were performed on Italian and were evaluated across BERT's layers and for sentences with different lengths. As a general result, we observed higher performance in the prediction of gold values, thus suggesting that the probing model is sensitive to the distortion of feature values. However, our experiments also showed that the length of a sentence is a highly influential factor that is able to confound the probing model's predictions.}, KEYWORDS = {Neural language model, Probing tasks, Treebanks}, PAGES = {19}, URL = {https://www.mdpi.com/2078-2489/14/3/144}, VOLUME = {14}, DOI = {10.3390/info14030144}, PUBLISHER = {MDPI (Basel, Svizzera)}, ISSN = {2078-2489}, JOURNAL = {Information (Basel)}, } @ARTICLE{MURANO_2023_ARTICLE_MQDRZ_485254, AUTHOR = {Murano, F. and Quochi, V. and Del Grosso, A. M. and Rigobianco, L. and Zinzi, M.}, TITLE = {Describing Inscriptions of Ancient Italy. 
The ItAnt Project and Its Information Encoding Process}, YEAR = {2023}, ABSTRACT = {This paper discusses the challenges addressed in the digital scholarly encoding of the fragmentary texts of the languages of Ancient Italy according to the TEI/EpiDoc Guidelines in XML format. This contribution describes the solutions and customisations that have been adopted for dealing with the peculiarities of our epigraphical documentation and with the formalisation of epigraphical information deemed interesting for data retrieval in a historical linguistic perspective. The making of a digital corpus consisting of new critical editions of selected inscriptions is a work carried out in the context of the project "Languages and Cultures of Ancient Italy. Historical Linguistics and Digital Models", which aims to investigate the languages of Ancient Italy by combining the traditional methods, proper to historical linguistics, with methods and technologies proper to the digital humanities and computational lexicography. More specifically, the purpose of the project is to create a collection of interrelated digital language resources which comprise: 1) the digital corpus of texts editions; 2) a computational lexicon compliant with the Web Semantic requirements; 3) a relevant bibliographic reference dataset encoded according to the FRBRoo/LRMoo specifications. Additionally, selected textual data and scientific interpretations will be encoded by using CIDOC CRM and its extensions, namely CRMtex and CRMinf. 
The present contribution tackles one of the main aspects of the project, and proposes significant innovations in the encoding of critical editions for epigraphic texts of fragmentary languages, which will hopefully foster future interoperability and integration with other external datasets, a paramount concern of the project.}, KEYWORDS = {text encoding, ancient languages, digital epigraphy, TEI/EpiDoc}, PAGES = {15}, URL = {https://dl.acm.org/doi/pdf/10.1145/3606703}, VOLUME = {16}, DOI = {10.1145/3606703}, PUBLISHER = {Association for Computing Machinery (New York, NY, Stati Uniti d'America)}, ISSN = {1556-4711}, JOURNAL = {Journal on computing and cultural heritage (Online)}, } @ARTICLE{PROIETTI_2023_ARTICLE_PC_481807, AUTHOR = {Proietti, C. and Chiarella, D.}, TITLE = {The Role of Argument Strength and Informational Biases in Polarization and Bi-Polarization Effects}, YEAR = {2023}, ABSTRACT = {This simulation research explores the informational causes of polarization and bi-polarization of opinions within groups. We define 'polarization' here as a uniform change of the opinion of the whole group in the same direction, whereas 'bi-polarization' indicates a split of two subgroups towards opposite directions. For our purposes, we have expanded the model of the Argument Communication Theory of Bi-polarization. This is an argument-based multi-agent model of opinion dynamics inspired by Persuasive Argument Theory. The original model accounts for polarization as an outcome of pure informational influence and reproduces bipolarization effects by postulating an additional mechanism of homophilous selection of communication partners. The expanded model adds two dimensions: i.e., argument strength and more sophisticated protocols of informational influence (argument communication and opinion update). 
Adding the first dimension, allows us to investigate whether and how the presence of stronger or weaker arguments in a discussion influences polarization and bi-polarization dynamics, as suggested by the original framework of Persuasive Arguments Theory. The second feature allows us to test whether other mechanisms related to confirmation bias and epistemic vigilance can act as a driving force of bi-polarization. For the first issue, our simulations showed that argument strength has a measurable effect. For the second, our results would indicate that, in absence of homophily, only very strong types of informational bias can lead to bi-polarization.}, KEYWORDS = {Argumentation, Argument Communication Theory, Polarization, Bi-Polarization, Epistemic Vigilance, Opinion dynamics}, PAGES = {25}, URL = {https://www.jasss.org/26/2/5.html}, VOLUME = {26}, DOI = {10.18564/jasss.5062}, PUBLISHER = {SimSoc Consortium ([Guildford], Regno Unito)}, ISSN = {1460-7425}, JOURNAL = {JASSS (Guildf.)}, } @ARTICLE{SALES_2023_ARTICLE_SATD_488204, AUTHOR = {Sales, S. S. and Alzetta, C. and Tatay, C. M. and Dell'Orletta, F.}, TITLE = {Analysing Deception in Witness Memory Though Linguistic Styles in Spontaneous Language}, YEAR = {2023}, ABSTRACT = {The act of lying and its detection have raised interest in many fields, from the legal system to our daily lives. Considering that testimonies are commonly based on linguistic parameters, natural language processing, a research field concerned with programming computers to process and analyse natural language texts or speech, is a topic of interest on this front. This study aimed to examine the linguistic styles of simulated deception and true testimonies collected with the aim of studying witness memory. Study participants were asked to act as a witness of a crime by retelling the story they had just read. Cognitive interviewing techniques were used to collect testimony under two conditions: truth and simulated deception. 
A sample of 48 participants volunteered to participate in the study. Analyses of the linguistic indicators and content were carried out. Specifically, we performed a comparison of testimonies of the same participant by condition to analyse the variation between (i) lexical and (ii) linguistic features and (iii) content and speech characteristics (disfluencies) depending on the narrative condition. Concerning lexical properties, adjectives were the most-varying grammatical category between truthful and deceptive testimonies. Furthermore, in the linguistic analysis, we observed that truthful testimonies were generally longer than deceptive ones in terms of the number of words and sentences and also characterised by more articulated sentence structures, and these differences were also statistically significant. Regarding the analysis of the content, cognitive criteria (details) and admitting lack of memory were more present in truthful statements. By providing an objective measure, these results are of interest in developing NLP tools for assessing the credibility of testimonies in forensics.}, KEYWORDS = {Natural language processing, Simulated deception, Stylometric analysis}, PAGES = {26}, URL = {https://www.mdpi.com/2076-3425/13/2/317}, VOLUME = {13}, DOI = {10.3390/brainsci13020317}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2076-3425}, JOURNAL = {Brain sciences}, } @ARTICLE{VENUTI_2023_ARTICLE_VDBTPBCM_485318, AUTHOR = {Venuti, M. C. and Del Grosso, A. M. and Boschetti, F. and Tessarolo, L. and Prontera, A. and Bovet, D. and Cattaneo, G. 
and Melis, V.}, TITLE = {La 'Galassia MQDQ': un concetto di filologia tradizionale, digitale, sostenibile}, YEAR = {2023}, ABSTRACT = {The investigation of intertextuality within a corpus of Latin poetry is the main objective of the research functions of the so-called 'Musisque Deoque Galaxy': formular recurrences, and lexical and metric-verbal co-occurrences draw a dense network of relationships between texts, where poetic memory presents itself in various modalities. This contribution aims to analyse the latest developments in the "Musisque Deoque Galaxy" both from the point of view of the tools created to enable new textual acquisitions and new analyses, and from the technological point of view, with reference to the work carried out to ensure a long-term sustainability.}, KEYWORDS = {Digital textual corpora, Domain Specific Languages (DSL), Sustainability, Latin poetry, Carmina Latina Epigraphica, MQDQ Galaxy}, PAGES = {71-120}, URL = {https://hdl.handle.net/10278/5032220}, VOLUME = {4}, DOI = {10.30687/mag/2724-3923/2023/07/003}, PUBLISHER = {Edizioni Ca' Foscari (Venezia, Italia)}, ISSN = {2724-3923}, JOURNAL = {Magazèn}, } @BOOK{DUVAL_2023_BOOK_DG_489796, AUTHOR = {Duval, F. and Guadagnini, E.}, TITLE = {Le théâtre antique au Moyen Âge. Étude des mots et des concepts dans les textes en français et en italien}, YEAR = {2023}, ABSTRACT = {Longtemps s'est imposée l'idée d'une redécouverte tardive du théâtre antique après la longue parenthèse du Moyen Âge. Dans ce domaine, comme pour tant d'autres, l'« âge moyen » aurait représenté une coupure nette entre l'Antiquité, où le théâtre était une institution sociale répandue, et la Renaissance, qui aurait renoué avec les codes et pratiques antiques. Pour faire pièce à cette historiographie de la rupture, Frédéric Duval et Elisa Guadagnini ont recueilli et étudié les traces laissées par le théâtre antique dans l'encyclopédie et les langues vernaculaires du Moyen Âge. 
Le présent livre s'intéresse à l'histoire des langues et à l'histoire des idées, aux mots autant qu'aux concepts. Les auteurs partent toutefois de la représentation lexicale du théâtre antique. Sous cet angle, la documentation vernaculaire présente des avantages par rapport à la documentation latine. L'analyse porte à la fois sur la mise en place d'un lexique théâtral référant à l'Antiquité et sur le processus de conceptualisation du « théâtre antique » en France et en Italie entre le XIIe et le XVe siècle.}, KEYWORDS = {Teatro, Linguistica romanza, Eredità dei classici, Letteratura medievale}, PAGES = {672}, URL = {https://publications.cnr.it/doc/489796}, PUBLISHER = {Droz (Genève, CHE)}, ISBN = {978-2-600-06468-2}, } @INCOLLECTION{BOSCHETTI_2023_INCOLLECTION_BBDMKBT_484489, AUTHOR = {Boschetti, F. and Bambaci, L. and Del Grosso, A. M. and Mugelli, G. and Khan, A. F. and Bellandi, A. and Taddei, A.}, TITLE = {Collaborative and Multidisciplinary Annotations of Ancient Texts: The Euporia System}, YEAR = {2023}, ABSTRACT = {Euporia is an annotation system originally created to study the ritual dynamics in ancient Greek tragedies from an anthropological perspective. The system is designed to be flexible enough so that it can be easily extended in other directions of multidisciplinary research. The system combines the simplicity of a web interface pared down to its essential elements with the expressivity of a domain-specific language parsed with ANTLR, that avoids the verbosity of general-purpose markup languages (such as XML-TEI) during the annotation phase. In this way, the user is focused on domain-specific tasks by writing concise annotations. Upon exportation of our data, interoperability is ensured by two measures: references to the annotated text are translated from a system based on progressive word numbers to the Canonical Text Services (CTS) system, and the annotations are translated into XML-TEI. 
An annotation is constituted by a text reference, a condition related to variant readings and interpretations of the same text, as well as by a sequence of tags. Tags are created by following a bottom-up approach: they are progressively introduced and reused by the domain-expert during the annotation process. During revisions, tags are grouped and mapped onto an ontology, in order to enable and to exploit the identification of relations among the tags in querying the annotated corpus. Being designed for interoperability, our approach can be extended to other research fields (e.g. philology, archaeology) through the creation of new domain-specific languages and domain-specific tagsets, in order to improve the functionalities of the Euporia system.}, KEYWORDS = {Digital Humanities, Euporia, Domain Specific Languages, CoPhiLab, Digital Philology}, PAGES = {172-223}, URL = {https://publications.cnr.it/doc/484489}, VOLUME = {6}, DOI = {10.1163/9789004527119_008}, PUBLISHER = {Brill Academic Publishers (Leiden, NLD)}, ISBN = {9789004527119}, BOOKTITLE = {The Ancient World Goes Digital}, EDITOR = {Juloux, V. B. and Di Ludovico, A. and Matskevich, S.}, } @INCOLLECTION{BURGASSI_2023_INCOLLECTION_BG_490570, AUTHOR = {Burgassi, C. and Guadagnini, E.}, TITLE = {La marcatezza lessicale nella ricostruzione del vocabolario di epoca storica}, YEAR = {2023}, ABSTRACT = {In questo contributo si descrive un possibile impiego del concetto di "marcatezza" nel campo della lessicologia storica. Come è noto, si tratta di un concetto molto ricorrente negli studi ma di difficile definizione e di vario utilizzo, del quale qui si propone un'applicazione specifica. Rispetto a una fase storica della lingua, in una serie di lessemi che risultano "omoionimici" per un significato, la marcatezza può stabilire un ordinamento che, di tali lessemi, rispecchi le relative posizioni nell'architettura del vocabolario. 
A parità di contenuto denotativo, la marcatezza così concepita valuta la variazione sul piano connotativo, là dove la connotazione non riguarda la competenza linguistica ma è estrapolata dalle caratteristiche di attestazione dei lessemi in un corpus rappresentativo di riferimento. Per sostanziare l'argomentazione portiamo due esempi, il primo relativo al significato 'essere umano nei primi anni di vita', il secondo relativo al significato 'atto di violazione di una norma', nel vocabolario italiano antico.}, KEYWORDS = {Lessicologia italiana, Linguistica storica, Marcatezza}, PAGES = {77-94}, URL = {https://amsacta.unibo.it/id/eprint/7465}, VOLUME = {7}, DOI = {10.6092/unibo/amsacta/7465}, ISBN = {9788854971264}, BOOKTITLE = {«CLUB Working Papers in Linguistics» Volume 7}, EDITOR = {Cervini, C. and Gagliardi, G.}, } @EDITORIAL{MARZI_2023_EDITORIAL_MP_490518, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {Integrative Views on Representations and Processes in Morphology}, YEAR = {2023}, ABSTRACT = {One of the most enduring conceptualisations of the language architecture rests on a modular subdivision of work between lexical representations of stored items on the one hand, and dynamic processes, modelled as procedural rules working on such items, on the other hand. In morphology, network-based approaches have suggested an alternative "integrative" view of word representations and processes, where lexical representations consist of partially overlapping activation patterns spreading over several processing units. From this integrative perspective, the resulting network is both a lexicon and a word processor. We argue that the network-based view provides a stimulating research framework for several complementary levels of language inquiry (including theoretical, computational and neuro-psychological approaches) to be fruitfully integrated into a novel, comprehensive understanding of morphology. 
We discuss some implications of this view and delineate prospects of progress in this area.}, KEYWORDS = {morphology, mental lexicon, Connectionism, Network science, Discriminative Learning}, PAGES = {397-556}, URL = {https://link.springer.com/journal/11525/volumes-and-issues/33-4}, VOLUME = {33}, NUMBER = {4}, DOI = {10.1007/s11525-023-09416-5}, PUBLISHER = {Springer (Dordrecht, NLD)}, } @EDITORIAL{MARZI_2023_EDITORIAL_MP_492243, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {Integrative views of representations and processes in morphology: an introduction}, YEAR = {2023}, KEYWORDS = {Morphology, Mental Lexicon, Connectionism, Network science, Discriminative learning}, PAGES = {397-408}, URL = {https://link.springer.com/article/10.1007/s11525-023-09416-5}, VOLUME = {33}, DOI = {10.1007/s11525-023-09416-5}, PUBLISHER = {Springer (Heidelberg, Paesi Bassi)}, ISSN = {1871-5621}, JOURNAL = {Morphology (Dordrecht)}, } @INPROCEEDINGS{BRUNATO_2023_INPROCEEDINGS_BDDR_491078, AUTHOR = {Brunato, D. and Dell'Orletta, F. and Dini, I. and Ravelli, A. A.}, TITLE = {Coherent or Not? Stressing a Neural Language Model for Discourse Coherence in Multiple Languages}, YEAR = {2023}, ABSTRACT = {In this study, we investigate the capability of a Neural Language Model (NLM) to distinguish between coherent and incoherent text, where the latter has been artificially created to gradually undermine local coherence within text. While previous research on coherence assessment using NLMs has primarily focused on English, we extend our investigation to multiple languages. We employ a consistent evaluation framework to compare the performance of monolingual and multilingual models in both in-domain and out-domain settings. 
Additionally, we explore the model's performance in a cross-language scenario.}, KEYWORDS = {text coherence, neural language models, multilingual corpora}, PAGES = {10690-10700}, URL = {https://aclanthology.org/2023.findings-acl.680}, DOI = {10.18653/v1/2023.findings-acl.680}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-959429-62-3}, CONFERENCE_NAME = {61st Annual Meeting of the Association for Computational Linguistics (ACL 2023)}, CONFERENCE_PLACE = {Toronto, Canada}, CONFERENCE_DATE = {9-14/07/2023}, } @INPROCEEDINGS{CHIARELLA_2023_INPROCEEDINGS_CCF_481820, AUTHOR = {Chiarella, D. and Cutugno, P. and Ferretti, M.}, TITLE = {A linguistic approach of sound characterisation and polarization: first steps}, YEAR = {2023}, ABSTRACT = {The activities of the "TRIPLO: TRasporti e collegamenti Innovativi e sostenibili tra Porti e piattaforme LOgistiche" project, funded with funds from the Interregional Operational Programme Italy-France Maritime 2014-2020, have as their particular goal to increase the sustainability of commercial ports and associated logistic platforms, helping to lessen noise pollution [1][2]. In some project activities, the acoustic impact on the people exposed to noise from back port activities is evaluated in connection to how each person perceives the noise. Only technical investigations, which cannot ensure a phenomena's universality in terms of perception, can objectively describe a phenomenon in environmental surveys [3]; A sound can be viewed as both a physical reality that can be measured using objective criteria and a sound perception phenomenon that is of a subjective character and related to the subject's psycho-physical-emotional state. Because these two traits are inextricably linked, it is not enough to just look at them independently. 
Driven by these motivations, we created questionnaires concerning the perception of sounds, the structure and first results of which can be consulted in [4] [5] [6]. In this article, in the first part we present a methodology to identify adjectives characterising each sound via TF-IDF (term frequency - inverse document frequency) [7][8][9][10]; in the second part we analyse the positive or negative emotions described by the adjectives given for each sounds with TexBlob, a sentiment analysis classifier, and subsequently we compare the results obtained with the ones shown in [6].}, KEYWORDS = {sentiment analysis, TF-IDF, sound polarization, sound characterisation}, PAGES = {86-91}, URL = {https://publications.cnr.it/doc/481820}, ISBN = {978-959-7174-41-7}, CONFERENCE_NAME = {XVIII° Simposio Internacional de Comunicación Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {23-27/01/2023}, BOOKTITLE = {Serie de Comunicación Social 2022-2023}, } @INPROCEEDINGS{SCIOLETTE_2023_INPROCEEDINGS_SMG_491771, AUTHOR = {Sciolette, F. and Marchi, S. and Giovannetti, E.}, TITLE = {Towards a New Computational Lexicon for Italian: building the morphological layer by harmonizing and merging existing resources}, YEAR = {2023}, ABSTRACT = {The present work illustrates the first steps towards the construction of a new computational lexicon for the Italian language. Following an analysis of existing lexical resources, it was decided to use LexicO as the reference base. 
In this first phase a resource of nearly 800,000 inflected forms was produced, accompanied by lemmas and morphological traits, obtained by integrating the available data in LexicO with those coming from two support sources: the tool MAGIC and a selection of Italian treebanks.}, KEYWORDS = {computational lexicon, lexical resources, morphology, morphological harmonization}, PAGES = {5}, URL = {https://ceur-ws.org/Vol-3596/short20.pdf}, VOLUME = {3596}, CONFERENCE_NAME = {9th Italian Conference on Computational Linguistics}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {30/11/2023-01/12/2023}, BOOKTITLE = {Proceedings of the 9th Italian Conference on Computational Linguistics}, EDITOR = {Boschetti, F. and Lebani, G. E. and Magnini, B. and Novielli, N.}, } @INPROCEEDINGS{ZENZARO_2023_INPROCEEDINGS_ZDBR_484956, AUTHOR = {Zenzaro, S. and Del Grosso, A. M. and Boschetti, F. and Ranocchia, G.}, TITLE = {Ease the collaboration making Scholarly Editions: the GreekSchools case study}, YEAR = {2023}, ABSTRACT = {CophiEditor is a Digital Scholarly Editing Web platform based on Domain Specific Languages (DSL-based DSE). We are developing this platform in the context of the GreekSchools-885222 ERC project in which it is being used for the constitutio textus of Philodemus of Gadara's Arrangement of the Philosophers. The digital papyrological edition of these texts is challenging in many ways, as most of the Herculaneum papyri are highly fragmentary because they are carbonized and damaged. One of the main goals of the CophiEditor is to provide a full-fledged collaborative environment in order to support the scholar's editorial work. 
In this poster we show how the progress of CophiEditor eases the collaboration between scholars and the role played by the Web Annotation Data Model (WADM) for data representation and interchange.}, KEYWORDS = {Domain Specific Languages, Computational Philology, Digital Philology, Web Annotation Data Model, DSE tools}, PAGES = {230-232}, URL = {http://www.aiucd2023.unisi.it/atti/}, PUBLISHER = {Alma Mater Studiorum-Università di Bologna (Bologna, ITA)}, ISBN = {978-88-942535-7-3}, CONFERENCE_NAME = {LA MEMORIA DIGITALE: XII CONVEGNO ANNUALE AIUCD}, CONFERENCE_PLACE = {Siena}, CONFERENCE_DATE = {5-7 giugno 2023}, EDITOR = {Carbé, E. and Lo Piccolo, Gabriele and Valenti, Alessia and Stella, F.}, } @INPROCEEDINGS{FRONTINI_2023_INPROCEEDINGS_F_478212, AUTHOR = {Frontini, F.}, TITLE = {Words and the Company they Keep: Digital corpora and infrastructures for the foreign language classroom}, YEAR = {2023}, ABSTRACT = {We give an overview of corpora \& language technologies and their use in foreign language teaching.}, KEYWORDS = {corpora, didattica L2, tecnologie del linguaggio}, URL = {https://publications.cnr.it/doc/478212}, CONFERENCE_NAME = {Didattica della lingua, della cultura e cittadinanza attiva: sfide educative contemporanee-Seminari LEND Modena}, CONFERENCE_DATE = {07/02/2023}, } @INPROCEEDINGS{SICILIANO_2023_INPROCEEDINGS_SD_491768, AUTHOR = {Siciliano, A. and Del Grosso, A. M.}, TITLE = {Giorgio Bassani's notes between tradition and innovation}, YEAR = {2023}, ABSTRACT = {This contribution illustrates the preliminary results of the project concerning Giorgio Bassani's personal library. 
The project provides both a printed traditional edition of the notes he wrote on his books and the development of a digital environment to browse and analyze them.}, KEYWORDS = {Digital Humanities, Digital Philology, Giorgio Bassani, Digital Scholarly Editions}, URL = {https://doi.org/10.5281/zenodo.8107868}, DOI = {10.5281/zenodo.7961822}, CONFERENCE_NAME = {Digital Humanities 2023. Collaboration as Opportunity}, CONFERENCE_PLACE = {Graz, Austria}, CONFERENCE_DATE = {10-14/07/2023}, BOOKTITLE = {Digital Humanities 2023. Collaboration as Opportunity}, EDITOR = {Scholger, W. and Vogeler, G. and Tasovac, T. and Baillot, A. and Helling, P.}, } @TECHREPORT{ALBANESI_2023_TECHREPORT_AGMPS_491776, AUTHOR = {Albanesi, D. and Giovannetti, E. and Marchi, S. and Papini, M. and Sciolette, F.}, TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-Rapporto tecnico 23}, YEAR = {2023}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto del Progetto Traduzione Talmud Babilonese nel periodo gennaio 2023 - giugno 2023. Le principali attività tecniche svolte sul sistema Traduco attualmente in produzione sono state la risoluzione di bug e l'aggiornamento di alcune funzionalità. Parallelamente, è proseguito il lavoro di ricerca e sviluppo su due fronti: i) la realizzazione della nuova versione di Traduco, ii) l'ampliamento della risorsa lessicale per l'italiano contemporaneo a supporto della funzionalità di ricerca full-text sul testo del Talmud tradotto in italiano.}, KEYWORDS = {Lessici elettronici, rappresentazione della conoscenza, Linguistica Computazionale, traduzione di testi religiosi, traduzione assistita dal calcolatore}, PAGES = {13}, URL = {https://publications.cnr.it/doc/491776}, } @TECHREPORT{CARNIANI_2023_TECHREPORT_CP_490945, AUTHOR = {Carniani, E. 
and Papini, M.}, TITLE = {Maia: Una piattaforma aperta e collaborativa per la lessicografia elettronica, l'annotazione del testo e il linking testo-lessico-Consultazione e compilazione del lessico}, YEAR = {2023}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta sull'interfaccia web Maia nel periodo marzo 2023 - dicembre 2023. In particolare riportiamo le attività tecniche svolte sulla parte di creazione e manipolazione del lessico.}, KEYWORDS = {Linguistica Computazionale, Lessici elettronici, Lexicon editor, Text annotation, Digital lexicography, text-lexical connection}, PAGES = {13}, URL = {https://publications.cnr.it/doc/490945}, } @TECHREPORT{CASTELLI_2023_TECHREPORT_CDCCCCDGLMPR_482044, AUTHOR = {Castelli, D. and De Simone, G. and Cancedda, F. and Candela, L. and Colcelli, V. and Conte, R. and Di Donato, F. and Giannini, S. and Lazzeri, E. and Mangiaracina, S. and Puccinelli, R. and Ranchino, M. A.}, TITLE = {Roadmap Scienza Aperta}, YEAR = {2023}, ABSTRACT = {La scienza aperta è un paradigma che influenza le pratiche di produzione e condivisione di conoscenza. Obiettivo di questa roadmap è delineare un percorso per la realizzazione e diffusione di pratiche e politiche di scienza aperta all'interno del Consiglio Nazionale delle Ricerche.}, KEYWORDS = {Open Science, Open Access, Roadmap}, PAGES = {23}, URL = {https://publications.cnr.it/doc/482044}, PUBLISHER = {CNR (Roma, ITA)}, } @TECHREPORT{PAPINI_2023_TECHREPORT_P_490842, AUTHOR = {Papini, M.}, TITLE = {Maia: Una piattaforma aperta e collaborativa per la lessicografia elettronica, l'annotazione del testo e il linking testo-lessico-Autenticazione e gestione utenti}, YEAR = {2023}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta sull'interfaccia web Maia nel periodo marzo 2023 - dicembre 2023. 
In particolare riportiamo le attività tecniche svolte sulla parte di autenticazione e gestione utenti.}, KEYWORDS = {Linguistica Computazionale, Lessici elettronici, Lexicon editor, Text annotation, Digital lexicography, text-lexical connection}, PAGES = {7}, URL = {https://publications.cnr.it/doc/490842}, } @TECHREPORT{PAPINI_2023_TECHREPORT_P_490844, AUTHOR = {Papini, M.}, TITLE = {Maia: Una piattaforma aperta e collaborativa per la lessicografia elettronica, l'annotazione del testo e il linking testo-lessico-Gestione del corpus e annotazione del testo}, YEAR = {2023}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta sull'interfaccia web Maia nel periodo marzo 2023 - dicembre 2023. In particolare riportiamo le attività tecniche svolte sulla parte di manipolazione del Corpus e annotazione del testo.}, KEYWORDS = {Linguistica Computazionale, Lessici elettronici, Lexicon editor, Text annotation, Digital lexicography, text-lexical connection}, URL = {https://publications.cnr.it/doc/490844}, } @MISC{BARONI_2023_MISC_B_483769, AUTHOR = {Baroni, P.}, TITLE = {PRIN-20204EJYRX CWALM: Project Web Site}, YEAR = {2023}, ABSTRACT = {Sito Web del progetto CWALM - Un Modello Lessicale basato sul Corpus dell'Arabo Scritto Contemporaneo (Bando PRIN 2020 | Settori ERC SH4 e PE6 | Prot. n. 
20204EJYRX), realizzato con WordPress, sviluppato in inglese}, KEYWORDS = {lexical model, corpus, Contemporary Written Arabic}, URL = {https://cwalm.ilc.cnr.it}, } @MISC{BARONI_2023_MISC_B_484291, AUTHOR = {Baroni, P.}, TITLE = {CoPhiLab Web Site}, YEAR = {2023}, ABSTRACT = {Sito Web del Laboratorio del CNR-ILC "CoPhiLab - Laboratorio di Filologia Collaborativa e Cooperativa", realizzato con WordPress, sviluppato in inglese}, KEYWORDS = {Filologia Collaborativa, Filologia Cooperativa, Digital Humanities, Digital Scholarly Editing, risorse digitali, strumenti digitali, mondo mediterraneo antico, mondo mediterraneo medievale, mondo mediterraneo rinascimentale, greco, latino, arabo, ebraico, italiano, dialetti italiani, lingue minoritarie europee}, URL = {https://cophilab.ilc.cnr.it}, } @MISC{CARLINO_2023_MISC_C_484296, AUTHOR = {Carlino, M.}, TITLE = {Sito web www.ilc.cnr.it}, YEAR = {2023}, ABSTRACT = {Progettazione della struttura, realizzazione con il CMS WordPress e aggiornamento dei contenuti del sito istituzionale del Cnr-Istituto di Linguistica Computazionale "Antonio Zampolli" (CNR-ILC): https://www.ilc.cnr.it Sito bilingue (italiano e inglese).}, KEYWORDS = {sito web, website, dissemination}, URL = {https://www.ilc.cnr.it}, } @MISC{DELGROSSO_2023_MISC_DS_484322, AUTHOR = {Del Grosso, A. M. and Spampinato, D.}, TITLE = {Bellini Digital Correspondence}, YEAR = {2023}, ABSTRACT = {Bellini Digital Correspondence (BDC) è un progetto di edizione scientifica digitale relativo alle lettere autografe di Vincenzo Bellini. Il fondo è conservato presso il Museo Belliniano di Catania. Il carteggio comprende 40 unità testuali per 35 unità codicologiche, riprodotte in 111 immagini digitali. BDC implementa una piattaforma per la filologia digitale applicata al testo belliniano la cui base critica si regge sul lavoro realizzato dalla prof.ssa Graziella Seminara nel 2017. 
L'edizione tiene conto di diverse tipologie di fruitori: specialisti, visitatori del museo, che usufruiscono dell'edizione all'interno del percorso museale. L'edizione digitale è stata realizzata in accordo con le linee guida dettate dalla Text Encoding Initiative e integra una fruizione via Web grazie al software Edition Visualization Technology. BDC è completamente open source e open access. Il sito Web, che accompagna l'edizione, illustra con dovizia di particolari tutte le fasi del progetto, i principi editoriali, le scelte di rappresentazione digitale del testo e l'ampio gruppo di persone coinvolte.}, KEYWORDS = {Digital Humanities, Edizioni Scientifiche Digitali, Vincenzo Bellini, Filologia Digitale, Digital Scholarly Edition, Edition Visualization Technology}, URL = {http://bellinicorrespondence.cnr.it/}, } @MISC{GIOVANNETTI_2023_MISC_GABCGMPS_491773, AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Carniani, E. and Guidi, L. and Marchi, S. and Papini, M. and Sciolette, F.}, TITLE = {Maia}, YEAR = {2023}, ABSTRACT = {Maia is an open and collaborative web tool based on semantic web and linked open data technologies for text annotation, e-lexicography, and lexical linking.}, KEYWORDS = {maia, linked open data, e-lexicography, text annotation, lexical linking, collaborative tools}, URL = {https://github.com/klab-ilc-cnr/Maia}, } @ARTICLE{ACHENBACH_2022_ARTICLE_ABDDDFKV_465243, AUTHOR = {Achenbach, K. and Błaszczyńska, M. and De Paoli, S. and Di Donato, F. and Dumouchel, S. and Forbes, P. and Kraker, P. and Vignoli, M.}, TITLE = {Defining discovery: Is Google Scholar a discovery platform? An essay on the need for a new approach to scholarly discovery}, YEAR = {2022}, ABSTRACT = {This essay discusses the concept of discovery, intended as content discovery, and defines it in the new context of Open Science, with a focus on Social Sciences and Humanities (SSH). 
Starting from the example of Google Scholar, the authors argue that this well-established service does not address the current needs, practices, and variety of discovery. Alternatives in terms of technical choices, features, and governance, do however exist, offering richer and more open discovery. The paper presents, in particular, the implementations and research work of the H2020 project TRIPLE (Transforming Research through Innovative Practices for Linked Interdisciplinary Exploration). Dedicated to the building of a discovery platform for the SSH, the project is meant to address the specificities and evolution of discovery in this field. Prevailing scholarly resource platforms like Google Scholar limit discovery by focussing only on publications, and favouring through their algorithm well-cited papers, English content, and discipline-specific resources. A limitation in the context of cross-disciplinary and collaborative Open Science, such a service more specifically hinders discovery in the SSH. Characterized by a fragmented landscape, a variety of languages, data types, and outputs, research in the SSH requires services that fully exploit discovery potentialities. Moreover, a survey conducted within the TRIPLE project showed that most SSH researchers use Google Scholar as their starting point, and that they recognise the lack of control they have with this system. Beyond the extension of features and content, transparency is the other important criterion for the building of an open infrastructure serving the research community. 
In light of this, we present the GoTriple platform, which exploits today's technological potential and incorporates the best known functionalities, in order to unveil more and innovative scholarly outputs and lead to international and interdisciplinary research project collaborations.}, KEYWORDS = {discovery, TRIPLE}, URL = {https://open-research-europe.ec.europa.eu/articles/2-28/v1}, DOI = {10.12688/openreseurope.14318.1}, PUBLISHER = {F1000 Research Limited on behalf of the European Commission, London, United Kingdom}, ISSN = {2732-5121}, JOURNAL = {Open research Europe}, } @ARTICLE{BACCO_2022_ARTICLE_BRADVVDMPD_472298, AUTHOR = {Bacco, L. and Russo, F. and Ambrosio, L. and D'Antoni, F. and Vollero, L. and Vadala, G. and Dell'Orletta, F. and Merone, M. and Papalia, R. and Denaro, V.}, TITLE = {Natural language processing in low back pain and spine diseases: A systematic review}, YEAR = {2022}, ABSTRACT = {Natural Language Processing (NLP) is a discipline at the intersection between Computer Science (CS), Artificial Intelligence (AI), and Linguistics that leverages unstructured human-interpretable (natural) language text. In recent years, it gained momentum also in health-related applications and research. Although preliminary, studies concerning Low Back Pain (LBP) and other related spine disorders with relevant applications of NLP methodologies have been reported in the literature over the last few years. It motivated us to systematically review the literature comprised of two major public databases, PubMed and Scopus. To do so, we first formulated our research question following the PICO guidelines. Then, we followed a PRISMA-like protocol by performing a search query including terminologies of both technical (e.g., natural language and computational linguistics) and clinical (e.g., lumbar and spine surgery) domains. We collected 221 non-duplicated studies, 16 of which were eligible for our analysis. 
In this work, we present these studies divided into sub-categories, from both tasks and exploited models' points of view. Furthermore, we report a detailed description of techniques used to extract and process textual features and the several evaluation metrics used to assess the performance of the NLP models. However, what is clear from our analysis is that additional studies on larger datasets are needed to better define the role of NLP in the care of patients with spinal disorders.}, KEYWORDS = {natural language processing, Low Back Pain, Survey}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85135163810\&origin=inward}, VOLUME = {9}, DOI = {10.3389/fsurg.2022.957085}, PUBLISHER = {Frontiers Media (Lausanne, Svizzera)}, ISSN = {2296-875X}, JOURNAL = {Frontiers in surgery}, } @ARTICLE{BIFFI_2022_ARTICLE_BDFGMS_477716, AUTHOR = {Biffi, M. and De Blasi, F. and Favaro, M. and Guadagnini, E. and Montemagni, S. and Sassolini, E.}, TITLE = {Parole in rete / reti di parole. Possibili impieghi didattici dei grandi vocabolari storici digitalizzati}, YEAR = {2022}, ABSTRACT = {After a brief presentation of the great historical dictionaries of Italian, which are free to use online thanks to the digitalisation work carried out by the Accademia della Crusca, the contribution offers a number of examples of how these tools can be used for educational purposes. 
Finally, further didactic uses are described, which will be made possible thanks to the advanced digital tools that the Accademia della Crusca and the Istituto di Linguistica Computazionale "Antonio Zampolli" del Consiglio Nazionale delle Ricerche (ILC) are currently working on.}, KEYWORDS = {Lessicografia italiana, Didattica dell'italiano, Lessicografia digitale}, PAGES = {143-188}, URL = {https://italianoascuola.unibo.it/article/view/14866}, VOLUME = {4}, DOI = {10.6092/issn.2704-8128/14866}, PUBLISHER = {ABIS-AlmaDL (Bologna, Italia)}, ISSN = {2704-8128}, JOURNAL = {Italiano a scuola}, } @ARTICLE{BIFFI_2022_ARTICLE_BG_469340, AUTHOR = {Biffi, M. and Guadagnini, E.}, TITLE = {«Le citazioni riconducono il dizionario nell'ambito della letteratura e della vita»: un primo sguardo d'insieme sui citati del GDLI}, YEAR = {2022}, ABSTRACT = {Nel corso dei lavori di affinamento della versione informatizzata del Grande dizionario della lingua italiana, condotti dall'Accademia della Crusca in collaborazione con l'Istituto di linguistica computazionale (CNR-Pisa), è stato integralmente corretto e acquisito digitalmente l'Indice degli autori citati. Il contributo presenta alcuni dati, oggi disponibili per la prima volta grazie alla conversione in formato elettronico del vocabolario e della sua bibliografia, utili ad approfondire lo studio delle fonti impiegate nel Battaglia. Gli esempi citati nelle voci rappresentano, come è noto, il cuore del GDLI e insieme uno degli aspetti più intensamente discussi dagli studiosi. La percezione che si ha e si è avuta di questo aspetto del dizionario, tuttavia, non sempre corrisponde ai dati effettivi. 
Posta la mole del corpus delle allegazioni alle voci, infatti, soltanto la visione complessiva resa possibile dall'interrogazione digitale ne consente una valutazione oggettiva.}, KEYWORDS = {Lessicografia italiana, Storia della lingua italiana}, PAGES = {351-386}, URL = {https://accademiadellacrusca.it/it/riviste/articoli/slei-xxxix-2022/7599}, VOLUME = {XXXIX}, PUBLISHER = {Le Lettere (Firenze, Italia)}, ISSN = {0392-5218}, JOURNAL = {Studi di lessicografia italiana}, } @ARTICLE{BRUNATO_2022_ARTICLE_BDV_464954, AUTHOR = {Brunato, D. and Dell'Orletta, F. and Venturi, G.}, TITLE = {Linguistically-Based Comparison of Different Approaches to Building Corpora for Text Simplification: A Case Study on Italian}, YEAR = {2022}, ABSTRACT = {In this paper, we present an overview of existing parallel corpora for Automatic Text Simplification (ATS) in different languages focusing on the approach adopted for their construction. We make the main distinction between manual and (semi)-automatic approaches in order to investigate in which respect complex and simple texts vary and whether and how the observed modifications may depend on the underlying approach. To this end, we perform a two-level comparison on Italian corpora, since this is the only language, with the exception of English, for which there are large parallel resources derived through the two approaches considered. The first level of comparison accounts for the main types of sentence transformations occurring in the simplification process, the second one examines the results of a linguistic profiling analysis based on Natural Language Processing techniques and carried out on the original and the simple version of the same texts. 
For both levels of analysis, we chose to focus our discussion mostly on sentence transformations and linguistic characteristics that pertain to the morpho-syntactic and syntactic structure of the sentence.}, KEYWORDS = {linguistic complexity, Italian language, corpus construction, text simplification, aligned corpora}, PAGES = {1-19}, URL = {https://www.frontiersin.org/articles/10.3389/fpsyg.2022.707630/full}, VOLUME = {13}, DOI = {10.3389/fpsyg.2022.707630}, PUBLISHER = {Frontiers Research Foundation (Switzerland)}, ISSN = {1664-1078}, JOURNAL = {Frontiers in Psychology}, } @ARTICLE{BRUNATO_2022_ARTICLE_BMD_474123, AUTHOR = {Brunato, D. and Mattei, A. and Dell'Orletta, F.}, TITLE = {Analisi della scrittura giovanile da una prospettiva linguistico-computazionale: il caso di studio della Fanfiction}, YEAR = {2022}, ABSTRACT = {This paper presents a study aimed at characterizing the linguistic style of an emerging literary genre of the web, particularly appreciated by teens and young adults: fanfiction. By relying on Natural Language Processing approaches, and in particular on the methodology of linguistic profiling applied to a novel corpus of Italian fanfiction stories inspired by the fantasy saga "Harry Potter", we investigate the relationship between linguistic style and 'success', measured in terms of number of reviews obtained by the readers. 
We show that it is possible to detect a set of features, among a wide set of linguistic ones modeling lexical, morpho-syntactic and syntactic phenomena, which help more in discriminating between 'successful' and 'unsuccessful' fanfics.}, KEYWORDS = {Trattamento Automatico del Linguaggio, stilometria computazionale, linguistic profiling, corpora, fanfiction}, PAGES = {171-189}, URL = {https://publications.cnr.it/doc/474123}, VOLUME = {2021/3}, PUBLISHER = {Bulzoni (Roma, Italia)}, ISSN = {0033-9725}, JOURNAL = {Rassegna Italiana di Linguistica Applicata (Testo stamp.)}, } @ARTICLE{BRUNATO_2022_ARTICLE_BV_472409, AUTHOR = {Brunato, D. and Venturi, G.}, TITLE = {Why is this language complex? Cherry-pick the optimal set of features in multilingual treebanks}, YEAR = {2022}, ABSTRACT = {This paper investigates linguistic complexity across natural languages from a corpus-based perspective and relies on the assumptions of linguistic profiling as a methodological framework. We focus in particular on the domain of syntactic complexity and analyze the distribution of a set of features taken as proxies of complexity phenomena at the sentence level, which were extracted from 63 treebanks annotated according to the Universal Dependencies formalism. This dataset guarantees that the features considered are modeling the same linguistic phenomena in different treebanks, allowing reliable comparison among languages. 
We show that our approach is able to identify tendencies of structural proximity between languages not necessarily in line with typologically-supported classification, thus shedding light on new corpus-based findings.}, KEYWORDS = {Linguistic Complexity, Linguistic Profiling, Universal Dependencies, Syntactic Domain}, PAGES = {1-14}, URL = {https://www.degruyter.com/document/doi/10.1515/lingvan-2021-0017/html}, DOI = {10.1515/lingvan-2021-0017}, PUBLISHER = {De Gruyter Mouton (Berlin; New York NY, Germania)}, ISSN = {2199-174X}, JOURNAL = {Linguistics vanguard}, } @ARTICLE{DELFANTE_2022_ARTICLE_D_464869, AUTHOR = {Del Fante, D.}, TITLE = {Review: A Corpus-Based Analysis of Ideological Bias: Migration in the British Press}, YEAR = {2022}, KEYWORDS = {Migration Studies, Newspaper Discourse, Corpus Linguistics, Corpus Approaches to Discourse Analysis}, PAGES = {137-139}, URL = {https://journals.sagepub.com/doi/10.1177/14614456211073219a}, VOLUME = {24}, DOI = {10.1177/14614456211073219a}, PUBLISHER = {SAGE (London, Regno Unito)}, ISSN = {1461-4456}, JOURNAL = {Discourse studies (Print)}, } @ARTICLE{DELFANTE_2022_ARTICLE_D_470092, AUTHOR = {Del Fante, D.}, TITLE = {Metaphors and pandemics: Spanish Flu and Coronavirus in US newspapers. A case-study}, YEAR = {2022}, ABSTRACT = {The international outbreak of Coronavirus has challenged the stability of our contemporary societies. However, this is not the first time that humanity is facing a global pandemic. The 1918 Spanish Flu pandemic led to one of the most lethal pandemics. Metaphors play a fundamental role in influencing how we think and talk about health and illness. With an understanding of how the Coronavirus and the Spanish Flu are metaphorically represented in newspaper discourse, it would be easier to shed light on the linguistic process through which metaphors work and to understand to what extent socio-historical-cultural conditions may affect the actualisation of a metaphor. 
This paper shows that metaphors are consistently present in both time contexts and Coronavirus and Spanish Flu are similarly metaphorically represented. This might suggest the existence of a rhetoric of pandemics which goes beyond the specific socio-cultural and political context: a response to a threat as a pandemic is deeply related with human nature}, KEYWORDS = {conceptual metaphor, corpus assisted discourse studies, health communication, corpus linguistics}, PAGES = {143-184}, URL = {https://www.metaphorik.de/sites/www.metaphorik.de/files/journal-pdf/32-2022_6_del-fante_0.pdf}, VOLUME = {32}, PUBLISHER = {Metaphorik. de c/o D. Osthus c/o Universität Bonn, Romanisches Seminar (Bonn, Germania)}, ISSN = {1618-2006}, JOURNAL = {Metaphorik. de (Internet)}, } @ARTICLE{DELFANTE_2022_ARTICLE_DD_463185, AUTHOR = {Del Fante, D. and Di Nunzio, G. M.}, TITLE = {OCR Correction for Corpus-assisted Discourse Studies: A Case Study of Old Newspapers}, YEAR = {2022}, ABSTRACT = {The use of OCR software to convert printed characters to digital text is a fundamental tool within diachronic approaches to Corpus-assisted discourse Studies. However, OCR software is not totally accurate, and the resulting error rate may compromise the qualitative analysis of the studies. This paper proposes a mixed qualitative-quantitative approach to OCR error detection and correction in order to develop a methodology for enhancing the quality of historical corpora. We applied the developed methodology to two case studies on newspapers of the beginning of the 20th century for the linguistic analysis of the metaphors representing migration and pandemics. The outcome of this project consists in a set of rules which are, eventually, valid for different contexts and applicable to different corpora and which can be reproduced and reused. 
The proposed procedure, in terms of computational readability, is aimed at making more readable and searchable the vast array of historical text corpora which are, at the moment, only partially usable given the high error rate introduced by an OCR software.}, KEYWORDS = {Corpus-assisted Discourse Studies, OCR detection, OCR correction, OCR post-processing, Text Mining}, PAGES = {99-124}, URL = {https://umanisticadigitale.unibo.it/article/view/13689}, VOLUME = {11}, DOI = {10.6092/issn.2532-8816/13689}, ISSN = {2532-8816}, JOURNAL = {Umanistica Digitale}, } @ARTICLE{DELGRATTA_2022_ARTICLE_DDZBB_472290, AUTHOR = {Del Gratta, R. and Del Grosso, A. M. and Zenzaro, S. and Boschetti, F. and Bambaci, L.}, TITLE = {La Filologia come sistema dinamico}, YEAR = {2022}, ABSTRACT = {Introduciamo un approccio formale all'evoluzione del contenuto informativo veicolato da documenti umanistici, con particolare attenzione alla prospettiva filologica e alle problematiche tipiche ad essa connesse (studio della tradizione, confronto tra testimoni, selezione e scelta delle lezioni, edizione di un testo, etc). Proponiamo un modello matematico in grado di formalizzare diversi fenomeni complessi in vari ambiti di ricerca quali la Linguistica Computazionale, la Filologia Digitale e l'Ingegneria del Software, soprattutto quando questi vengono applicati all'analisi di documenti e testi di interesse storico-letterario.}, KEYWORDS = {computational philology, formal philology, digital humanities}, PAGES = {1-20}, URL = {https://umanisticadigitale.unibo.it/article/view/13684}, VOLUME = {13}, DOI = {10.6092/issn.2532-8816/13684}, ISSN = {2532-8816}, JOURNAL = {Umanistica Digitale}, } @ARTICLE{FERRARI_2022_ARTICLE_FPBVV_473649, AUTHOR = {Ferrari, A. and Pirrotta, L. and Bonciani, M. and Venturi, G. 
and Vainieri, M.}, TITLE = {Higher readability of institutional websites drives the correct fruition of the abortion pathway: A cross-sectional study}, YEAR = {2022}, ABSTRACT = {In Italy, abortion services are public: therefore, health Institutions should provide clear and easily readable web-based information. We aimed to 1) assess variation in abortion services utilisation; 2) analyse the readability of institutional websites informing on induced abortion; 3) explore whether easier-to-read institutional websites influenced the correct fruition of abortion services. We identified from the 2021 administrative databases of Tuscany all women having an abortion, and-among them-women having an abortion with the certification provided by family counselling centres, following the pathway established by law. We assessed variation in total and certified abortion rates by computing the Systematic Component of Variation. We analysed the readability of the Tuscan health authorities' websites using the readability assessment tool READ-IT. We explored how institutional website readability influenced the odds of having certified abortions by running multilevel logistic models, considering health authorities as the highest-level variables. We observed high variation in the correct utilization of the abortion pathway in terms of certified abortion rates. The READ-IT scores showed that the most readable text was from the Florence Teaching Hospital website. Multilevel models revealed that higher READ-IT scores, corresponding to more difficult texts, resulted in lower odds of certified abortions. Large variation in the proper fruition of abortion pathways occurs in Tuscany, and such variation may depend on readability of institutional websites informing on induced abortion. 
Therefore, health Institutions should monitor and improve the readability of their websites to ensure proper and more equitable access to abortion.}, KEYWORDS = {abortion services, readability assessment}, PAGES = {1-13}, URL = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0277342}, VOLUME = {17}, DOI = {10.1371/journal.pone.0277342}, PUBLISHER = {Public Library of Science (San Francisco, CA, Stati Uniti d'America)}, ISSN = {1932-6203}, JOURNAL = {PloS one}, } @ARTICLE{GUADAGNINI_2022_ARTICLE_G_477583, AUTHOR = {Guadagnini, E.}, TITLE = {Alessandro Manzoni tra l'ancudine e l'incudine}, YEAR = {2022}, ABSTRACT = {Nel Fermo e Lucia compare la locuzione tra l'incudine e il martello : dopo averla eliminata nella "seconda minuta", nella Ventisettana Alessandro Manzoni reinserisce (essere) tra l'ancudine e il martello, e con questa forma l'espressione idiomatica resta nella Quarantana. Il contributo presenta alcuni elementi utili a spiegare le ragioni che possono aver portato Manzoni a preferire ancudine a incudine, prima e dopo il viaggio in Toscana.}, KEYWORDS = {Alessandro Manzoni, Toscanismo, Latinismo}, PAGES = {161-169}, URL = {https://publications.cnr.it/doc/477583}, VOLUME = {18}, PUBLISHER = {Istituti Editoriali e Poligrafici Internazionali (Pisa, Italia)}, ISSN = {1724-9074}, JOURNAL = {La Lingua italiana}, } @ARTICLE{MARZI_2022_ARTICLE_MNMMP_471441, AUTHOR = {Marzi, C. and Narzisi, A. and Milone, A. and Masi, G. and Pirrelli, V.}, TITLE = {Reading behaviors through patterns of finger-tracking in Italian children with autism spectrum disorder}, YEAR = {2022}, ABSTRACT = {The paper proposes an ecological and portable protocol for the large-scale collection of reading data in high-functioning autism spectrum disorder (ASD) children based on recording the finger movements of a subject reading a text displayed on a tablet touchscreen. 
By capitalizing on recent evidence that movements of a finger that points to a scene or text during visual exploration or reading may approximate eye fixations, we focus on recognition of written content and function words, pace of reading, and accuracy in reading comprehension. The analysis showed significant differences between typically developing and ASD children, with the latter group exhibiting greater variation in levels of reading ability, slower developmental pace in reading speed, less accurate comprehension, greater dependency on word length and word frequency, less significant prediction-based processing, as well as a monotonous, steady reading pace with reduced attention to weak punctuation. Finger-tracking patterns provide evidence that ASD readers may fail to integrate single word processing into major syntactic structures and lend support to the hypothesis of an impaired use of contextual information to predict upcoming stimuli, suggesting that difficulties in perception may arise as difficulties in prediction.}, KEYWORDS = {reading, autism, finger-tracking, developing readers, prediction-driven processing}, PAGES = {1-17}, URL = {https://www.mdpi.com/2076-3425/12/10/1316}, VOLUME = {12}, DOI = {10.3390/brainsci12101316}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2076-3425}, JOURNAL = {Brain sciences}, } @ARTICLE{MIASCHI_2022_ARTICLE_MBDV_475015, AUTHOR = {Miaschi, A. and Brunato, D. and Dell'Orletta, F. 
and Venturi, G.}, TITLE = {On Robustness and Sensitivity of a Neural Language Model: A Case Study on Italian L1 Learner Errors}, YEAR = {2022}, ABSTRACT = {In this paper, we propose a comprehensive linguistic study aimed at assessing the implicit behavior of one of the most prominent Neural Language Models (NLM) based on Transformer architectures, BERT (Devlin et al., 2019), when dealing with a particular source of noisy data, namely essays written by L1 Italian learners containing a variety of errors targeting grammar, orthography and lexicon. Differently from previous works, we focus on the pre-training stage and we devise two complementary evaluation tasks aimed at assessing the impact of errors on sentence-level inner representations in terms of semantic robustness and linguistic sensitivity. While the first evaluation perspective is meant to probe the model's ability to encode the semantic similarity between sentences also in the presence of errors, the second type of probing task evaluates the influence of errors on BERT's implicit knowledge of a set of raw and morpho-syntactic properties of a sentence. Our experiments show that BERT's ability to compute sentence similarity and to correctly encode multi-leveled linguistic information of a sentence are differently modulated by the category of errors and that the error hierarchies in terms of robustness and sensitivity change across layer-wise representations.}, KEYWORDS = {nlp, interpretability, transformers, learner errors}, PAGES = {426-438}, URL = {https://doi.org/10.1109/TASLP.2022.3226333}, DOI = {10.1109/TASLP.2022.3226333}, PUBLISHER = {[Institute of Electrical and Electronics Engineers] ([Piscataway NJ], Stati Uniti d'America)}, ISSN = {2329-9304}, JOURNAL = {IEEE/ACM transactions on audio, speech, and language processing (Online)}, } @ARTICLE{MIASCHI_2022_ARTICLE_MSBDV_469733, AUTHOR = {Miaschi, A. and Sarti, G. and Brunato, D. and Dell'Orletta, F. 
and Venturi, G.}, TITLE = {Probing Linguistic Knowledge in Italian Neural Language Models across Language Varieties}, YEAR = {2022}, ABSTRACT = {In this paper, we present an in-depth investigation of the linguistic knowledge encoded by the transformer models currently available for the Italian language. In particular, we investigate how the complexity of two different architectures of probing models affects the performance of the Transformers in encoding a wide spectrum of linguistic features. Moreover, we explore how this implicit knowledge varies according to different textual genres and language varieties.}, KEYWORDS = {nlp, transformer models, interpretability}, PAGES = {25-44}, URL = {http://www.aaccademia.it/ita/scheda-libro?aaref=1518}, DOI = {10.4000/ijcol.965}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{PIRROTTA_2022_ARTICLE_PGTBVD_472171, AUTHOR = {Pirrotta, L. and Guidotti, E. and Tramontani, C. and Bignardelli, E. and Venturi, G. and De Rosis, S.}, TITLE = {COVID-19 vaccinations: An overview of the Italian national health system's online communication from a citizen perspective}, YEAR = {2022}, ABSTRACT = {COVID-19 vaccine hesitancy is still widespread. During the pandemic, the internet has been the preferred channel for health-related information, especially for less-educated citizens who tend to be the most hesitant about vaccination. A well-structured web communication strategy could help both to overcome vaccine hesitancy and to ensure equity in healthcare service access. This study investigated how the various regional and local health authorities in Italy used their institutional websites to inform users about COVID-19 vaccinations between March and April 2021. We browsed 129 institutional websites, checking the availability, quality and quantity, actionability and readability of information using a literature-based common grid. 
Descriptive statistics and statistical tests were performed. The online public dissemination of COVID-19 vaccination information in Italy was fragmented, both across and within regions. The side effects of vaccinations were often not reported on the websites, thus missing an opportunity to enhance vaccination uptake. More focus should also be placed on readability, since readability indexes showed that they were difficult to understand. Our research revealed that several actions could be implemented to enhance online communication on COVID-19 vaccination. For instance, simplifying texts can make them more understandable and the information reported actionable.}, KEYWORDS = {Vaccination Communication, Readability Assessment, Online Information, Covid-19}, PAGES = {970-979}, URL = {https://www.sciencedirect.com/science/article/pii/S0168851022002184}, VOLUME = {10}, DOI = {10.1016/j.healthpol.2022.08.001}, PUBLISHER = {Elsevier (Amsterdam, Paesi Bassi)}, ISSN = {0168-8510}, JOURNAL = {Health policy (Amst. Print)}, } @ARTICLE{SAPONARO_2022_ARTICLE_SGS_484519, AUTHOR = {Saponaro, D. and Giovannetti, E. and Sciolette, F.}, TITLE = {From Religious Sources to Computational Resources: Approach and Case Study on Hebrew Terms and Concepts}, YEAR = {2022}, ABSTRACT = {This paper introduces a methodology for the creation of a digital representation of a religious case study integrating textual, linguistic, and conceptual entities. The description of a holistic model of text, to be used as the backbone of the computational resource that needs to be built, is provided. 
The proposed case study, focusing on the semantic field of "face" in Jewish religion, is first introduced from the point of view of the scholar and then translated, with the support of digital tools, into the relative computational representation.}, KEYWORDS = {religious studies, Hebrew terminology, ontology}, PAGES = {21}, URL = {https://publications.cnr.it/doc/484519}, VOLUME = {XXVII(2022)}, PUBLISHER = {Giuntina (Firenze, Italia)}, ISSN = {2282-4499}, JOURNAL = {Materia giudaica Print}, } @ARTICLE{SICILIANO_2022_ARTICLE_SD_472277, AUTHOR = {Siciliano, A. and Del Grosso, A. M.}, TITLE = {Dalla Stampa al Digitale: Un Modello di Codifica per l'Edizione delle Postille di Giorgio Bassani}, YEAR = {2022}, ABSTRACT = {Le postille annotate da Giorgio Bassani sui suoi libri rappresentano una via d'accesso privilegiata alla sua officina, consentendo di ricostruire la genesi delle opere e del profilo intellettuale di un importante scrittore del Novecento. In sede di edizione pongono però numerose problematiche, di classificazione, rappresentazione e sistematizzazione dei dati, di fronte a cui lo strumento digitale si rivela oltremodo vantaggioso. Il presente contributo riflette sul tema, presentando il prototipo di edizione digitale costruito sul caso di studio delle annotazioni alla Scuola dell'uomo di Guido Calogero (Firenze, Sansoni, 1939) e ragionando sulle importanti potenzialità del markup XML-TEI nel trattamento e nella rappresentazione di un oggetto testuale complesso, strutturato e a volte sfuggente qual è la postilla.}, KEYWORDS = {Digital Humanities, Digital Philology, Postille, Giorgio Bassani, Computational Philology}, PAGES = {1-26}, URL = {https://umanisticadigitale.unibo.it/article/view/13688}, VOLUME = {13}, DOI = {10.6092/issn.2532-8816/13688}, ISSN = {2532-8816}, JOURNAL = {Umanistica Digitale}, } @BOOK{VENTURI_2022_BOOK_VCD_464964, AUTHOR = {Venturi, G. and Cimino, A. and Dell'Orletta, F.}, TITLE = {La fede dichiarata. 
Un'analisi linguistico-computazionale}, YEAR = {2022}, ABSTRACT = {Il volume indaga l'apporto di tecnologie basate sul Natural Language Processing (NLP) all'analisi di un corpus di trascrizioni di 164 interviste orali raccolte durante la ricerca 2017 sulla "Religiosità in Italia". Gli autori illustrano metodologie e strumenti che permettono di trasformare l'informazione implicitamente contenuta nelle interviste in informazione esplicitamente strutturata. Il risultato finale di questo processo interpretativo spazia dall'acquisizione di conoscenze lessicali e terminologiche complesse alla loro organizzazione in strutture proto-concettuali, fino ad arrivare alla qualificazione dell'atteggiamento con il quale l'intervistato si esprime. Il lettore viene accompagnato a scoprire quale sia il valore aggiunto delle analisi basate su NLP e quali nuovi orizzonti di ricerca siano aperti da queste analisi.}, KEYWORDS = {Knowledge Extraction, Knowledge Organization}, PAGES = {1-181}, URL = {https://publications.cnr.it/doc/464964}, PUBLISHER = {Franco Angeli Editore (Milano, ITA)}, ISBN = {978-88-351-2146-6}, } @INCOLLECTION{CALAMAI_2022_INCOLLECTION_CPPCSM_467178, AUTHOR = {Calamai, S. and Piccardi, D. and Pretto, N. and Candeo, G. and Stamuli, M. F. and Monachini, M.}, TITLE = {Not Just Paper: Enhancement of Archive Cultural Heritage}, YEAR = {2022}, ABSTRACT = {Oral archives and digital technologies have gone hand-in-hand for a very long time. Both sides benefit from this interdisciplinary junction: technology enhances the preservation and diffusion of oral materials, while exploiting them to develop cutting-edge tools for their treatment. This chapter deals with an Italian instantiation of this mutual relationship: the Archivio Vi.Vo. project. Offering innovative solutions concerning metadata, audio restoration, description, and access, Archivio Vi.Vo. aims to build an online platform to host the oral archives from Tuscany. 
The project is powered by CLARIN-IT, which guarantees its compliance with standards and offers resources for data access and discoverability. Archivio Vi.Vo. has not been built from scratch: it is instead a cross-fertilization of previous initiatives and research projects (e.g., the Gra.fo project). Moreover, the chapter presents the related, contemporary work of a multidisciplinary group striving to synthesize a Vademecum for future generations of oral archive researchers. Lastly, a brief list of tentative ideas for future developments of the Archivio Vi.Vo. platform will be presented.}, KEYWORDS = {digital oral archives, research infrastructures, models for digital preservation}, URL = {https://www.degruyter.com/document/isbn/9783110767377/html}, VOLUME = {1}, DOI = {10.1515/9783110767377-025}, PUBLISHER = {Walter De Gruyter \& Co (Berlin, DEU)}, ISBN = {9783110767377}, BOOKTITLE = {CLARIN: The Infrastructure for Language Resources}, EDITOR = {Fišer, D. and Witt, A.}, } @INCOLLECTION{CREPALDI_2022_INCOLLECTION_CFMNPT_471258, AUTHOR = {Crepaldi, D. and Ferro, M. and Marzi, C. and Nadalini, A. and Pirrelli, V. and Taxitari, L.}, TITLE = {Finger movements and eye movements during adults' silent and oral reading}, YEAR = {2022}, ABSTRACT = {Using a common tablet and a web application, we can record the finger movements of a reader that is concurrently reading and finger-pointing a text displayed on the tablet touchscreen. In a preliminary analysis of "finger-tracking" data of early-graders we showed that finger movements can replicate established reading effects observed in more controlled settings. Here, we analyse and discuss reading evidence collected by (i) tracking the finger movements of adults reading a short essay displayed on a tablet touchscreen, and (ii) tracking the eye movements of adults reading a comparable text displayed on the screen of a computer. Texts in the two conditions were controlled for linguistic complexity and page layout. 
In addition, we tested adults' comprehension in both silent and oral reading, by asking them multiple-choice questions after reading each text. We show and discuss the reading evidence that the two (optical and tactile) protocols provide, and to what extent they show comparable effects. We conclude with some remarks on the importance of ecology and portability of protocols for large-scale collection of naturalistic reading data.}, KEYWORDS = {Reading, finger-tracking, digital technology}, PAGES = {443-471}, URL = {https://link.springer.com/book/9783030998905}, VOLUME = {23}, PUBLISHER = {Springer (Dordrecht, NLD)}, ISBN = {978-3-030-99890-5}, BOOKTITLE = {Developing language and literacy-Studies in Honor of Dorit Diskin Ravid}, EDITOR = {Levie, R. and Bar On, A. and Ashkenazi, O. and Dattner, E. and Brandes, G.}, } @INCOLLECTION{DEJONG_2022_INCOLLECTION_DM_472304, AUTHOR = {De Jong, F. and Monachini, M.}, TITLE = {Introduction. Selected papers from the CLARIN Annual Conference 2021}, YEAR = {2022}, ABSTRACT = {CLARIN, the Common Language Resources and Technology Infrastructure, is a virtual platform that is accessible for everyone interested in language. CLARIN offers access to language resources, technology, and knowledge, and enables cross-country collaboration among academia, industry, policy-makers, cultural institutions, and the general public. Researchers, students, and citizens are offered access to digital language resources and technology services to deploy, connect, analyse and sustain such resources. 
In line with the Open Science agenda, CLARIN enables scholars from the Social Sciences and Humanities (SSH) and beyond to engage in and contribute to cutting-edge, data-driven research based on language data in a range of formats and modalities.}, KEYWORDS = {Language Resource Infrastructure}, PAGES = {i-v}, URL = {https://publications.cnr.it/doc/472304}, VOLUME = {189}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, BOOKTITLE = {Selected Papers from the CLARIN Annual Conference 2021}, EDITOR = {Monachini, M. and Eskevich, M.}, } @INCOLLECTION{DEJONG_2022_INCOLLECTION_DVFVFW_472288, AUTHOR = {De Jong, F. and Van Uytvanck, D. and Frontini, F. and Van Den Bosch, A. and Fišer, D. and Witt, A.}, TITLE = {Language Matters. The European Research Infrastructure CLARIN, Today and Tomorrow}, YEAR = {2022}, ABSTRACT = {CLARIN stands for "Common Language Resources and Technology Infrastructure". In 2012 CLARIN ERIC was established as a legal entity with the mission to create and maintain a digital infrastructure to support the sharing, use, and sustainability of language data (in written, spoken, or multimodal form) available through repositories from all over Europe, in support of research in the humanities and social sciences and beyond. Since 2016 CLARIN has had the status of Landmark research infrastructure and currently it provides easy and sustainable access to digital language data and also offers advanced tools to discover, explore, exploit, annotate, analyse, or combine such datasets, wherever they are located. This is enabled through a networked federation of centres: language data repositories, service centres, and knowledge centres with single sign-on access for all members of the academic community in all participating countries. In addition, CLARIN offers open access facilities for other interested communities of use, both inside and outside of academia. 
Tools and data from different centres are interoperable, so that data collections can be combined and tools from different sources can be chained to perform operations at different levels of complexity. The strategic agenda adopted by CLARIN and the activities undertaken are rooted in a strong commitment to the Open Science paradigm and the FAIR data principles. This also enables CLARIN to express its added value for the European Research Area and to act as a key driver of innovation and contributor to the increasing number of industry programmes running on data-driven processes and the digitalization of society at large.}, KEYWORDS = {research infrastructure, language resources, language technology, open science, service interoperability, innovation, SSH}, PAGES = {31-58}, URL = {https://www.degruyter.com/document/doi/10.1515/9783110767377-002/html}, VOLUME = {1}, DOI = {10.1515/9783110767377-002}, PUBLISHER = {Walter De Gruyter Inc (Boston/Berlin/Munich, USA)}, ISBN = {978-3-11-076737-7}, BOOKTITLE = {CLARIN: The Infrastructure for Language Resources}, EDITOR = {Fišer, D. and Witt, A.}, } @INCOLLECTION{DELFANTE_2022_INCOLLECTION_DFMQ_469112, AUTHOR = {Del Fante, D. and Frontini, F. and Monachini, M. and Quochi, V.}, TITLE = {Italian Language Resources. From CLARIN-IT to the VLO and Back: Sketching a Methodology for Monitoring LRs Visibility}, YEAR = {2022}, ABSTRACT = {This paper sketches a user-oriented, qualitative methodology for both (i) monitoring the existence and availability of language resources relevant for a given CLARIN national community and language and (ii) assessing the offering potential of CLARIN, in terms of Language Resources provided to national consortia. From the user perspective, the methodology has been applied to investigate the visibility of language resources available for Italian within the CLARIN central services, in particular the Virtual Language Observatory. 
As a proof-of-concept, the methodology has been tested on the resources available through the CLARIN-IT data centres, but, ideally, it could be applied by any national data centre aiming to assess the existence of LRs in CLARIN for any given languages and check their accessibility for the interested users. It is thus argued that such an assessment might be a useful instrument in the hands of national coordinators and centre managers for (i) bringing to the fore both strengths and critical issues about their data providing community and (ii) for planning targeted actions to improve and increase both visibility and accessibility of their LRs.}, KEYWORDS = {Virtual Language Observatory, CLARIN-IT, CLARIN-ERIC, Qualitative Assessment Methodology, User Involvement}, PAGES = {10-22}, URL = {https://ecp.ep.liu.se/index.php/clarin/article/view/413/371}, VOLUME = {189}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, BOOKTITLE = {Selected Papers from the CLARIN Annual Conference 2021}, EDITOR = {Monachini, M. and Eskevich, M.}, } @INCOLLECTION{MARZI_2022_INCOLLECTION_MP_464598, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {Psycholinguistic Research on Inflectional Morphology in the Romance Languages}, YEAR = {2022}, ABSTRACT = {Over the past decades, psycholinguistic aspects of word processing have made a considerable impact on views of language theory and language architecture. In the quest for the principles governing the ways human speakers perceive, store, access, and produce words, inflection issues have provided a challenging realm of scientific inquiry, and a battlefield for radically opposing views. It is somewhat ironic that some of the most influential cognitive models of inflection have long been based on evidence from an inflectionally impoverished language like English, where the notions of inflectional regularity, (de)composability, predictability, phonological complexity, and default productivity appear to be mutually implied. 
An analysis of more "complex" inflection systems such as those of Romance languages shows that this mutual implication is not a universal property of inflection, but a contingency of poorly contrastive, nearly isolating inflection systems. Far from presenting minor faults in a solid, theoretical edifice, Romance evidence appears to call into question the subdivision of labor between rules and exceptions, the on-line processing vs. long-term memory dichotomy, and the distinction between morphological processes and lexical representations. A dynamic, learning-based view of inflection is more compatible with this data, whereby morphological structure is an emergent property of the ways inflected forms are processed and stored, grounded in universal principles of lexical self-organization and their neuro-functional correlates.}, KEYWORDS = {Romance language morphology, paradigms, inflectional classes, lexical self-organisation, frequency effects, priming, discriminative learning, lexical blocking, long-term and short-term memory}, PAGES = {1-44}, URL = {https://oxfordre.com/linguistics/view/10.1093/acrefore/9780199384655.001.0001/acrefore-9780199384655-e-709}, DOI = {10.1093/acrefore/9780199384655.013.709}, PUBLISHER = {Oxford University Press (Oxford, GBR)}, ISBN = {9780199384655}, BOOKTITLE = {Oxford Encyclopedia of Romance Linguistics}, EDITOR = {Loporcaro, M.}, } @INCOLLECTION{SCIOLETTE_2022_INCOLLECTION_SG_470011, AUTHOR = {Sciolette, F. and Giovannetti, E.}, TITLE = {Un modello per domarli tutti: verso una rappresentazione del testo come esplicitazione di documento, lingua e contenuto}, YEAR = {2022}, ABSTRACT = {The aim of this research is to describe the first steps towards the theoretical elaboration of a holistic model to represent textual information. The focus of the model is the definition of "text", with its different dimensions, as a "diasystem". 
The set of elements, organized into distinct but strictly interconnected systems, wherein each element has an effect on the whole diasystem, is described in a model structured in the following components: graphic, linguistic, documental, discursive, and conceptual. In this work, the first attempts in the modeling of text will be shown through two case studies: the Babylonian Talmud and the DiTMAO (Dictionary of Old Occitan medico-botanical terminology).}, KEYWORDS = {textual model, holistic model, diasystem, model of text}, PAGES = {145-157}, URL = {http://www.aitla.it/images/pdf/StudiAItLA14/009_AItLA14_ScioletteGiovannetti.pdf}, VOLUME = {14}, PUBLISHER = {Officinaventuno (Milano, ITA)}, ISBN = {978-88-97657-51-4}, BOOKTITLE = {Fare linguistica applicata con le digital humanities}, EDITOR = {Saturno, J. and Spreafico, L.}, } @INCOLLECTION{VAGIONAKIS_2022_INCOLLECTION_VDBBDMM_472291, AUTHOR = {Vagionakis, I. and Del Gratta, R. and Boschetti, F. and Baroni, P. and Del Grosso, A. M. and Mancinelli, T. and Monachini, M.}, TITLE = {'Cretan Institutional Inscriptions' Meets CLARIN-IT}, YEAR = {2022}, ABSTRACT = {This paper presents 'Cretan Institutional Inscriptions', a resource in the domain of Digital Epigraphy developed at the Ca' Foscari University of Venice and supported by CLARIN-IT as part of its actions addressed to initiatives, projects and events in the field of Social Sciences and Humanities. The paper begins with a brief outline of the project within which the resource was created and then goes into a more in-depth description of the main methodologies used to develop the resource (EpiDoc and EFES) and of their benefits. The paper then focuses on the cooperation of the project with the Venice Centre of Digital and Public Humanities and the Italian node of CLARIN, also illustrating the dockerization process applied to the resource hosted on the CLARIN-IT servers. Some desiderata for future developments are outlined as well. 
The paper ends with some remarks about the widening of CLARIN horizons towards Digital Epigraphy and on the role of its K-Centres in this respect.}, KEYWORDS = {Digital Epigraphy, Digital Classics, Ancient Greek, Crete, Institutions, Text Encoding Initiative, TEI, EpiDoc, EpiDoc Front-End Services, EFES, Virtual Language Observatory, Dockerization, ILC4CLARIN, CLARIN-IT, CLARIN}, PAGES = {139-150}, URL = {https://ecp.ep.liu.se/index.php/clarin/article/view/424/382}, VOLUME = {189}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, BOOKTITLE = {Selected Papers from the CLARIN Annual Conference 2021}, EDITOR = {Monachini, M. and Eskevich, M.}, } @EDITORIAL{MELERO_2022_EDITORIAL_MSS_472132, AUTHOR = {Melero, M. and Sakti, S. and Soria, C.}, TITLE = {Proceedings of The 1st Annual Meeting of the ELRA/ISCA Special Interest Group on Under-Resourced Languages (SIGUL2022)}, YEAR = {2022}, ABSTRACT = {Proceedings of the SIGUL 2022 workshop.}, KEYWORDS = {conference proceedings, less-resourced languages, language resources, NLP}, URL = {https://aclanthology.org/events/lrec-2022/#2022-sigul-1}, ISBN = {979-10-95546-91-7}, } @EDITORIAL{MONACHINI_2022_EDITORIAL_ME_472302, AUTHOR = {Monachini, M. and Eskevich, M.}, TITLE = {Selected Papers from the CLARIN Annual Conference 2021}, YEAR = {2022}, ABSTRACT = {This volume presents the highlights of the 10th CLARIN Annual Conference 2021. The conference was held on 27th--29th September 2021 and because of the COVID-19 pandemic, for the second year in a row a virtual format had to be adopted. CLARIN, the Common Language Resources and Technology Infrastructure, is a virtual platform that is accessible for everyone interested in language. CLARIN offers access to language resources, technology, and knowledge, and enables cross-country collaboration among academia, industry, policy-makers, cultural institutions, and the general public. 
Researchers, students, and citizens are offered access to digital language resources and technology services to deploy, connect, analyse and sustain such resources. In line with the Open Science agenda, CLARIN enables scholars from the Social Sciences and Humanities (SSH) and beyond to engage in and contribute to cutting-edge, data-driven research based on language data in a range of formats and modalities.}, KEYWORDS = {Language Resource Infrastructure}, PAGES = {1-212}, URL = {https://publications.cnr.it/doc/472302}, VOLUME = {189}, DOI = {10.3384/9789179294441}, ISBN = {978-91-7929-444-1}, } @INPROCEEDINGS{AGNOLONI_2022_INPROCEEDINGS_ABFMMQRV_472294, AUTHOR = {Agnoloni, T. and Bartolini, R. and Frontini, F. and Montemagni, S. and Marchetti, C. and Quochi, V. and Ruisi, M. and Venturi, G.}, TITLE = {Making Italian Parliamentary Records Machine-Actionable: the Construction of the ParlaMint-IT corpus}, YEAR = {2022}, ABSTRACT = {This paper describes the process of acquisition, cleaning, interpretation, coding and linguistic annotation of a collection of parliamentary debates from the Senate of the Italian Republic covering the COVID-19 pandemic emergency period and a former period for reference and comparison according to the CLARIN ParlaMint prescriptions. The corpus contains 1199 sessions and 79,373 speeches for a total of about 31 million words, and was encoded according to the ParlaCLARIN TEI XML format. It includes extensive metadata about the speakers, sessions, political parties and parliamentary groups. As required by the ParlaMint initiative, the corpus was also linguistically annotated for sentences, tokens, POS tags, lemmas and dependency syntax according to the universal dependencies guidelines. Named entity annotation and classification is also included. All linguistic annotation was performed automatically using state-of-the-art NLP technology with no manual revision. 
The Italian dataset is freely available as part of the larger ParlaMint 2.1 corpus deposited and archived in CLARIN repository together with all other national corpora. It is also available for direct analysis and inspection via various CLARIN services and has already been used both for research and educational purposes.}, KEYWORDS = {parliamentary debates, CLARIN ParlaMint, corpus creation, corpus annotation}, PAGES = {117-124}, URL = {https://aclanthology.org/2022.parlaclarin-1.17/}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, CONFERENCE_NAME = {Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference}, CONFERENCE_PLACE = {Marseille, France}, CONFERENCE_DATE = {20/06/2022}, } @INPROCEEDINGS{BOSCHETTI_2022_INPROCEEDINGS_BBDDGNZ_472289, AUTHOR = {Boschetti, F. and Burgassi, C. and Del Gratta, R. and Del Grosso, A. M. and Guadagnini, E. and Nahli, O. and Zenzaro, S.}, TITLE = {Il Laboratorio di Filologia Collaborativa e Cooperativa (CoPhiLab) del CNR-ILC: dati, strumenti, servizi e infrastrutture}, YEAR = {2022}, ABSTRACT = {Questo contributo illustra le attività e le risorse del Laboratorio di Filologia Collaborativa e Cooperativa (CoPhiLab) dell'Istituto di Linguistica Computazionale "A. Zampolli" del Consiglio Nazionale delle Ricerche (CNR-ILC), con particolare attenzione all'uso delle infrastrutture di ricerca nazionali e internazionali.}, KEYWORDS = {Filologia Computazionale, Modelli Formali, Lingua Araba, Domain-Specific Languages, Ingegneria del Software}, PAGES = {45-50}, URL = {https://www.eventi.garr.it/it/conf22}, DOI = {10.26314/GARR-Conf22-proceedings}, PUBLISHER = {Associazione Consortium GARR (Roma, ITA)}, ISBN = {978-88-946629-1-7}, CONFERENCE_NAME = {CondiVisioni. La rete come strumento per costruire il futuro}, CONFERENCE_PLACE = {Palermo}, CONFERENCE_DATE = {18/05/2022-20/05/2022}, BOOKTITLE = {CONDIVISIONI La rete come strumento per costruire il futuro}, EDITOR = {Mieli, M. 
and Volpe, C.}, } @INPROCEEDINGS{CACIOLI_2022_INPROCEEDINGS_CCDMDZ_472285, AUTHOR = {Cacioli, G. and Cerretini, G. and Di Pietro, C. and Maenza, S. and Del Turco, R. R. and Zenzaro, S.}, TITLE = {There and back again: what to expect in the next EVT version}, YEAR = {2022}, ABSTRACT = {Developing software as complex as EVT requires a significant amount of time and resources. As a result, the release frequency of new versions has never been particularly high, especially for major versions. The first release of EVT 1 dates back to 2014, in 2016 we published the first alpha version of EVT 2, after which more complete versions followed: a beta1 in 2017, and a beta2 in 2020. Not only did it take three years between the two beta versions of EVT 2, but we still don't see the light for a 1.0 version. What has happened in the meantime? How is it possible that a stable version for EVT 1 was achieved relatively quickly, and development slowed down significantly thereafter? This talk aims at updating the Italian DH community about the current status of EVT and its future prospects with regard to the next version (EVT 3).}, KEYWORDS = {XML-TEI, filologia d'autore, mark-up, varianti, Saba}, PAGES = {212-217}, URL = {http://amsacta.unibo.it/6848/1/Proceedings_AIUCD2022.pdf#page=222}, DOI = {10.6092/unibo/amsacta/6848}, ISBN = {9788894253566}, CONFERENCE_NAME = {AIUCD 2022. Culture digitali. Intersezioni: filosofia, arti, media}, CONFERENCE_PLACE = {Lecce}, CONFERENCE_DATE = {1-3/06/2022}, BOOKTITLE = {AIUCD 2022-Proceedings. Culture digitali. Intersezioni: filosofia, arti, media}, EDITOR = {Ciracì, F. and Miglietta, G. and Gatto, C.}, } @INPROCEEDINGS{CASELLI_2022_INPROCEEDINGS_CDD_472144, AUTHOR = {Caselli, T. and Dini, I. and Dell'Orletta, F.}, TITLE = {How About Time? 
Probing a Multilingual Language Model for Temporal Relations}, YEAR = {2022}, ABSTRACT = {This paper presents a comprehensive set of probing experiments using a multilingual language model, XLM-R, for temporal relation classification between events in four languages. Results show an advantage of contextualized embeddings over static ones and a detrimental role of sentence level embeddings. While obtaining competitive results against state-of-the-art systems, our probes indicate a lack of suitable encoded information to properly address this task.}, KEYWORDS = {Natural Language Processing, Neural Language Models, Temporal Relation Classification}, URL = {https://aclanthology.org/2022.coling-1.283/}, CONFERENCE_NAME = {International Conference on Computational Linguistics (COLING)}, CONFERENCE_PLACE = {Gyeongju, Republic of Korea}, CONFERENCE_DATE = {12-17/10/2022}, BOOKTITLE = {Proceedings of the 29th International Conference on Computational Linguistics, COLING 2022}, } @INPROCEEDINGS{COLOMBO_2022_INPROCEEDINGS_CG_463100, AUTHOR = {Colombo, M. and Giovannetti, E.}, TITLE = {La Visualizzazione Grafica di Sensi e Relazioni Semantiche di un Lessico Computazionale della Lingua Italiana}, YEAR = {2022}, ABSTRACT = {La visualizzazione, intesa come ausilio alla consultazione e alla ricerca, può giocare un ruolo fondamentale nella fruizione di dati linguistici e di conoscenza. Attraverso la rappresentazione di una risorsa lessicale o di una ontologia sotto forma di grafo, ad esempio, uno studioso ha la possibilità di farsi un'idea immediata delle relazioni che intercorrono tra i vari elementi di un lessico (di lingua o specialistico) o dei concetti che formalizzano un determinato dominio. In questo contributo presentiamo un'applicazione web based per la visualizzazione grafica della componente semantica di un lessico computazionale dell'italiano. Il grafo è stato sviluppato con Cytoscape.js, una libreria Javascript per la creazione di grafi. 
L'interfaccia al grafo, sviluppata come applicazione Angular, offre un primo insieme di funzionalità di interazione per la navigazione e la manipolazione della risorsa lessico-semantica.}, KEYWORDS = {human-computer interaction, lessico computazionale, grafi, Visualizzazione grafica di risorse linguistiche, rappresentazione della conoscenza}, PAGES = {155-160}, URL = {http://amsacta.unibo.it/6848/1/Proceedings_AIUCD2022.pdf}, DOI = {10.6092/unibo/amsacta/6848}, ISBN = {9788894253566}, CONFERENCE_NAME = {AIUCD 2022}, CONFERENCE_PLACE = {Lecce}, CONFERENCE_DATE = {1-3/06/2022}, BOOKTITLE = {AIUCD 2022-Culture digitali. Intersezioni: filosofia, arti, media. Proceedings della 11a conferenza nazionale}, } @INPROCEEDINGS{DELFANTE_2022_INPROCEEDINGS_DFMQ_468964, AUTHOR = {Del Fante, D. and Frontini, F. and Monachini, M. and Quochi, V.}, TITLE = {CLARIN-IT: An Overview on the Italian Clarin Consortium After Six Years of Activity}, YEAR = {2022}, ABSTRACT = {This paper offers an overview of the Italian CLARIN consortium after six years since its establishment. The members, the centres and the repositories and the most important collections are described. Lastly, in order to showcase the visibility and the accessibility of Language Resources provided by CLARIN-IT from a user-perspective, we show how Italian resources are findable within CLARIN ERIC.}, KEYWORDS = {Language Resources, Data Repositories and Archives, Research Infrastructures, CLARIN}, PAGES = {8}, URL = {http://ceur-ws.org/Vol-3160/short21.pdf}, PUBLISHER = {CEUR-WS. org (Aachen, DEU)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Italian Research Conference on Digital Libraries}, CONFERENCE_PLACE = {Università degli Studi di Padova}, CONFERENCE_DATE = {24/02/2022}, BOOKTITLE = {Proceedings of the 18th Italian Research Conference on Digital Libraries}, EDITOR = {Di Nunzio, G. M. and Portelli, B. and Redavid, D. and Silvello, G.}, } @INPROCEEDINGS{GAMBA_2022_INPROCEEDINGS_GFBM_472292, AUTHOR = {Gamba, F. and Frontini, F. 
and Broeder, D. and Monachini, M.}, TITLE = {Language Technologies for the Creation of Multilingual Terminologies. Lessons Learned from the SSHOC Project}, YEAR = {2022}, ABSTRACT = {This paper is framed in the context of the SSHOC project and aims at exploring how Language Technologies can help in promoting and facilitating multilingualism in the Social Sciences and Humanities (SSH). Although most SSH researchers produce culturally and societally relevant work in their local languages, metadata and vocabularies used in the SSH domain to describe and index research data are currently mostly in English. We thus investigate Natural Language Processing and Machine Translation approaches in view of providing resources and tools to foster multilingual access and discovery to SSH content across different languages. As case studies, we create and deliver as freely, openly available data a set of multilingual metadata concepts and an automatically extracted multilingual Data Stewardship terminology. The two case studies allow as well to evaluate performances of state-of-the-art tools and to derive a set of recommendations as to how best apply them. Although not adapted to the specific domain, the employed tools prove to be a valid asset to translation tasks. Nonetheless, validation of results by domain experts proficient in the language is an unavoidable phase of the whole workflow.}, KEYWORDS = {Multilingual terminologies, data curation, language resource infrastructures}, PAGES = {154-163}, URL = {https://aclanthology.org/2022.lrec-1.17}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, CONFERENCE_NAME = {13th Conference on Language Resources and Evaluation (LREC 2022)}, CONFERENCE_PLACE = {Marseille, France}, CONFERENCE_DATE = {22/06/2022-24/06/2022}, } @INPROCEEDINGS{HIRSCH_2022_INPROCEEDINGS_HFDD_469567, AUTHOR = {Hirsch, F. and Frontini, F. and Didirková, I. 
and Drengubiak, J.}, TITLE = {Esthétique de la voix dans les livres audio en langue française}, YEAR = {2022}, ABSTRACT = {Aesthetics of voice in French-language audio books. This research aims at studying listeners' preferences in audiobooks' voices. Samples of 8 male and 7 female voices were extracted from different audiobooks and analyzed. A survey has been carried out to obtain 69 listeners' points of view by answering questions on vocal features. Results show that the participants' choices depend on the literary genre. Indeed, male voices are preferred for science-fiction novels and female voices for juvenile literature and contemporary novels. Nevertheless, other literary genres that were tested do not match with a specific voice. On the other hand, essays are expected to be read with a slower speech rate, whereas listeners prefer faster speech rates in erotic novels.}, KEYWORDS = {audiobooks, voice esthetics, speech}, URL = {https://doi.org/10.1051/shsconf/202213808004}, VOLUME = {138}, DOI = {10.1051/shsconf/202213808004}, CONFERENCE_NAME = {8e Congrès Mondial de Linguistique Française}, CONFERENCE_PLACE = {Université d'Orléans, France}, CONFERENCE_DATE = {04-08/07/2022}, BOOKTITLE = {SHS Web of Conferences}, } @INPROCEEDINGS{MARCHI_2022_INPROCEEDINGS_MCDG_463120, AUTHOR = {Marchi, S. and Colombo, M. and Dattilo, D. and Giovannetti, E.}, TITLE = {Un esperimento di visualizzazione grafica della terminologia del Talmud babilonese}, YEAR = {2022}, ABSTRACT = {L'impiego di tecnologie di information visualization nel settore delle digital humanities può aprire nuove frontiere di ricerca. Le informazioni veicolate attraverso modalità grafiche, infatti, possono apparire agli studiosi più immediatamente comprensibili e le interfacce grafiche realizzate fornire inediti paradigmi di studio e di manipolazione dei dati analizzati. 
Il caso d'uso sperimentale illustrato in questo contributo è stato concepito per fornire allo studioso una modalità visiva, immediata, per l'analisi comparativa del contenuto terminologico di un corpus testuale.}, KEYWORDS = {Visualizzazione grafica di risorse testuali, terminologia, linguistica computazionale, tf-idf, grafi}, PAGES = {239-241}, URL = {http://amsacta.unibo.it/6848/1/Proceedings_AIUCD2022.pdf}, DOI = {10.6092/unibo/amsacta/6848}, ISBN = {9788894253566}, CONFERENCE_NAME = {AIUCD 2022}, CONFERENCE_PLACE = {Lecce}, CONFERENCE_DATE = {1-3/06/2022}, BOOKTITLE = {AIUCD 2022-Culture digitali. Intersezioni: filosofia, arti, media. Proceedings della 11a conferenza nazionale}, } @INPROCEEDINGS{MERENDI_2022_INPROCEEDINGS_MDV_472145, AUTHOR = {Merendi, F. and Dell'Orletta, F. and Venturi, G.}, TITLE = {On the Nature of BERT: Correlating Fine-Tuning and Linguistic Competence}, YEAR = {2022}, ABSTRACT = {Several studies in the literature on the interpretation of Neural Language Models (NLM) focus on the linguistic generalization abilities of pre-trained models. However, little attention is paid to how the linguistic knowledge of the models changes during the fine-tuning steps. In this paper, we contribute to this line of research by showing to what extent a wide range of linguistic phenomena are forgotten across 50 epochs of fine-tuning, and how the preserved linguistic knowledge is correlated with the resolution of the fine-tuning task. To this end, we considered a quite understudied task where linguistic information plays the main role, i.e. the prediction of the evolution of written language competence of native language learners. In addition, we investigate whether it is possible to predict the fine-tuned NLM accuracy across the 50 epochs solely relying on the assessed linguistic competence. 
Our results are encouraging and show a high relationship between the model's linguistic competence and its ability to solve a linguistically-based downstream task.}, KEYWORDS = {Natural Language Processing, Neural Language Models, Linguistic Generalization Abilities}, URL = {https://aclanthology.org/2022.coling-1.275}, CONFERENCE_NAME = {International Conference on Computational Linguistics (COLING)}, CONFERENCE_PLACE = {Gyeongju, Republic of Korea}, CONFERENCE_DATE = {12-17/10/2022}, BOOKTITLE = {Proceedings of the 29th International Conference on Computational Linguistics, COLING 2022}, } @INPROCEEDINGS{MIASCHI_2022_INPROCEEDINGS_MRD_469732, AUTHOR = {Miaschi, A. and Ravelli, A. A. and Dell'Orletta, F.}, TITLE = {Punctuation Restoration in Spoken Italian Transcripts with Transformers}, YEAR = {2022}, ABSTRACT = {In this paper, we propose an evaluation of a Transformer-based punctuation restoration model for the Italian language. Experimenting with a BERT-base model, we perform several fine-tuning with different training data and sizes and tested them in an in- and cross-domain scenario. Moreover, we conducted an error analysis of the main weaknesses of the model related to specific punctuation marks. Finally, we test our system either quantitatively and qualitatively, by offering a typical task-oriented and a perception-based acceptability evaluation.}, KEYWORDS = {nlp, transformer models, punctuation restoration}, PAGES = {245-260}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85135083576\&origin=inward}, VOLUME = {13196 LNAI}, DOI = {10.1007/978-3-031-08421-8_17}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, CONFERENCE_NAME = {AIxIA 2021-Advances in Artificial Intelligence}, CONFERENCE_DATE = {1-3/12/2021}, BOOKTITLE = {Lecture notes in computer science}, } @INPROCEEDINGS{PAPUCCI_2022_INPROCEEDINGS_PDMD_474890, AUTHOR = {Papucci, M. and De Nigris, C. and Miaschi, A. 
and Dell'Orletta, F.}, TITLE = {Evaluating Text-To-Text Framework for Topic and Style Classification of Italian texts}, YEAR = {2022}, ABSTRACT = {In this paper, we propose an extensive evaluation of the first text-to-text Italian Neural Language Model (NLM), IT5 [1], on a classification scenario. In particular, we test the performance of IT5 on several tasks involving both the classification of the topic and the style of a set of Italian posts. We assess the model in two different configurations, single- and multi-task classification, and we compare it with a more traditional NLM based on the Transformer architecture (i.e. BERT). Moreover, we test its performance in a few-shot learning scenario. We also perform a qualitative investigation on the impact of label representations in modeling the classification of the IT5 model. Results show that IT5 could achieve good results, although generally lower than the BERT model. Nevertheless, we observe a significant performance improvement of the Text-to-text model in a multi-task classification scenario. Finally, we found that altering the representation of the labels mainly impacts the classification of the topic.}, KEYWORDS = {bert, style classification, t5, text-to-text, topic classification, transformers}, PAGES = {56-70}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85143252156\&origin=inward}, VOLUME = {3287}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Sixth Workshop on Natural Language for Artificial Intelligence, NL4AI 2022}, CONFERENCE_DATE = {30/11/2022}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{PELLINO_2022_INPROCEEDINGS_PSDS_465656, AUTHOR = {Pellino, S. and Sichera, P. and Del Grosso, A. M. 
and Spampinato, D.}, TITLE = {Dalla codifica alla fruizione: l'edizione digitale Bellini Digital Correspondence}, YEAR = {2022}, ABSTRACT = {L'articolo illustra le attività svolte per la realizzazione dell'edizione scientifica digitale delle lettere autografe belliniane (Bellini Digital Correspondence). Il progetto si inserisce in un contesto didattico finalizzato alla rappresentazione e fruizione della corrispondenza del maestro catanese. In seno alle attività è stato sviluppato un tool di gestione delle regioni d'interesse e la personalizzazione del software EVT.}, KEYWORDS = {Digital scholarly edition, correspondence, software design, XSLT, Vincenzo Bellini}, PAGES = {163-168}, URL = {http://amsacta.unibo.it/6848/1/Proceedings_AIUCD2022.pdf}, DOI = {10.6092/unibo/amsacta/6848}, PUBLISHER = {Alma Mater Studiorum-Università di Bologna (Bologna, ITA)}, ISBN = {9788894253566}, CONFERENCE_NAME = {XI Conferenza annuale AIUCD. Culture digitali. Intersezioni: filosofia, arti, media}, CONFERENCE_PLACE = {Lecce}, CONFERENCE_DATE = {1-3/06/2022}, BOOKTITLE = {Proceedings della XI conferenza nazionale AIUCD-Culture digitali. Intersezioni: filosofia, arti, media}, EDITOR = {Ciracì, F. and Miglietta, G. and Gatto, C.}, } @INPROCEEDINGS{QUOCHI_2022_INPROCEEDINGS_QBKMMPRTZ_472419, AUTHOR = {Quochi, V. and Bellandi, A. and Khan, F. and Mallia, M. and Murano, F. and Piccini, S. and Rigobianco, L. and Tommasi, A. and Zavattari, C.}, TITLE = {From Inscriptions to Lexica and Back: A Platform for Editing and Linking the Languages of Ancient Italy}, YEAR = {2022}, ABSTRACT = {Available language technology is hardly applicable to scarcely attested ancient languages, yet their digital semantic representation, though challenging, is an asset for the purpose of sharing and preserving existing cultural knowledge. In the context of a project on the languages and cultures of ancient Italy, we took up this challenge. 
This paper thus describes the development of a user friendly web platform, EpiLexO, for the creation and editing of an integrated system of language resources for ancient fragmentary languages centered on the lexicon, in compliance with current digital humanities and Linked Open Data principles. EpiLexO allows for the editing of lexica with all relevant cross-references: for their linking to their testimonies, as well as to bibliographic information and other (external) resources and common vocabularies. The focus of the current implementation is on the languages of ancient Italy, in particular Oscan, Faliscan, Celtic and Venetic; however, the technological solutions are designed to be general enough to be potentially applicable to different contexts and scenarios.}, KEYWORDS = {Digital Epigraphy, Restsprachen, Lexicon Editing and Linking, tools for DH}, PAGES = {59-67}, URL = {https://aclanthology.org/2022.lt4hala-1.0/}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-78-8}, CONFERENCE_NAME = {Second Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA 2022)}, CONFERENCE_PLACE = {Marseille, France}, CONFERENCE_DATE = {25/06/2022}, BOOKTITLE = {Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA 2022)}, EDITOR = {Sprugnoli, R. and Passarotti, M.}, } @INPROCEEDINGS{QUOCHI_2022_INPROCEEDINGS_QBMTZ_472422, AUTHOR = {Quochi, V. and Bellandi, A. and Mallia, M. and Tommasi, A. and Zavattari, C.}, TITLE = {Supporting Ancient Historical Linguistics and Cultural Studies with EpiLexO}, YEAR = {2022}, ABSTRACT = {This contribution presents a system of independent software components meant to support the creation of ecosystems of interrelated language data (i.e. 
lexica linked to textual testimonies, concepts, metadata, bibliographic references, and other external lexical resources) according to the current state-of-the-art representational models for the semantic web. The system is implemented as a set of autonomous servers exposing Restful APIs that in principle can serve different frontend applications and use cases. In this work they serve the EpiLexO GUI application designed and geared to support scholars of ancient languages of fragmentary attestation in their studies. The development of both the back-ends and the front-end is still work in progress, but a first version is ready for use.}, KEYWORDS = {tools for DH, ancient languages, restsprachen, lexicon editor, corpus management, lexicon-text linking}, PAGES = {39-43}, URL = {https://office.clarin.eu/v/CE-2022-2118-CLARIN2022_ConferenceProceedings.pdf}, ISSN = {2773-2177}, CONFERENCE_NAME = {CLARIN Annual Conference 2022}, CONFERENCE_PLACE = {Prague, Czechia}, CONFERENCE_DATE = {10-12/10/2022}, BOOKTITLE = {CLARIN Annual Conference Proceedings 2022}, EDITOR = {Erjavec, T. and Eskevich, M.}, } @INPROCEEDINGS{ZAMPARELLI_2022_INPROCEEDINGS_ZCBCDHV_470081, AUTHOR = {Zamparelli, R. and Chowdhury, S. A. and Brunato, D. and Chesi, C. and Dell'Orletta, F. and Hasan, A. and Venturi, G.}, TITLE = {SemEval-2022 Task 3: PreTENS-Evaluating Neural Networks on Presuppositional Semantic Knowledge}, YEAR = {2022}, ABSTRACT = {We report the results of the SemEval 2022 Task 3, PreTENS, on evaluation the acceptability of simple sentences containing constructions whose two arguments are presupposed to be or not to be in an ordered taxonomic relation. The task featured two sub-tasks articulated as: (i) binary prediction task and (ii) regression task, predicting the acceptability in a continuous scale. The sentences were artificially generated in three languages (English, Italian and French). 
21 systems, with 8 system papers were submitted for the task, all based on various types of fine-tuned transformer systems, often with ensemble methods and various data augmentation techniques. The best systems reached an F1-macro score of 94.49 (sub-task1) and a Spearman correlation coefficient of 0.80 (sub-task2), with interesting variations in specific constructions and/or languages.},
  KEYWORDS = {Neural Networks, Presuppositional Knowledge, Evaluation},
  PAGES = {228-238},
  URL = {https://aclanthology.org/2022.semeval-1.29.pdf},
  CONFERENCE_NAME = {16th International Workshop on Semantic Evaluation (SemEval-2022)},
  CONFERENCE_PLACE = {Seattle},
  CONFERENCE_DATE = {14-15/07/2022},
  BOOKTITLE = {Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)},
}
% NOTE(review): BOOKTITLE of the entry closed just above was missing and has
% been derived from its CONFERENCE_NAME; confirm against the proceedings.

@INPROCEEDINGS{ZENZARO_2022_INPROCEEDINGS_ZDBR_472278,
  AUTHOR = {Zenzaro, S. and Del Grosso, A. M. and Boschetti, F. and Ranocchia, G.},
  TITLE = {Verso la definizione di criteri per valutare soluzioni di scholarly editing digitale: il caso d'uso GreekSchools},
  YEAR = {2022},
  ABSTRACT = {Il contributo propone alcuni criteri di valutazione per l'analisi degli approcci all'editing testuale. Dopo averli presentati brevemente, si mostra come sono stati applicati alle attività finora svolte nel contesto del progetto ERC-885222 GreekSchools per quanto riguarda la progettazione e lo sviluppo di una piattaforma web collaborativa che fa uso di linguaggi specifici di dominio (DSL) finalizzata allo studio scientifico e alla pubblicazione di testi papiracei mediante edizioni critiche digitali.},
  KEYWORDS = {DSL linguaggi specifici di dominio, filologia computazionale, ingegneria del software, papirologia digitale, Domain Driven Design},
  PAGES = {20-25},
  URL = {http://amsacta.unibo.it/6848/1/Proceedings_AIUCD2022.pdf#page=30},
  DOI = {10.6092/unibo/amsacta/6848},
  ISBN = {9788894253566},
  CONFERENCE_NAME = {AIUCD 2022. Culture digitali. Intersezioni: filosofia, arti, media},
  CONFERENCE_PLACE = {Lecce},
  CONFERENCE_DATE = {1/06/2022-3/06/2022},
  BOOKTITLE = {AIUCD 2022-Proceedings. Culture digitali.
Intersezioni: filosofia, arti, media},
  EDITOR = {Ciracì, F. and Miglietta, G. and Gatto, C.},
}

@INPROCEEDINGS{BOSCHETTI_2022_INPROCEEDINGS_BDGZ_484475,
  AUTHOR = {Boschetti, F. and Del Grosso, A. M. and Guadagnini, E. and Zenzaro, S.},
  TITLE = {L'annotazione del testo},
  YEAR = {2022},
  ABSTRACT = {Il contributo illustra le potenzialità offerte dall'uso dei Linguaggi Specifici di Dominio (DSL) per la produzione di note, commenti, apparati e testi scientifici. Il modello Euporia e l'approccio DSL-Based DSE sono richiamati come possibile alternativa alla codifica mediante XML/TEI per la produzione di edizioni scientifiche digitali.},
  KEYWORDS = {Digital Humanities, Filologia Digitale, Domain Specific Languages, Euporia, DSL-based DSE},
  URL = {https://publications.cnr.it/doc/484475},
  CONFERENCE_NAME = {Filologia Digitale e Testi Italiani Antichi. Verso un sistema integrato di ricerca},
  CONFERENCE_PLACE = {Pisa, Scuola Normale Superiore},
  CONFERENCE_DATE = {27-28/06/2022},
}

@INPROCEEDINGS{DELGROSSO_2022_INPROCEEDINGS_D_484398,
  AUTHOR = {Del Grosso, A. M.},
  TITLE = {Forme di elaborazione per il Codice Pelavicino Digitale. Usare, Riusare ed Estendere l'Edizione},
  YEAR = {2022},
  ABSTRACT = {Il contributo illustra alcune attività condotte in seno al progetto di edizione digitale denominato "Codice Pelavicino". In particolare la presentazione si focalizza sulla rappresentazione dei termini notevoli, sulla presentazione del testo in formato PDF e sulla specificità della codifica come componente di un sistema FAIR.},
  KEYWORDS = {Codice Pelavicino, Digital Humanities, Filologia Digitale, Edition Visualization Technology, XML/TEI},
  URL = {https://pelavicino.labcd.unipi.it/},
  CONFERENCE_NAME = {L'edizione digitale del Codice Pelavicino-Presentazione del lavoro completo},
  CONFERENCE_PLACE = {Sarzana},
  CONFERENCE_DATE = {26/03/2022},
}

@INPROCEEDINGS{DELGROSSO_2022_INPROCEEDINGS_DZB_484339,
  AUTHOR = {Del Grosso, A. M. and Zenzaro, S.
and Boschetti, F.},
  TITLE = {CNR-ILC team e il progetto ERC 885222-GreekSchools},
  YEAR = {2022},
  ABSTRACT = {Presentazione del team di lavoro e delle attività svolte in seno al progetto ERC 885222-GreekSchools durante il convegno di Papirologia Ercolanese organizzato dal 5 al 9 settembre 2022 presso il Castello Pasquini di Castiglioncello (Livorno).},
  KEYWORDS = {Digital Humanities, Digital Papyrology, Digital Scholarly Edition, ERC, GreekSchools, DSL-based DSE},
  URL = {https://greekschools.eu},
  CONFERENCE_NAME = {Convegno di Papirologia Ercolanese},
  CONFERENCE_PLACE = {Castiglioncello},
  CONFERENCE_DATE = {5-9/09/2022},
}

% NOTE(review): CONFERENCE_DATE of the next entry ("29 agosto-settembre 2022")
% has no closing day and does not follow the dd/mm/yyyy pattern used by its
% neighbours; left untouched, confirm the end date with the organisers' site.
@INPROCEEDINGS{MARINETTI_2022_INPROCEEDINGS_MMQBBDPRSZMM_479194,
  AUTHOR = {Marinetti, A. and Murano, F. and Quochi, V. and Ballerini, M. and Boschetti, F. and Del Grosso, A. M. and Piccini, S. and Rigobianco, L. and Solinas, P. and Zinzi, M. and Mallia, M. and Middei, E.},
  TITLE = {Challenges in Encoding Fragmentary Attested Languages},
  YEAR = {2022},
  ABSTRACT = {The ItAnt project investigates the languages of ancient Italy, whose only attestation consists in epigraphic evidence, focusing on Venetic, Oscan, Faliscan and Celtic languages. For this purpose, the project combines the traditional method proper to historical linguistics with the setting up of digital technologies, developing computational tools specifically designed to create a digital set of interrelated resources.},
  KEYWORDS = {digital epigraphy, eLexicography, Linguistic Linked Open Data, Text Encoding},
  URL = {https://ciegl2022.sciencesconf.org/resource/page/id/30},
  CONFERENCE_NAME = {XVI Congresso Internazionale di Epigrafia greca e latina},
  CONFERENCE_PLACE = {Bordeaux},
  CONFERENCE_DATE = {29 agosto-settembre 2022},
}

@INPROCEEDINGS{MARZI_2022_INPROCEEDINGS_MNFMMVPTP_471602,
  AUTHOR = {Marzi, C. and Narzisi, A. and Ferro, M. and Masi, G. and Milone, A. and Viglione, V. and Pelagatti, S. and Tomassini, I.
and Pirrelli, V.},
  TITLE = {Patterns of finger-tracking in Italian early readers with Autism Spectrum Disorder},
  YEAR = {2022},
  ABSTRACT = {Background: Of late, the synergistic interaction of eye and hand movements in the exploration of a visual scene displayed on a computer touchscreen was shown to provide a congruent signature of the "attention maps" of subjects with autism spectrum disorders (ASD). A familiar context where this visual and tactile interaction is exploited is when children use the finger of their dominant hand to point the letters of written words as they are reading, particularly at early stages of their literacy development. In the present work, a dedicated app running on a common tablet is used to capture and analyse the finger-tracking behaviour of children with ASD while they are reading few episodes of a connected text on the tablet touchscreen. The reader's voice is also recorded through the tablet built-in microphone. The sliding movements of the finger across the tablet touchscreen are discretized into a series of densely distributed "touch events", which are then mapped onto the text lines in much the same way eye fixations are projected onto a sequence of words using an eye-tracker. Reading texts are linguistically annotated, to control for levels of reading difficulty, and finger-tracking times are associated with linguistic glosses. Objectives: Investigate patterns of finger-tracking as a potential non biological marker for identification of children with ASD. Methods: A preliminary analysis is offered of evidence of the finger-tracking behaviour of 20 Italian children with high functioning ASD, aged 7-11 years, while they are engaged in reading. A grade-matched control group of children with typical development was included. Patterns of finger-tracking are assessed in connection with three complementary aspects of reading behaviour: (1) word recognition, (2) pace of reading of multi-word intonation units, and (3) text comprehension, controlled by asking children a few multiple-choice questions on text content after each reading session. Results: Considerable variation in levels of reading ability was observed in the ASD sample, with a few children showing clear evidence of impaired reading comprehension. However, fluent readers with ASD exhibit the same correlation between accurate decoding (assessed by measuring per-word reading speed) and high levels of reading comprehension found in controls. Likewise, decoding rates were found to significantly increase with increasing grade levels, following the typical developmental pattern observed in controls. On a less local level of linguistic analysis, the reading pace of ASD readers fails to be modulated according to major syntactic structures, punctuation marks and direct speech turns, an effect concomitant with a flat prosodic intonation of oral reading. Conclusions: Preliminary findings confirm the heterogeneous nature of reading skills in children with ASD, showing that the use of a tablet screen as a tactile interface for visual perception analysis can offer a robust experimental protocol for large-scale, multimodal collection of naturalistic data for extensive assessment of readers with ASD.},
  KEYWORDS = {reading, autism, finger-tracking, developing readers, prediction-driven processing},
  PAGES = {192},
  URL = {https://cdn.ymaws.com/www.autism-insar.org/resource/resmgr/files/insar_2022/2022_Abstract_Book.pdf},
  VOLUME = {2022},
  CONFERENCE_NAME = {INSAR},
  CONFERENCE_PLACE = {Austin, Texas},
  CONFERENCE_DATE = {11-14/05/2022},
  BOOKTITLE = {2022 annual meeting abstract book},
}
% The abstract-book entry closed just above is a single page, so PAGES holds
% one page number instead of a degenerate range.

@INPROCEEDINGS{MARZI_2022_INPROCEEDINGS_MP_471259,
  AUTHOR = {Marzi, C.
and Pirrelli, V.},
  TITLE = {An information-theoretic analysis of the inflectional regular-irregular gradient for optimal processing units},
  YEAR = {2022},
  ABSTRACT = {Prediction-driven word processing defines the human ability to anticipate upcoming input words in recognition. From this perspective, input word forms need to be processed as quickly and efficiently as possible. Under the reasonable assumption that spoken words are memorized and processed as word trees (e.g. Marslen-Wilson's "cohorts"), the larger the size of the cohort of an input word at a certain point in time (and the later its uniqueness point), the harder and slower to process the word is. Regularly and irregularly inflected verb forms have different stem family sizes and different uniqueness points. Using a Recurrent Neural Network (RNN) as a computational model of the human lexical processor, we explore here how their distributional and structural properties may affect (optimal) processing strategies.},
  KEYWORDS = {Morphological inflection, prediction-driven processing, discriminability, non-linearity, learnability},
  PAGES = {50-51},
  URL = {http://www.nytud.hu/imm20/abstracts/main.pdf},
  CONFERENCE_NAME = {20th International Morphology Meeting-(Dedicated to the memory of Ferenc Kiefer)},
  CONFERENCE_PLACE = {Budapest},
  CONFERENCE_DATE = {01-04/09/2022},
}

@INPROCEEDINGS{RANOCCHIA_2022_INPROCEEDINGS_RPVPFVAMRRCPPZBDE_472284,
  AUTHOR = {Ranocchia, G. and Puglia, E. and Vassallo, C. and Pernigotti, C. and Fleischer, K. and Verhasselt, G. and Alessandrelli, M. and Miliani, C. and Romano, F. P. and Rosi, F. and Caliri, C. and Pavone, D. P. and Preisler, Z. and Zenzaro, S. and Boschetti, F. and Del Grosso, A. M. and Enea, A.},
  TITLE = {The Greek philosophical schools according to Europe's earliest history of philosophy.
Towards a new pioneering critical edition of Philodemus' Arrangement of the Philosophers},
  YEAR = {2022},
  ABSTRACT = {Our knowledge about Greek philosophical schools is mostly second-hand and based on Diogenes Laërtius' Lives of Eminent Philosophers (3rd century AD) and Philodemus' Arrangement of the Philosophers (75-50 BC), a treatise in several books which represents the earliest 'history of philosophy' to have reached us directly from antiquity. From this work exclusively preserved by the Herculaneum papyri we may derive a virtually systematic account of the history of Greek philosophical schools, which is unique in its kind.},
  KEYWORDS = {papyrology, digital papyrology, digital humanities, digital cultural heritage},
  URL = {https://publications.cnr.it/doc/472284},
  CONFERENCE_NAME = {XXXth International Congress of Papyrology},
  CONFERENCE_PLACE = {Paris},
  CONFERENCE_DATE = {25/07/2022-30/07/2022},
}

@INPROCEEDINGS{VENUTI_2022_INPROCEEDINGS_VD_484399,
  AUTHOR = {Venuti, M. and Del Grosso, A. M.},
  TITLE = {La Galassia Musisque Deoque},
  YEAR = {2022},
  ABSTRACT = {Il contributo illustra il lavoro di riorganizzazione della galassia MQDQ in un sistema distribuito e collaborativo, facendo uso di Domain Specific Languages accanto alla modalità tradizionale di codifica mediante XML/TEI.},
  KEYWORDS = {Digital Humanities, Filologia Digitale, MQDQ, Filologia Collaborativa, Domain Specific Languages},
  URL = {https://publications.cnr.it/doc/484399},
  CONFERENCE_NAME = {Maratona DH},
  CONFERENCE_PLACE = {Venezia},
  CONFERENCE_DATE = {13/10/2022},
}

@TECHREPORT{ALBANESI_2022_TECHREPORT_ABCGMPS_470012,
  AUTHOR = {Albanesi, D. and Bellandi, A. and Colombo, M. and Giovannetti, E. and Marchi, S. and Papini, M. and Sciolette, F.},
  TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-Rapporto tecnico 21},
  YEAR = {2022},
  ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto del Progetto Traduzione Talmud Babilonese nel periodo novembre 2021 - aprile 2022. Le principali attività tecniche svolte sul sistema Traduco sono state la risoluzione di bug e l'implementazione di nuove funzionalità richieste. Parallelamente, è stata realizzata una applicazione per la consultazione del corpus biblico e sono proseguite le attività volte alla visualizzazione di risorse lessicali tramite grafi e per l'analisi, l'allineamento di testi, l'aggiornamento e la conversione del lessico computazionale PSC da utilizzarsi per la consultazione avanzata, su base morfologica e semantica, del testo talmudico tradotto in italiano.},
  KEYWORDS = {Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Lessici elettronici, rappresentazione della conoscenza, Linguistica Computazionale, traduzione di testi religiosi},
  PAGES = {23},
  URL = {https://publications.cnr.it/doc/470012},
}

@TECHREPORT{ALBANESI_2022_TECHREPORT_ACGMPS_475381,
  AUTHOR = {Albanesi, D. and Colombo, M. and Giovannetti, E. and Marchi, S. and Papini, M. and Sciolette, F.},
  TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-Rapporto tecnico 22},
  YEAR = {2022},
  ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto del Progetto Traduzione Talmud Babilonese nel periodo maggio 2022 - dicembre 2022. Le principali attività tecniche svolte sul sistema Traduco sono state la risoluzione di bug e l'implementazione di nuove funzionalità richieste.
Parallelamente, è proseguito il lavoro di ricerca e sviluppo su tre fronti: i) la nuova versione di Traduco, ii) la visualizzazione grafica di risorse lessicali, e iii) la ricerca full-text sul testo del Talmud tradotto in italiano.},
  KEYWORDS = {Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Lessici elettronici, rappresentazione della conoscenza, Linguistica Computazionale, traduzione di testi religiosi, ricerca full-text},
  PAGES = {40},
  URL = {https://publications.cnr.it/doc/475381},
}

@TECHREPORT{ALBANESI_2022_TECHREPORT_AGP_470013,
  AUTHOR = {Albanesi, D. and Giovannetti, E. and Papini, M.},
  TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-rapporto integrativo 4},
  YEAR = {2022},
  ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto delle attività di progetto previste dalla convenzione integrativa stipulata tra PTTB e ILC-CNR in data 10/07/2018 e condotte nel periodo febbraio 2021 - gennaio 2022.},
  KEYWORDS = {Linguistica Computazionale, Traduzione di Testi Religiosi, Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Sviluppo front-end},
  PAGES = {15},
  URL = {https://publications.cnr.it/doc/470013},
}

% Deliverable numbers in the two titles below are written without the stray
% space after the dot (D6.2, D6.5), matching "T6.2" / "T6.3" in their abstracts.
@TECHREPORT{CARDILLO_2022_TECHREPORT_C_484278,
  AUTHOR = {Cardillo, F. A.},
  TITLE = {DEEPHEALTH-D6.2 Validation of DeepHealth platforms and use cases},
  YEAR = {2022},
  ABSTRACT = {Final report on the validation of DeepHealth platforms and use cases (WP6, T6.2)},
  KEYWORDS = {Artificial Intelligence, HPC, Machine Learning},
  URL = {https://publications.cnr.it/doc/484278},
}

@TECHREPORT{DIDONATO_2022_TECHREPORT_DEPKVVKTTLPTCB_474551,
  AUTHOR = {Di Donato, F. and Eskevich, M. and Provost, L. and Kraker, P. and Van Uytvanck, D. and Vignoli, M. and König, A. and Thiel, C. and Tetteh Ocansey, J. and Lombardo, T. and Pohle, S. and Tóth Czifra, E. and Chen, Y. and Blotière, E.},
  TITLE = {D6.5 Report on Open Science within the EOSC},
  YEAR = {2022},
  ABSTRACT = {The report Open Science within the EOSC collects the findings of T6.3 EOSC guidelines training and advocacy on Open Science, whose main goal is to produce, adapt and reuse general and specific guidelines to implement Open Science practices in disciplinary contexts, as well as to provide training to the SSH community on Open Science practices, tools and EOSC related content. In T6.3 we delivered 12 training sessions and all training materials are published in open access. In addition, we designed, developed and implemented an open workflow for the definition and management of the Open Science Training series, through the definition of a TRIPLE Training Toolkit, which reproduces the guidelines that have been designed and implemented to produce FAIR-by-design training events. This set of documents constitutes section 1 of the present deliverable. Moreover, section 2 of the report presents four use-cases which address Open Science-related issues from different perspectives. The first subsection (CNR) sheds light on pathways for interdisciplinary collaboration and managing networking challenges. The second subsection (OKMAPS) addresses the need for improved discoverability of resources across research publications, research data and research projects. The third (CLARIN) shows how the connection between data, tools and publications can be implemented and made public. A model is explored to enrich the metadata about language resource data and tools from the CLARIN Resource Families with related publications, making use of the GoTriple platform. This cross-connects the CLARIN Virtual Language Observatory, the SSHOC Open Marketplace and the GoTriple platform. The last subsection (CESSDA) examines the balance between making data more accessible and aligning with legal restrictions.
It explains how data is aligned internally to make them easily harvestable, and how, in turn, enrichment features from EOSC (OpenAIRE) and GoTriple systems will be used to improve data at their original sources.},
  KEYWORDS = {open science, training},
  URL = {https://zenodo.org/record/7360227},
}

% NOTE(review): the ABSTRACT of the next entry was scrambled (text fragments
% interleaved, hyphens lost) and has been reassembled from the fragments that
% were present; confirm the wording against the deliverable PDF in its URL.
@TECHREPORT{MARTELLI_2022_TECHREPORT_MMCNVUFQKKLDTTCSKIDGM_472421,
  AUTHOR = {Martelli, F. and Maru, M. and Campagnano, C. and Navigli, R. and Velardi, P. and Ureña Ruiz, R. and Frontini, F. and Quochi, V. and Kallas, J. and Koppel, K. and Langemets, M. and De Does, J. and Tempelaars, R. and Tiberius, C. and Costa, R. and Salgado, A. and Krek, S. and Čibej, J. and Dobrovoljc, K. and Gantar, P. and Munda, T.},
  TITLE = {D3.8 Lexical-semantic analytics for NLP},
  YEAR = {2022},
  ABSTRACT = {The present document illustrates the work carried out in task 3.3 (work package 3) focused on lexical-semantic analytics for Natural Language Processing (NLP). This task aims at computing analytics for lexical-semantic information such as words, senses and domains in the available resources, investigating their role in NLP applications. Specifically, this task concentrates on three research directions, namely i) sense clustering, in which grouping senses based on their semantic similarity improves the performance of NLP tasks such as Word Sense Disambiguation (WSD), ii) domain labeling of text, in which the lexicographic resources made available by the ELEXIS project for research purposes allow better performances to be achieved, and finally iii) analysing the diachronic distribution of senses, for which a software package is made available. In this deliverable, we illustrate the research activities aimed at achieving the aforementioned goals and put forward suggestions for future works. Importantly, we stress the crucial role played by high-quality lexical-semantic resources when investigating such linguistic aspects and their impact on NLP applications. To this end, as an additional contribution, we address the paucity of manually-annotated data in the lexical-semantic research field and introduce the ELEXIS parallel sense-annotated dataset, a novel entirely manually-annotated dataset available in 10 European languages and featuring 5 annotation layers.},
  KEYWORDS = {research infrastructures, lexicography, lexical resources, word-sense disambiguation, WSD, sense-annotated language data, multilinguality},
  PAGES = {67},
  URL = {https://elex.is/wp-content/uploads/ELEXIS_D3_8_Lexical-Semantic_Analytics_for_NLP_final_report.pdf},
}

@TECHREPORT{TASOVAC_2022_TECHREPORT_TTBBBCUFHHJKKKKMMMMMQRRSSVWWZ_463877,
  AUTHOR = {Tasovac, T. and Tiberius, C. and Bamberg, C. and Bellandi, A. and Burch, T. and Costa, R. and Ďurčo, M. and Frontini, F. and Hennemann, J. and Heylen, K. and Jakubíček, M. and Khan, F. and Klee, A. and Kosem, I. and Kovář, V. and Matuška, O. and McCrae, J. and Monachini, M. and Mörth, K. and Munda, T. and Quochi, V. and Repar, A. and Roche, C. and Salgado, A. and Sievers, H. and Váradi, T. and Weyand, S. and Woldrich, A. and Zhanial, S.},
  TITLE = {D5.3 Overview of Online Tutorials and Instruction Manuals},
  YEAR = {2022},
  ABSTRACT = {The ELEXIS Curriculum is an integrated set of training materials which contextualizes ELEXIS tools and services inside a broader, systematic pedagogic narrative. This means that the goal of the ELEXIS Curriculum is not simply to inform users about the functionalities of particular tools and services developed within the project, but to show how such tools and services are a) embedded in both lexicographic theory and practice; and b) representative of and contributing to the development of digital skills among lexicographers. The scope and rationale of the curriculum are described in more detail in the Deliverable D5.2 Guidelines for Producing ELEXIS Tutorials and Instruction Manuals.
The goal of this deliverable, as stated in the project DOW, is to provide "a clear, structured overview of tutorials and instruction manuals developed within the project."},
  KEYWORDS = {ELEXIS, lexicography, training materials},
  PAGES = {31},
  URL = {https://elex.is/wp-content/uploads/ELEXIS_D5_3_Overview-of-Online-Tutorials-and-Instruction-Manuals.pdf},
}

@MISC{BARONI_2022_MISC_B_484305,
  AUTHOR = {Baroni, P.},
  TITLE = {SIGUL Web Site},
  YEAR = {2022},
  ABSTRACT = {Sito Web di SIGUL - Gruppo di Interesse Speciale sulle Lingue con Risorse Insufficienti},
  KEYWORDS = {under-resourced languages, digital diversity, digital survival},
  URL = {https://www.sigul.eu},
}

@MISC{BOSCHETTI_2022_MISC_BD_484483,
  AUTHOR = {Boschetti, F. and Del Grosso, A. M.},
  TITLE = {Una bussola per navigare nei mari digitali: il ruolo di un K-Centre CLARIN per le esigenze degli studiosi},
  YEAR = {2022},
  ABSTRACT = {The contribution will give an overview about the structure of the Common Language Research Infrastructure (CLARIN) and its initiatives devoted to the DH, both at the European and at the national level as well as the activities of a new CLARIN Knowledge Centre: the DiPText-KC. Furthermore, it will introduce the practices that we suggest for a workflow to digit(al)ize texts and how to host projects in ILC4CLARIN through the assistance of the DiPText-KC.},
  KEYWORDS = {Digital Humanities, CLARIN, DiPText-KC, CoPhiLab, Digital Philology},
  URL = {https://publications.cnr.it/doc/484483},
}

@MISC{BOSCHETTI_2022_MISC_BDMTP_484468,
  AUTHOR = {Boschetti, F. and Del Grosso, A. M. and Macchiarelli, A. and Tanozzi, F. and Puliero, J.},
  TITLE = {The Remains of the Text-Natural Language Processing},
  YEAR = {2022},
  ABSTRACT = {The seminar presents some digital technologies for the study of texts of literary nature mediated by the use of computational tools.
Specifically, the following topics are addressed: a) Coronelli's Epitome Cosmografica; b) Aratus' Phaenomena and the art of composing acrostics; c) Latin poets between astronomy and mythology; d) Lemmatisation and Semi-automated linguistic analysis of Latin texts; e) Musisque Deoque: a powerful instrument to study intertextuality.},
  KEYWORDS = {Digital Humanities, Digital Philology, Summer School, MQDQ, NLP},
  URL = {https://www.unive.it/data/33113/2/60398},
}

% Version numbers in the two titles below are written without the stray space
% after the dot (22.04, 0.1).
@MISC{DELGRATTA_2022_MISC_D_471685,
  AUTHOR = {Del Gratta, R.},
  TITLE = {Installing and configuring CLARIN-DSPACE on UBUNTU 22.04 LTS},
  YEAR = {2022},
  ABSTRACT = {Il Manuale tecnico descrive le varie procedure per installare il software CLARIN-DSPACE presso l'Istituto di Linguistica Computazionale},
  KEYWORDS = {CLARIN, CLARIN-DSPACE, Repository, Installation Guide},
  PAGES = {1-67},
  URL = {https://publications.cnr.it/doc/471685},
}

@MISC{DIDONATO_2022_MISC_DPLVPTCB_465244,
  AUTHOR = {Di Donato, F. and Provost, L. and Lombardo, T. and Vignoli, M. and Pohle, S. and Tóth Czifra, E. and Chen, Y. and Blotière, E.},
  TITLE = {TRIPLE Training Toolkit (0.1)},
  YEAR = {2022},
  ABSTRACT = {The TRIPLE Training Toolkit is part of the work performed by Work Package 6 (WP6) under Task 6.3 in the TRIPLE Project (Transforming Research through Linked Interdisciplinary Exploration). The project is funded by the European Commission, under Grant Agreement No. 863420 and will run for 42 months starting from October 2019. The TRIPLE Open Science Training Series focuses on the design and delivery of competence-oriented training to address the specific and general needs of the research community on Open Science topics and on the EOSC. The experiment enabled a reflection on the current challenges to make FAIR-by-design training resources and how to overcome them. The following files are deposited in Zenodo to serve as a reference for those wishing to reproduce this experiment within their own institution or for their own training activities.
Please note that the training series are still ongoing and as such the present document and the files listed below will be followed by updated versions by the end of the project (2023).},
  KEYWORDS = {open science, training, TRIPLE},
  URL = {https://doi.org/10.5281/zenodo.6256198},
}

@MISC{FRONTINI_2022_MISC_FBQMMZUW_463506,
  AUTHOR = {Frontini, F. and Bellandi, A. and Quochi, V. and Monachini, M. and Mörth, K. and Zhanial, S. and Ďurčo, M. and Woldrich, A.},
  TITLE = {CLARIN Tools and Resources for Lexicographic Work},
  YEAR = {2022},
  ABSTRACT = {This course introduces lexicographers to the CLARIN Research Infrastructure and highlights language resources and tools useful for lexicographic practices. The course consists of two parts. In Part 1, you will learn about CLARIN, its technical and knowledge infrastructure, and about how to deposit and find lexical resources in CLARIN. In Part 2, you will become acquainted with CLARIN tools that can be used to create lexical resources.},
  KEYWORDS = {CLARIN, lexicography},
  URL = {https://elexis.humanistika.org/id/UnwYPq70Dewbn7XDEjsMM},
}

% The title below gives the corpus version as "1.0", as its abstract does
% ("Version 1.0 contains ...").
@MISC{MARTELLI_2022_MISC_MNKKGKNPOLKKDUSLVGLQMFTTCSIM_472295,
  AUTHOR = {Martelli, F. and Navigli, R. and Krek, S. and Kallas, J. and Gantar, P. and Koeva, S. and Nimb, S. and Pedersen, B. S. and Olsen, S. and Langemets, M. and Koppel, K. and Üksik, T. and Dobrovoljc, K. and Ureña Ruiz, R. and Sancho Sánchez, J. and Lipp, V. and Váradi, T. and Győrffy, A. and László, S. and Quochi, V. and Monachini, M. and Frontini, F. and Tiberius, C. and Tempelaars, R. and Costa, R. and Salgado, A. and Čibej, J. and Munda, T.},
  TITLE = {Parallel sense-annotated corpus ELEXIS-WSD 1.0},
  YEAR = {2022},
  ABSTRACT = {ELEXIS-WSD is a parallel sense-annotated corpus in which content words (nouns, adjectives, verbs, and adverbs) have been assigned senses. Version 1.0 contains sentences for 10 languages: Bulgarian, Danish, English, Spanish, Estonian, Hungarian, Italian, Dutch, Portuguese, and Slovene.
The corpus was compiled by automatically extracting a set of sentences from WikiMatrix (Schwenk et al., 2019), a large open-access collection of parallel sentences derived from Wikipedia, using an automatic approach based on multilingual sentence embeddings. The sentences were manually validated according to specific formal, lexical and semantic criteria (e.g. by removing incorrect punctuation, morphological errors, notes in square brackets and etymological information typically provided in Wikipedia pages). To obtain a satisfying semantic coverage, sentences with less than 5 words and less than 2 polysemous words were filtered out. Subsequently, in order to obtain datasets in the other nine target languages, for each selected sentence in English, the corresponding WikiMatrix translation into each of the other languages was retrieved. If no translation was available, the English sentence was translated manually. The resulting corpus is comprised of 2,024 sentences for each language.},
  KEYWORDS = {Word Sense Disambiguation, corpus parallelo, disambiguazione automatica del senso, annotazione semantica multilingue},
  URL = {http://hdl.handle.net/11356/1674},
}

@MISC{QUOCHI_2022_MISC_QB_463856,
  AUTHOR = {Quochi, V. and Bellandi, A.},
  TITLE = {LexO editor: the basics-video tutorial},
  YEAR = {2022},
  ABSTRACT = {Video tutorial sull'uso di LexO, un editor di lessici secondo il modello Ontolex-lemon. Il tutorial è parte dell'ELEXIS training programme disponibile sulla piattaforma DARIAH-teach.},
  KEYWORDS = {lexicon editor, video tutorial, training material, lexO, online web application},
  URL = {https://www.youtube.com/watch?v=9KE0laMaTAs\&list=PLoD829qNERpYKq8JRkY4EIGgZCdi0QHOd},
}

% AUTHOR below lists three people in "Surname, Given" form. The previous value
% put "and" between each surname and given name, which BibTeX reads as six
% separate single-word authors. The citation key is kept unchanged so existing
% citations still resolve.
@MISC{SCIOLETTE_2022_MISC_SFGEMS_484511,
  AUTHOR = {Sciolette, Flavia and Giovannetti, Emiliano and Marchi, Simone},
  TITLE = {LexicO},
  YEAR = {2022},
  ABSTRACT = {LexicO is a resource deriving from Parole-Simple-Clips (http://hdl.handle.net/20.500.11752/ILC-88).
This resource contains all four levels of linguistic information represented in PSC (phonology, morphology, syntax, and semantics) which have been automatically analysed to find redundant, erroneous and missing data. The process of updating that conducted to the current version of LexicO starting from PSC included: i) the removal of all sure redundant entries (i.e. duplicates) belonging to all four linguistic levels; ii) the creation of tables dedicated to candidate redundants, detected by considering specific similarities amongst entries; iii) the correction of missing semantic and syntax-semantics interface relations amongst the entries of lexicon.},
  KEYWORDS = {computational lexicon, Lexical Database, semantics, morphology, syntax, phonology},
  URL = {https://dspace-clarin-it.ilc.cnr.it/repository/xmlui/handle/20.500.11752/ILC-977},
}

@MISC{ZENZARO_2022_MISC_ZBD_472305,
  AUTHOR = {Zenzaro, S. and Boschetti, F. and Del Grosso, A. M.},
  TITLE = {Relazione sugli avanzamenti della piattaforma per il progetto GreekSchools},
  YEAR = {2022},
  ABSTRACT = {Relazione sugli avanzamenti della piattaforma per il progetto GreekSchools},
  KEYWORDS = {GreekSchools, Digital Humanities, Digital papyrology},
  URL = {https://publications.cnr.it/doc/472305},
}

% The ampersand in the title below is escaped: a bare ampersand in a typeset
% field is a LaTeX alignment character and aborts the bibliography build.
@MISC{ZENZARO_2022_MISC_ZDB_484324,
  AUTHOR = {Zenzaro, S. and Del Grosso, A. M. and Boschetti, F.},
  TITLE = {CophiEditor \& Viewer},
  YEAR = {2022},
  ABSTRACT = {Piattaforma di Filologia Digitale, Computazionale, Collaborativa e Cooperativa in sviluppo presso CNR-ILC nel contesto del progetto ERC Advanced Grant 885222-GreekSchools "The Greek Philosophical Schools according to Europe's earliest 'history of philosophy'. Towards a new pioneering critical edition of Philodemus' Arrangement of the Philosophers".
La piattaforma implementa il modello di Edizioni Scientifiche Digitali DSL-based DSE.}, KEYWORDS = {Digital Humanities, Filologia Digitale, Filologia Computazionale, Filologia Collaborativa, Edizione Scientifica Digitale, GreekSchools, DSL-based DSE}, URL = {https://cophi.github.io/gs-data-service-api/#/}, } @ARTICLE{BACCO_2021_ARTICLE_BCDM_472153, AUTHOR = {Bacco, L. and Cimino, A. and Dell'Orletta, F. and Merone, M.}, TITLE = {Explainable sentiment analysis: A hierarchical transformer-based extractive summarization approach}, YEAR = {2021}, ABSTRACT = {In recent years, the explainable artificial intelligence (XAI) paradigm is gaining wide research interest. The natural language processing (NLP) community is also approaching the shift of paradigm: building a suite of models that provide an explanation of the decision on some main task, without affecting the performances. It is not an easy job for sure, especially when very poorly interpretable models are involved, like the almost ubiquitous (at least in the NLP literature of the last years) transformers. Here, we propose two different transformer-based methodologies exploiting the inner hierarchy of the documents to perform a sentiment analysis task while extracting the most important (with regards to the model decision) sentences to build a summary as the explanation of the output. For the first architecture, we placed two transformers in cascade and leveraged the attention weights of the second one to build the summary. For the other architecture, we employed a single transformer to classify the single sentences in the document and then combine the probability scores of each to perform the classification and then build the summary. We compared the two methodologies by using the IMDB dataset, both in terms of classification and explainability performances. To assess the explainability part, we propose two kinds of metrics, based on benchmarking the models' summaries with human annotations. 
We recruited four independent operators to annotate few documents retrieved from the original dataset. Furthermore, we conducted an ablation study to highlight how implementing some strategies leads to important improvements on the explainability performance of the cascade transformers model.}, KEYWORDS = {Natural Language Processing, Sentiment Analysis, Explainable IA}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85114289346\&origin=inward}, VOLUME = {10}, DOI = {10.3390/electronics10182195}, PUBLISHER = {MDPI (Basel)}, ISSN = {2079-9292}, JOURNAL = {Electronics (Basel)}, } @ARTICLE{BOSCHETTI_2021_ARTICLE_BM_472309, AUTHOR = {Boschetti, F. and Mugelli, G.}, TITLE = {Il metodo Euporia per creare nuovi archivi digitali sulla tragedia greca}, YEAR = {2021}, ABSTRACT = {This article illustrates Euporia, i.e. a method for annotating literary texts based on Domain-Specific Languages. The annotation systems developed using this method are modeled on the needs of the users, their specific habits of studying and annotating texts, and the aims of their research. The two case studies discussed in this contribution show the application of the method in the context of two projects focused on the texts of Greek tragedy: on the one hand, a didactic project based on Aeschylus' Persians, on the other hand, a research project in the field of anthropology of the ancient world, based on the entire corpus of the Greek tragedy. 
The discussion aims to illustrate how text annotation through DSL can have both a didactic function and a scientific interest.}, KEYWORDS = {Domain-Specific Languages, textual annotation, Ancient Greek Tragedy}, PAGES = {83-113}, URL = {https://ojs.cimedoc.uniba.it/index.php/fc/article/view/1381}, VOLUME = {7}, DOI = {10.15162/2465-0951/1381}, PUBLISHER = {Centro Interuniversitario di Ricerca di Studi sulla Tradizione CIRST (Bari, Italia)}, ISSN = {2465-0951}, JOURNAL = {FuturoClassico FCl}, } @ARTICLE{CARDILLO_2021_ARTICLE_CS_458770, AUTHOR = {Cardillo, F. A. and Straccia, U.}, TITLE = {Fuzzy OWL-Boost: learning fuzzy concept inclusions via real-valued boosting}, YEAR = {2021}, ABSTRACT = {OWL ontologies are nowadays a quite popular way to describe structured knowledge in terms of classes, relations among classes and class instances. In this paper, given an OWL ontology and a target class T, we address the problem of learning fuzzy concept inclusion axioms that describe sufficient conditions for being an individual instance of T (and to which degree). To do so, we present FUZZY OWL-BOOST that relies on the Real AdaBoost boosting algorithm adapted to the (fuzzy) OWL case. We illustrate its effectiveness by means of an experimentation with several ontologies.}, KEYWORDS = {OWL Ontology, Machine Learning, Fuzzy Logic, Boosting}, PAGES = {164-186}, URL = {https://www.sciencedirect.com/science/article/abs/pii/S0165011421002426}, VOLUME = {438}, DOI = {10.1016/j.fss.2021.07.002}, PUBLISHER = {North-Holland (Amsterdam, Paesi Bassi)}, ISSN = {0165-0114}, JOURNAL = {Fuzzy sets and systems}, } @ARTICLE{DELGRATTA_2021_ARTICLE_DBBS_461498, AUTHOR = {Del Gratta, R. and Boschetti, F. and Bambaci, L. and Sarnari, F.}, TITLE = {Document analysis and Textual philology: A Formal Perspective}, YEAR = {2021}, ABSTRACT = {We introduce a formal approach to document and text analysis. 
The method proposed herein results in a mathematical model/framework which can formalize different challenges in research fields such as computational linguistics, digital philology, and software engineering, principally if applied to document and text analysis. We examine texts and documents from an evolutionary perspective, where both corruption and correction are involved. We describe document evolution via fibre bundles formalism. We also provide other examples to demonstrate the capabilities of the model.}, KEYWORDS = {Formal model, document analysis, evolutionary approach, fibre bundles}, PAGES = {5-15}, URL = {https://www.innove.org/ijist/index.php/ijist/article/view/192}, VOLUME = {5}, PUBLISHER = {[El Mohajir Mohammed] ([S. l. ], Marocco)}, ISSN = {2550-5114}, JOURNAL = {International Journal of Information Science and Technology}, } @ARTICLE{DELGRATTA_2021_ARTICLE_DGPC_451726, AUTHOR = {Del Gratta, R. and Goggi, S. and Pardelli, G. and Calzolari, N.}, TITLE = {The LRE Map: what does it tell us about the last decade of our field?}, YEAR = {2021}, ABSTRACT = {The LRE Map of Language Resources was introduced at LREC 2010. Its intended purpose was: "to shed light on the vast amount of resources that represent the background of the research presented at LREC" (Calzolari et al. in: Calzolari et al. (eds) Proceedings of the seventh international conference on language resources and evaluation (LREC'10). European Language Resources Association (ELRA), Valletta, 2010). It also aimed at a change of culture in the field, actively engaging each researcher both in the documentation task about resources and in sharing resources. When we started to use it regularly also in other conferences, it became clear that it was an innovative instrument able to provide a picture of the field and its evolution as reflected by the creation and use of Language Resources. After 9 years we revisit the Map, considerably extending the data analysed in an LREC 2018 paper. 
The LRE Map data analysed here have been provided by the authors of 21 conferences during the phase of submission of papers, and contain information about 9405 resources. We analyse the LRE Map data from many different viewpoints and the paper reports on the global picture, along the many Map dimensions, on different trends emerging from a diachronic perspective and finally on some comparisons between five editions of the two major conferences present in the Map: LREC and COLING.}, KEYWORDS = {LR infrastructure, metadata, LR documentation}, PAGES = {259-283}, URL = {https://link.springer.com/article/10.1007/s10579-020-09520-6}, VOLUME = {55}, DOI = {10.1007/s10579-020-09520-6}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @ARTICLE{DELGROSSO_2021_ARTICLE_DFMTN_458287, AUTHOR = {Del Grosso, A. M. and Fihri, D. F. and Mohajir, M. E. and Tonazzini, A. and Nahli, O.}, TITLE = {Challenges in the digital analysis of historical laminated manuscripts}, YEAR = {2021}, ABSTRACT = {In this paper, we analyze and discuss the characteristics of a system for the effective digital preservation and fruition of historical manuscripts degraded by the process of lamination. The most significant degradation caused by lamination is that the parchment or paper support loses its flatness, and usually presents ripples and warnings. This, together with the affixed translucent varnish, dramatically impair the digital acquisition process, so that light reflections in the more disparate directions affect the digital images.
A digital system to contrast this irreversible and progressive degradation and to enable an effective access to the fragile asset should provide a number of functionalities: specialized digitization, able to avoid reflections as much as possible; image enhancement, devised to correct the residual degradations and enhance the text for an easier legibility; semi-automatic transcription of the virtually restored pages; and, finally, scholarly encoding and linguistic analysis, which should adapt existing tools to the specificity of the primary source (writing system and language). As a case study, we will make reference to the "Poem in Rajaz on medicine", written by Abubacer in the XII century, and conserved in the Al Quaraouiyine Library located in Fez, Morocco. The feasibility study for the realization of such a system is of general utility, in that it can provide guidelines for the digitization, the enhancement and the text encoding of the many laminated manuscripts conserved in other historical archives. On the other hand, from the cultural heritage point of view, the experimentation on the "Poem in Rajaz on medicine" could foster the systematic philological and ontological study of a unique piece of our documental heritage: the longest poem of medieval Islamic medical literature.}, KEYWORDS = {Cultural Heritage Digital Safeguard, Historical Manuscript Digitization, Document Image Processing, Linguistic Analysis, Ontological Analysis}, PAGES = {34-43}, URL = {https://innove.org/ijist/index.php/ijist/article/view/190}, VOLUME = {5}, DOI = {10.57675/IMIST.PRSM/ijist-v5i1.190}, PUBLISHER = {[El Mohajir Mohammed] ([S. l. ], Marocco)}, ISSN = {2550-5114}, JOURNAL = {International Journal of Information Science and Technology}, } @ARTICLE{GIOVANNETTI_2021_ARTICLE_GABDDM_457778, AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Dattilo, D. and Del Grosso, A. M. 
and Marchi, S.}, TITLE = {An ontology of masters of the Babylonian Talmud}, YEAR = {2021}, ABSTRACT = {The purpose of this research is to build an ontology of the masters appearing in the Babylonian Talmud (BT). The ontology built so far has been shared as a Linked Open Data and it will be linked to existing vocabularies. This work has been developed in the context of the Babylonian Talmud Translation Project, where more than eighty Talmudists are working together, since 2012, at the translation (comprehensive of explicative notes and glossaries) of the Talmud into Italian. The construction of the resource has involved the application of tools leveraging on computational linguistics approaches. The ontology, already describing more than 500 masters, constitutes the first portion of a more comprehensive Talmudic Knowledge Base where the text itself, the terminology, the entities, and the concepts constituting the BT will be formalized and linked to each other.}, KEYWORDS = {ontology, babylonian talmud, terminology, word alignment, linked open data, semantic web, knowledge representation}, PAGES = {725-737}, URL = {https://academic.oup.com/dsh/article-abstract/37/3/725/6410110}, VOLUME = {37}, DOI = {10.1093/llc/fqab043}, PUBLISHER = {Oxford University Press (Oxford, UK, Regno Unito)}, ISSN = {2055-7671}, JOURNAL = {Digital Scholarship in the Humanities}, } @ARTICLE{GUADAGNINI_2021_ARTICLE_G_449622, AUTHOR = {Guadagnini, E.}, TITLE = {3. 5. Paolo e Francesca per bambini, Paolo e Francesca per adulti: censure, revisioni, riletture}, YEAR = {2021}, ABSTRACT = {Selezione e commento di alcune riletture della vicenda di Paolo e Francesca (Dante, Inf. 5).}, KEYWORDS = {Dante Alighieri, riscritture, fumetto, letteratura per l'infanzia}, URL = {http://www.arabeschi.it/35-paolo-e-francesca-per-bambini-adulti-censure-revisioni-riletture/}, VOLUME = {17}, PUBLISHER = {s. 
n (Catania; Pisa, Italia)}, ISSN = {2282-0876}, JOURNAL = {Arabeschi}, } @ARTICLE{GUADAGNINI_2021_ARTICLE_G_461428, AUTHOR = {Guadagnini, E.}, TITLE = {Hélène Miesse, Un laboratorio di carte. Il linguaggio della politica nel «carteggio» di Francesco Guicciardini}, YEAR = {2021}, KEYWORDS = {Guicciardini, lessico, politica}, PAGES = {206-212}, URL = {https://publications.cnr.it/doc/461428}, VOLUME = {85}, ISSN = {0035-1458}, JOURNAL = {Revue de linguistique romane}, } @ARTICLE{MARZI_2021_ARTICLE_MGSV_447049, AUTHOR = {Marzi, C. and Greco, A. and Scilingo, E. P. and Vanello, N.}, TITLE = {Towards a model of arousal change after affective word pronunciation based on electrodermal activity and speech analysis}, YEAR = {2021}, ABSTRACT = {In this paper, we explore the possibility of building a model of subject arousal by exploiting the acquisition and the analysis of speech and electrodermal activity (EDA). Several issues have to be addressed to reach this goal as the estimation of the relationship between arousal and behavioral measures and the reliability of EDA signal during speech production. To accomplish this task, we will investigate the relation among EDA, speech activity and subject arousal, during isolated affective word pronunciation. Our results show that significant information on subject arousal can be obtained by analyzing EDA during the processing of out-of-context words with an emotional content in a reading aloud task. Based on a sample of eighteen Italian participants, we observed a significant relation between EDA features and self-reported arousal scores. Quantitative models relating EDA and speech-derived features are proposed and discussed. We found that increasing values of tonic and phasic components of EDA signals correspond to increasing self-assessed arousal scores; Mel-frequency cepstral analysis of speech was also shown to carry relevant information about subject arousal, with a significant inverse relation to self-assessed scores. 
Our results suggest how the analysis of concurrent acquisition of EDA and speech features may offer a valid approach for the prediction of subject arousal during speech production, as well as a method for validating self-assessment ratings themselves.}, KEYWORDS = {speech, electrodermal activity, statistical models, arousal, word pronunciation}, PAGES = {1-8}, URL = {http://www.elsevier.com/locate/bspc}, VOLUME = {67}, DOI = {10.1016/j.bspc.2021.102517}, PUBLISHER = {Elsevier (Oxford, Regno Unito)}, ISSN = {1746-8094}, JOURNAL = {Biomedical signal processing and control (Print)}, } @ARTICLE{MIASCHI_2021_ARTICLE_MBD_454570, AUTHOR = {Miaschi, A. and Brunato, D. and Dell'Orletta, F.}, TITLE = {A NLP-based stylometric approach for tracking the evolution of L1 written language competence}, YEAR = {2021}, ABSTRACT = {In this study we present a Natural Language Processing (NLP)-based stylometric approach for tracking the evolution of written language competence in Italian L1 learners. The approach relies on a wide set of linguistically motivated features capturing stylistic aspects of a text, which were extracted from students' essays contained in CItA (Corpus Italiano di Apprendenti L1), the first longitudinal corpus of texts written by Italian L1 learners enrolled in the first and second year of lower secondary school. We address the problem of modeling written language development as a supervised classification task consisting in predicting the chronological order of essays written by the same student at different temporal spans. The promising results obtained in several classification scenarios allow us to conclude that it is possible to automatically model the highly relevant changes affecting written language evolution across time, as well as identifying which features are more predictive of this process. 
In the last part of the article, we focus the attention on the possible influence of background variables on language learning and we present preliminary results of a pilot study aiming at understanding how the observed developmental patterns are affected by information related to the school environment of the student.}, KEYWORDS = {stylometry, computational linguistics, language competence}, PAGES = {71-105}, URL = {https://www.jowr.org/abstracts/vol13_1/Miaschi_et_al_2021_13_1_abstract.html}, VOLUME = {13}, DOI = {10.17239/jowr-2021.13.01.03}, PUBLISHER = {Universiteit Antwerpen (Antwerpen, Belgio)}, ISSN = {2030-1006}, JOURNAL = {Journal of Writing Research}, } @ARTICLE{MONACHINI_2021_ARTICLE_MSCPB_466817, AUTHOR = {Monachini, M. and Stamuli, M. F. and Calamai, S. and Pretto, N. and Bianchi, S.}, TITLE = {The Grey-side of Audio Archives}, YEAR = {2021}, ABSTRACT = {Archives often include documents that can hardly be considered publications or grey literature as such, yet they maintain their documentary value and play a role of primary sources for the specialists. These documents, indeed, can help archivists to reveal the sedimentation process of the archive itself and to preserve the authentic context of the documentary production. They also appear to be very useful for the community of researchers and scholars. This happens more frequently with oral archives which include 'non-conventional sources', thus bringing together audio documents, fieldworks notes, correspondence, slipcases, analogic compact cassettes or open reels.
At the cross-road of two disciplines, Archival Science and Grey Literature, this paper aims to argue the applicability of the concept of grey literature to this wide range of documentary materials, by showing the experience of Archivio Vi.Vo, a regional project aiming at building a model for archiving, preserving, managing and disseminating audio documents.}, KEYWORDS = {oral archives, infrastructures}, PAGES = {34-37}, URL = {https://publications.cnr.it/doc/466817}, VOLUME = {22}, PUBLISHER = {TransAtlantic (Amsterdam, Paesi Bassi)}, ISSN = {1386-2316}, JOURNAL = {The GL-conference series. Conference proceedings}, } @ARTICLE{MUGELLI_2021_ARTICLE_MBBDKT_461550, AUTHOR = {Mugelli, G. and Boschetti, F. and Bellandi, A. and Del Gratta, R. and Khan, A. F. and Taddei, A.}, TITLE = {Annotating ritual in ancient greek tragedy: A bottom-up approach in action}, YEAR = {2021}, ABSTRACT = {EuporiaRAGT is one of the pilot projects that adopt the Euporia system as a digital support to an historico-anthropological research on the form and function of rituals in the texts of ancient Greek tragedy. This paper describes the bottom-up approach adopted in the project: During the annotation stage, performed with a Domain Specific Language designed with a usercentred approach, the domain expert can annotate ritual and religious phenomena, with the possibility of registering different textual and interpretive variants; the design of a search engine, in a second phase of the work, allows the database to be tested and reorganized. 
Finally, the construction of an ontology allows to structure the tags, in order to perform complex queries on the database.}, KEYWORDS = {digital philology, dsl, ancient Greek}, PAGES = {17}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85117031943\&origin=inward}, VOLUME = {15}, PUBLISHER = {Alliance of Digital Humanities Organizations ([Providence, RI?], Stati Uniti d'America)}, ISSN = {1938-4122}, JOURNAL = {Digital humanities quarterly}, } @ARTICLE{NAHLI_2021_ARTICLE_ND_463930, AUTHOR = {Nahli, O. and Del Grosso, A. M.}, TITLE = {Structuring Arabic lexical and morphological resources using TEI: theory and practice}, YEAR = {2021}, ABSTRACT = {An Arabic word can be described according to its lexical and morphological information. The lexical information, conveyed by the root, consists of both semantic meaning and syntactic properties (e.g. parts of speech). The morphological information, encoded by patterns, is useful to group the words having similar syntactic, inflectional and semantic behaviour. Lexical analysis and morphological analysis have been separately described since the very first studies of the Arabic language. Although several scholarly works have illustrated Arabic lexicon models that encode semantic meanings, a systematic description of word patterns is still strongly lacking. In this work, we have implemented an exhaustive resource consisting of two levels: lexical and morphological. The lexical level collects information extracted from the dictionary al=qāmūs al=muḥīṭ. The morphological level describes pattern formalization, which allows to enrich word descriptions with additional semantic, morphosyntactic and inflectional information. To build our digital resource, taking into account primary source, lexical requirements, and reusability, we followed the guidelines provided by the Text Encoding Initiative (abbreviated as TEI).
In particular, we adopted the TEI module for the encoding of digital dictionaries and lexicons to formally represent the medieval al=qāmūs al=muḥīṭ dictionary. Given the complexity of describing the morphological information present in the patterns, we also used the TEI module devoted to encoding feature structures. Consequently, we are building an exhaustive resource formed by the lexical and the morphological blocks. These two components are distinct but complementary resources where the lexical data are connected to morphological information. In addition, the morphological resource can be used as a stand-alone tool that allows the morphological analyzers to capture aspects of meaning that cannot be identified by current systems.}, KEYWORDS = {classical Arabic dictionary, digital lexicography, al=qāmūs al=muḥīṭ}, PAGES = {3-14}, URL = {https://innove.org/ijist/index.php/ijist/article/view/191}, VOLUME = {5}, PUBLISHER = {[El Mohajir Mohammed] ([S. l. ], Marocco)}, ISSN = {2550-5114}, JOURNAL = {International Journal of Information Science and Technology}, } @ARTICLE{NAHLI_2021_ARTICLE_NSBB_463923, AUTHOR = {Nahli, O. and Sanna, A. and Bandini, M. and Boschetti, F.}, TITLE = {Commerce Numérique: traffic signals for the crossroads between cultures}, YEAR = {2021}, ABSTRACT = {Commerce is a literary French journal founded by Princess Margherita Caetani, involving three prestigious collaborators: Paul Valéry, Léon-Paul Fargue, Valéry Larbaud. It is composed by 29 volumes published between 1924 and 1932. Each volume collects different literary material of various well-known and unknown writers as poems or novels, translating some of the most important authors like Joyce, T.S. Eliot, Pirandello, Ungaretti, Saint-John Perse, Rilke, Hofmannsthal.
Considering the historical, literary, and cultural importance of the Commerce journal, our project "Commerce numérique" aims to digitize and to make the journal contents freely available online to both the general public and the research community. This article presents how the journal was encoded. Also, we give importance to the coding of poems present in Commerce. Indeed, some poems are original in another language and they are accompanied by their French translation. Other poems are a French-translated form without original text. In order to fully and accurately express the phenomena and their structures, we have adopted some aspects of the TEI framework, which we will explain in detail. Particular attention was paid to the French translation of a Moroccan Arabic poem from the 13th century. On the one hand, the original Arabic poetry is interesting because it presents some aspects of the Moroccan dialect and some aspects of the oral text. On the other hand, the study and the encoding of Arabic poetry in parallel to its translation highlight some important structural differences between Arabic poetry and Western poetry.}, KEYWORDS = {Commerce Journal, OCR, TEI encoding, literary journal, digital resources, Arabic poetry}, PAGES = {36-45}, URL = {https://innove.org/ijist/index.php/ijist/article/view/193}, VOLUME = {5}, PUBLISHER = {[El Mohajir Mohammed] ([S. l. ], Marocco)}, ISSN = {2550-5114}, JOURNAL = {International Journal of Information Science and Technology}, } @ARTICLE{PANCKHURST_2021_ARTICLE_PF_455049, AUTHOR = {Panckhurst, R. and Frontini, F.}, TITLE = {An Internationally Fair Mediated Digital Discourse Corpus: Improving Knowledge on Reuse}, YEAR = {2021}, ABSTRACT = {In this paper, the authors present a French Mediated Digital Discourse corpus, (88milSMS http://88milsms.huma-num.fr https://hdl.handle.net/11403/comere/cmr-88milsms).
Efforts were undertaken over the years to ensure its publication according to the best practices and standards of the community, thus guaranteeing compliance with FAIR principles and CLARIN recommendations with pertinent scientific and pedagogical reuse. Since knowledge on how resources are reused is sometimes difficult to obtain, ways of improving this are also envisaged.}, KEYWORDS = {Reuse, FAIR, SMS, corpus}, PAGES = {185-193}, URL = {https://ecp.ep.liu.se/index.php/clarin/article/view/20}, VOLUME = {180}, DOI = {10.3384/ecp18020}, PUBLISHER = {Linköping University Electronic Press (Linköping, Svezia)}, ISSN = {1650-3740}, JOURNAL = {Linköping electronic conference proceedings (Online)}, } @ARTICLE{PROIETTI_2021_ARTICLE_PY_454043, AUTHOR = {Proietti, C. and Yuste Ginel, A.}, TITLE = {Dynamic epistemic logics for abstract argumentation}, YEAR = {2021}, ABSTRACT = {This paper introduces a multi-agent dynamic epistemic logic for abstract argumentation. Its main motivation is to build a general framework for modelling the dynamics of a debate, which entails reasoning about goals, beliefs, as well as policies of communication and information update by the participants. After locating our proposal and introducing the relevant tools from abstract argumentation, we proceed to build a three-tiered logical approach. At the first level, we use the language of propositional logic to encode states of a multi-agent debate. This language allows to specify which arguments any agent is aware of, as well as their subjective justification status. We then extend our language and semantics to that of epistemic logic, in order to model individuals' beliefs about the state of the debate, which includes uncertainty about the information available to others. As a third step, we introduce a framework of dynamic epistemic logic and its semantics, which is essentially based on so-called event models with factual change. 
We provide completeness results for a number of systems and show how existing formalisms for argumentation dynamics and unquantified uncertainty can be reduced to their semantics. The resulting framework allows reasoning about subtle epistemic and argumentative updates--such as the effects of different levels of trust in a source--and more in general about the epistemic dimensions of strategic communication.}, KEYWORDS = {Abstract argumentation, Dynamic epistemic logic, Awareness logics, Multi-agent argumentation frameworks, Persuasion, Strategic Argumentation}, PAGES = {1-60}, URL = {https://link.springer.com/article/10.1007/s11229-021-03178-5#citeas}, DOI = {10.1007/s11229-021-03178-5}, PUBLISHER = {Kluwer (Dordrecht, Paesi Bassi)}, ISSN = {1573-0964}, JOURNAL = {Synthese (Dordr., Online)}, } @ARTICLE{SALVATI_2021_ARTICLE_SR_468981, AUTHOR = {Salvati, L. and Russo, I.}, TITLE = {Indicatori di complessità nel parlato degli insegnanti di italiano L2: un'analisi quantitativa}, YEAR = {2021}, ABSTRACT = {Sul parlato dell'insegnante di L2 (identificato anche come teacher talk) esiste una vasta letteratura, in particolare per quanto riguarda l'insegnamento dell'inglese come lingua seconda. Agli approcci di carattere più teorico-normativo si accompagnano, negli ultimi trenta anni, approcci che prendono spunto dalle tecniche di analisi della linguistica dei corpora e che si focalizzano su casi di studio specifici, andando a verificare su dati concreti le ipotesi formulate dalla teoria. L'ipotesi di base di questo ambito di ricerca è che gli insegnanti compiono - in maniera non sempre consapevole e pianificata - degli adeguamenti nel loro modo di parlare di fronte ad una classe di apprendenti. Tali adeguamenti riguardano più livelli linguistici e variano di intensità a seconda del livello complessivo di competenza degli apprendenti.
Nel presente lavoro ci proponiamo di analizzare quantitativamente la complessità del parlato di insegnanti madrelingua di italiano L2 raccolto e trascritto durante lezioni appartenenti a due livelli del Quadro comune europeo di riferimento per le lingue: apprendimento, insegnamento, valutazione (QCER, Council of Europe, 2002 [2001]), A1 e B1. Una parte delle trascrizioni riguarda lezioni svolte in classe (corpus ParInIt, Parlato di Insegnanti di Italiano), nella quale vi è compresenza fisica tra insegnante e apprendenti, un secondo corpus è invece composto da lezioni somministrate online in maniera asincrona, tramite un canale YouTube (corpus Oneworlditaliano). Proponiamo una classificazione degli adeguamenti rispetto alla quale l'analisi quantitativa della complessità degli indicatori linguistici verificherà se è possibile distinguere sia tra il livello A1 e il livello B1 nel corpus raccolto in presenza e nel corpus relativo alle lezioni on line. L'obiettivo finale è comprendere se un'analisi quantitativa dei dati possa aiutare ad individuare gli adeguamenti e le modifiche linguistiche attuate dai docenti per favorire una maggiore comprensibilità dell'input da parte degli apprendenti.}, KEYWORDS = {teacher talk, corpus linguistics, complessità lessicale}, PAGES = {122-132}, URL = {https://riviste.unimi.it/index.php/promoitals/article/view/17132/15088}, VOLUME = {13}, PUBLISHER = {Università degli Studi di Milano (Milano, Italia)}, ISSN = {2037-3597}, JOURNAL = {Italiano linguadue}, } @ARTICLE{SALVATORI_2021_ARTICLE_SBD_472280, AUTHOR = {Salvatori, E. and Boschetti, F. and Del Grosso, A.
M.}, TITLE = {A un anno da AIUCD2021}, YEAR = {2021}, KEYWORDS = {Digital Humanities, AIUCD}, PAGES = {1-4}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85127630558\&origin=inward}, VOLUME = {2021}, DOI = {10.6092/issn.2532-8816/14209}, ISSN = {2532-8816}, JOURNAL = {Umanistica Digitale}, } @BOOK{MARZI_2021_BOOK_M_461758, AUTHOR = {Marzi, C.}, TITLE = {Modelling the morphological lexicon-A computational approach to mono-and bilingual learning and processing of verb inflection}, YEAR = {2021}, ABSTRACT = {This work aims at defining an explanatory model of the morphological lexicon as a dynamic system of word learning and processing in both mono- and bilingual contexts. The main focus is on exploring some relevant aspects of the paradigmatic organisation of the mental lexicon in language learning, based on a dynamic analysis of mono- and bilingual contexts. The proposed interdisciplinary approach to lexical acquisition combines theoretically-motivated accounts, psycho-cognitive evidence and methodologies, and machine learning technologies. In particular, I will take into account those basic psychological and cognitive mechanisms that are considered as crucial in language acquisition: (i) the ability to perceive recurrent morphological structures (invariances) in varying temporal contexts, (ii) the capability to access/activate time series of symbols in the short term memory and to selectively integrate them with long term memory expectations, (iii) the attitude towards building novel forms through analogical extension of intra- and inter-paradigmatic relations (generalisation). This investigation is pursued through a computational model based on a recurrent Self-Organising Map, with Hebbian connections defined over a temporal layer (Temporal Self-Organising Map, TSOM), providing a principled algorithmic account of effects of lexical acquisition, processing and access. 
The computational simulation of a biologically inspired neural architecture of the mental lexicon offers the possibility to reproduce a wide range of conditions of mono- and bi-lingual input exposure, and to illustrate the dynamic of word acquisition and the emergence of morphological organisation. The proposed model provides an adaptive multifactorial account of morphology acquisition affected by a variety of input factors, such as word frequency distributions, paradigm regularity and wordlikeness, whereby lexical perception and organisation are grounded in memory-based processing strategies. In addition, it suggests a processing-based notion of morpheme, as a by-product of processing dynamics, with paradigms emerging as specialised surface relations between inflected forms.}, KEYWORDS = {morphology, learning and processing, monolingual and bilingual acquisition, verb inflection, artificial neural networks, temporal self-organising maps}, PAGES = {5-171}, URL = {https://publications.cnr.it/doc/461758}, VOLUME = {1095.82}, PUBLISHER = {Franco Angeli (Milano, ITA)}, ISBN = {978-88-351-3548-7}, } @INCOLLECTION{BOSCHETTI_2021_INCOLLECTION_BDS_461545, AUTHOR = {Boschetti, F. and Del Grosso, A. M. and Spinazzè, L.}, TITLE = {La galassia Musisque Deoque: storia e prospettive}, YEAR = {2021}, ABSTRACT = {The resources in support of Latin scholars created under the scientific direction of Paolo Mastandrea are numerous; the undertaking of Musisque Deoque, which aims at equipping the entire corpus of ancient Latin poetry with "significant variants", is the most emblematic effort, open to further developments. 
Looking at the general history of these projects, we try to trace the future path of the "Musisque Deoque galaxy" within Open Science.}, KEYWORDS = {Classical philology, Latin poetry, Intertextuality, Open data, FAIR principles}, PAGES = {405-419}, URL = {https://edizionicafoscari.unive.it/media/pdf/books/978-88-6969-558-2/978-88-6969-558-2-ch-26.pdf}, VOLUME = {32}, DOI = {10.30687/978-88-6969-557-5/026}, PUBLISHER = {Edizioni Ca' Foscari (Venezia, ITA)}, ISBN = {978-88-6969-557-5}, BOOKTITLE = {Paulo maiora canamus-Raccolta di studi per Paolo Mastandrea}, EDITOR = {Venuti, M. and Manca, M.}, } @INCOLLECTION{CAPPA_2021_INCOLLECTION_CFG_461297, AUTHOR = {Cappa, C. and Ferro, M. and Giulivi, S.}, TITLE = {Valutare l'efficienza di lettura in classe, fra "ecologia" e tecnologie}, YEAR = {2021}, ABSTRACT = {La sperimentazione AEREST ha consentito la creazione di un protocollo in grado di offrire una valutazione accurata e dettagliata delle abilità di lettura e comprensione del testo. Lo strumento si è rivelato semplice da utilizzare per gli insegnanti, ed è stato accolto con curiosità e interesse dagli allievi, certamente attratti dal supporto utilizzato per la somministrazione (il tablet), ma anche dai testi, che sono stati scelti e adattati con particolare cura. L'analisi dei dati ha consentito di identificare una considerevole varietà di profili di lettori, per i quali sarà possibile progettare percorsi di potenziamento mirati. Come già accennato, si è potuta constatare l'efficacia dello strumento nell'identificazione di allievi le cui difficoltà (pur evidenti agli occhi degli insegnanti) non vengono rilevate dai test comunemente utilizzati per la valutazione, ma la cui lettura non può essere considerata 'efficiente'. 
Costituiscono esempi in questo senso gli allievi che decodificano in modo accurato e veloce, con buone prestazioni nella comprensione all'ascolto, ma che manifestano difficoltà nella comprensione di un testo in lettura silente, poiché in questa attività devono integrare la decodifica con l'accesso al significato. Un ulteriore esempio è costituito dagli allievi che ottengono buoni risultati in tutti i test, impiegando però un tempo eccessivamente lungo per svolgerli. Nella prospettiva qui adottata, anche per questi allievi è necessario individuare strategie di supporto volte a evitare che le attività scolastiche, in particolare i compiti a casa, occupino una parte troppo ampia del tempo dell'allievo, togliendo spazio al gioco, allo svago, agli interessi personali e alla socializzazione. Questi ultimi sono aspetti che, come sottolinea la Carta internazionale dei diritti dei bambini (1959), rivestono un'importanza cruciale per il processo di crescita e il benessere generale di ciascuno. Oltre alle difficoltà, il protocollo AEREST consente di mettere in evidenza le prestazioni eccellenti, grazie alla struttura dei test e alle caratteristiche dei testi e delle domande che li accompagnano. Capire a fondo come "funzionano" gli allievi è indispensabile per poterli sostenere al meglio negli apprendimenti, indipendentemente dalla presenza o meno di un'"etichetta" diagnostica. 
Gli insegnanti hanno in questo senso una grande responsabilità, e uno strumento come AEREST, grazie anche all'implementazione su piattaforma tecnologica, può aiutarli in quella che forse è la loro principale sfida quotidiana: fare in modo che le difficoltà scolastiche non siano vissute come barriere all'apprendimento, al successo scolastico, alle opportunità professionali, alla realizzazione personale, ma come soglie da superare e da trasformare in trampolini di lancio.}, KEYWORDS = {efficienza di lettura, decodifica, comprensione, scuola primaria}, PAGES = {49-69}, URL = {https://buponline.com/prodotto/disturbi-specifici-dellapprendimento-e-insegnamento-linguistico/}, VOLUME = {3}, PUBLISHER = {Bononia University Press (Bologna, ITA)}, ISBN = {978-88-6923-829-1}, BOOKTITLE = {Didattica dell'italiano}, EDITOR = {Garulli, V. and Pasetti, L. and Viale, M.}, } @INCOLLECTION{CUTUGNO_2021_INCOLLECTION_CFCM_443221, AUTHOR = {Cutugno, P. and Ferretti, M. and Chiarella, D. and Marconi, L.}, TITLE = {A Linguistic preliminary study about noise perception}, YEAR = {2021}, ABSTRACT = {The activities of the project "TRIPLO: TRasporti e collegamenti Innovativi e sostenibili tra Porti e piattaforme Logistiche", financed with the resources of the Interregional Operational Programme Italy-France Maritime 2014-2020, have as specific objective to improve the sustainability of commercial ports and related logistic platforms, contributing to the reduction of noise pollution [1][2]. Some activities in the project are aimed at assessing the acoustic impact on the population exposed to noise generated by back port activities, in relation to individual perception. 
In environmental surveys, only technical investigations can objectively describe a phenomenon [3], but at the same time they do not guarantee its universality with respect to perception; a sound can be considered both as a physical phenomenon, and therefore measurable through objective parameters, and as a phenomenon linked to sound perception, of a subjective nature and consequent to the psycho-physical-emotional state of the subject. These two characteristics are strictly interdependent, so it is insufficient to limit to examine them separately. The population constitutes a precious source of information in the evaluation of the quality of the space in which they live or work, suggesting the relationships between environment, comfort and productivity. It is therefore important to use subjective investigation tools through which the opinion of the population can become a valid support to traditional methods of analysis and improve the overall evaluation.}, KEYWORDS = {noise perception, questionnaire, applied linguistics}, PAGES = {57-61}, URL = {https://publications.cnr.it/doc/443221}, ISBN = {978-959-7174-40-0}, BOOKTITLE = {Contribuciones a la Lingüística y a la Comunicación Social. Tributo a Vitelio Ruiz Hernández}, } @INCOLLECTION{DOLOWYRYBINSKA_2021_INCOLLECTION_DS_443475, AUTHOR = {Dolowy Rybinska, N. and Soria, C.}, TITLE = {Surveying the ethnolinguistic vitality of two contested languages. 
The case of Kashubian and Piedmontese}, YEAR = {2021}, ABSTRACT = {In this chapter we present the results of a Polish-Italian research project aimed at evaluating and comparing the vitality of two contested languages: Kashubian in Poland and Piedmontese in Italy.}, KEYWORDS = {ethnolinguistic vitality, contested languages}, PAGES = {125-142}, URL = {https://publications.cnr.it/doc/443475}, DOI = {10.1075/wlp.8}, PUBLISHER = {John Benjamins (Amsterdam, NLD)}, ISBN = {9789027208040}, } @INCOLLECTION{GUADAGNINI_2021_INCOLLECTION_G_441969, AUTHOR = {Guadagnini, E.}, TITLE = {Scripta}, YEAR = {2021}, ABSTRACT = {Il concetto di "scripta" negli studi linguistici romanzi.}, KEYWORDS = {linguistica italiana, linguistica romanza}, PAGES = {125-151}, URL = {http://www.carocci.it/index.php?option=com_content\&view=article\&id=98}, PUBLISHER = {Carocci (Roma, ITA)}, ISBN = {9788829004294}, BOOKTITLE = {Storia dell'italiano scritto, vol. VI, Supporti, forme, pratiche di scrittura}, EDITOR = {Antonelli, G. and Motolese, M. and Tomasin, L.}, } @INCOLLECTION{GUADAGNINI_2021_INCOLLECTION_G_461426, AUTHOR = {Guadagnini, E.}, TITLE = {Marco Tullio Cicerone (attr.), Rettorica nova}, YEAR = {2021}, ABSTRACT = {Scheda relativa alla "Rettorica nova", presente nella lista dei libri posseduti da Leonardo.}, KEYWORDS = {Leonardo, Cicerone, Retorica}, PAGES = {187-189}, URL = {https://bibliotecadileonardo.museogalileo.it/index.php/esplora/scheda/La%20biblioteca/61572}, PUBLISHER = {Giunti (Firenze, ITA)}, ISBN = {9788809897786}, BOOKTITLE = {La biblioteca di Leonardo}, EDITOR = {Vecce, C.}, } @INCOLLECTION{ONIGA_2021_INCOLLECTION_OCPGBPCCGMFCFTA_463942, AUTHOR = {Oniga, D. and Cantalupo, B. and Perlo, D. and Grangetto, M. and Bolelli, F. and Pollastri, F. and Cancilla, M. and Canalini, L. and Grana, C. and Muñoz, C. and Franco, A. and Cardillo, A. and Florea, M. and Tartaglione, E. 
and Aldinucci, M.}, TITLE = {Applications of AI and HPC in the Health Domain}, YEAR = {2021}, ABSTRACT = {This chapter presents the applications of artificial intelligence (AI) and high-computing performance (HPC) in the health domain, illustrated by the description of five of the use cases that are developed in the DeepHealth project. In the context of the European Commission supporting the use of AI and HPC in the health sector, DeepHealth Project is helping health experts process large quantities of images, putting at their disposal DeepLearning and computer vision techniques, combined in the DeepHealth toolkit and HPC infrastructures. The DeepHealth toolkit is tested and validated through 15 use cases, each of them representing a biomedical application. The most promising use cases are described in the chapter, which concludes with the value proposition and the benefits that DeepHealth toolkit offers to future end users.}, KEYWORDS = {artificial intelligence, high performance computing}, PAGES = {217-240}, URL = {https://www.taylorfrancis.com/chapters/edit/10.1201/9781003176664-11/applications-ai-hpc-health-domain-dana-oniga-barbara-cantalupo-enzo-tartaglione-daniele-perlo-marco-grangetto-marco-aldinucci-federico-bolelli-federico-pollastri-michele-cancilla-laura-canalini-costantino-grana-cristina-mu%C3%B1oz-alcalde-franco-alberto-cardillo-monica-florea}, DOI = {10.1201/9781003176664}, PUBLISHER = {CRC Press-Taylor \& Francis Group (London, GBR)}, ISBN = {9781032009841}, } @EDITORIAL{BRANDO_2021_EDITORIAL_BFMRM_453809, AUTHOR = {Brando, C. and Frontini, F. and Moreau, D. and Roche, M. and Masson, É.}, TITLE = {Humanités numériques spatialisées}, YEAR = {2021}, ABSTRACT = {This special issue provides an introduction to the contributions presented in this thematic issue dedicated to the spatial humanities. 
Three main themes are addressed: (1) the processing of spatial information in textual corpora resulting from work in the human and social sciences, mainly in literary studies; (2) problems of acquisition, spatialisation and dissemination of geographical data of the past and from cultural heritage, thus, here, more connected with research in history; (3) spatial information and its processing and uses in archaeology. For each of these topics, we present the founding initiatives with historiographical elements, a brief status quaestionis and a synthesis of the contributions.}, KEYWORDS = {spatial digital humanities, archaeology, history, history of the digital humanities, geographic information system, cartography, spatial analysis, textual analysis}, URL = {https://journals.openedition.org/revuehn/689}, VOLUME = {3}, } @EDITORIAL{BURGASSI_2021_EDITORIAL_BGV_472160, AUTHOR = {Burgassi, C. and Guadagnini, E. and Vaccaro, G.}, TITLE = {Migrazioni linguistiche e trasmissioni culturali in Italia (secoli XIII-XV)}, YEAR = {2021}, ABSTRACT = {Il terzo volume della collana Plurilinguismo e Migrazioni si concentra sui temi della traduzione, un'istanza storica fondamentale per l'area italoromanza antica che può essere intesa sia nel senso ristretto di riproposizione puntuale di un testo in una lingua diversa da quella originale sia, in senso lato, come operazione che importa nella lingua locale un contenuto originariamente espresso in altra lingua. 
Migrazioni linguistiche e trasmissioni culturali in Italia (secoli XIII-XV) adotta la traduzione, entro i due poli di senso ora definiti, come punto di vista privilegiato per lo studio della lingua e della cultura italiana del Medioevo: i nove contributi raccolti affrontano temi e problemi relativi a traduzioni dal latino e dal francese, rispetto a testi composti originariamente dall'Antichità all'epoca coeva, caratterizzati da tradizioni "passive" o "attive" (secondo la terminologia classica).}, KEYWORDS = {Traduzione, Volgarizzamenti, Eredità classica}, URL = {https://www.cnr.it/sites/default/files/public/media/attivita/editoria/collana_plurimi/PLURIMI_3_2021.pdf}, VOLUME = {3}, DOI = {10.36173/PLURIMI-2021-3}, PUBLISHER = {CNR EDIZIONI (ROMA, ITA)}, ISBN = {9788880804888}, } @EDITORIAL{MONACHINI_2021_EDITORIAL_ME_472301, AUTHOR = {Monachini, M. and Eskevich, M.}, TITLE = {CLARIN Annual Conference Proceedings}, YEAR = {2021}, ABSTRACT = {CLARIN2021 is organised for the wider Humanities and Social Sciences communities in order to exchange ideas and experiences within the CLARIN infrastructure. This includes the design, construction and operation of the CLARIN infrastructure, the data, tools and services that it contains or for which there is a need, its actual use by researchers, its relation to other infrastructures and projects, and the CLARIN Knowledge Sharing Infrastructure.}, KEYWORDS = {Language Resource Infrastructure}, PAGES = {1-178}, URL = {https://publications.cnr.it/doc/472301}, } @EDITORIAL{SALVATORI_2021_EDITORIAL_SBD_484494, AUTHOR = {Salvatori, E. and Boschetti, F. and Del Grosso, A. 
M.}, TITLE = {DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale (Selected Papers AIUCD 2021)}, YEAR = {2021}, ABSTRACT = {Curatela dei selected paper (Dossier) della conferenza AIUCD2021.}, KEYWORDS = {Digital Humanities, AIUCD2021, AIUCD, Umanistica Digitale, Informatica Umanistica}, PAGES = {1-197}, URL = {https://umanisticadigitale.unibo.it/issue/view/1033}, VOLUME = {11}, } @EDITORIAL{BRANDO_2021_EDITORIAL_BFMRM_453821, AUTHOR = {Brando, C. and Frontini, F. and Moreau, D. and Roche, M. and Masson, É.}, TITLE = {Introduction. Humanités numériques et analyses spatiales: enjeux et perspectives}, YEAR = {2021}, KEYWORDS = {spatial digital humanities, archaeology, history, history of the digital humanities, geographic information system, cartography, spatial analysis, textual analysis}, URL = {https://journals.openedition.org/revuehn/2038}, VOLUME = {3}, PUBLISHER = {Humanistica (Bruxelles, Belgio)}, ISSN = {2736-2337}, BOOKTITLE = {Humanités numériques (Online)}, } @INPROCEEDINGS{ALBERTIN_2021_INPROCEEDINGS_AMB_465394, AUTHOR = {Albertin, G. and Miaschi, A. and Brunato, D.}, TITLE = {On the role of textual connectives in sentence comprehension: A new dataset for Italian}, YEAR = {2021}, ABSTRACT = {In this paper we present a new evaluation resource for Italian aimed at assessing the role of textual connectives in the comprehension of the meaning of a sentence. The resource is arranged in two sections (acceptability assessment and cloze test), each one corresponding to a distinct challenge task conceived to test how subtle modifications involving connectives in real usage sentences influence the perceived acceptability of the sentence by native speakers and Neural Language Models (NLMs). 
Although the main focus is the presentation of the dataset, we also provide some preliminary data comparing human judgments and NLMs performance in the two tasks.}, KEYWORDS = {neural language models, textual connectives, sentence acceptability}, URL = {http://ceur-ws.org/Vol-3033/paper16.pdf}, VOLUME = {3033}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {8th Italian Conference on Computational Linguistics (CLIC-it 2021)}, CONFERENCE_PLACE = {Milano}, CONFERENCE_DATE = {26-28/01/2022}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{BRUNO_2021_INPROCEEDINGS_BGCMF_461393, AUTHOR = {Bruno, E. and Giulivi, S. and Cappa, C. and Marini, M. and Ferro, M.}, TITLE = {Evaluating the accuracy of decoding in children who read aloud}, YEAR = {2021}, ABSTRACT = {Digital tools based on automatic speech recognition (ASR) could be a useful support for teachers in assessing the reading skills of the students. We focus on the evaluation of the decoding accuracy of children with grade level ranging from the 3rd to the 6th performing a reading aloud task on a narrative text displayed on an ordinary tablet using the ReadLet platform. On the basis of previously collected data, we built a gold dataset with sentences characterised by the audio data, the original text to be read, and the text actually spoken by the child. By using the open-source Kaldi toolkit an ASR system based on the GMM-HMM model was trained on the training portion of the gold dataset. The accuracy of the ASR system was calculated as the ability to correctly decode the test audio data with respect to the annotated text, and the decoding accuracy of the children was estimated by measuring the gap between the results obtained with the annotated text and the original text. 
A consistent trend with increasing grade level was found in terms of word correctness, substitutions and insertions, while the trained model appears to be significantly able to evaluate the children decoding accuracy.}, KEYWORDS = {speech recognition, decoding accuracy, reading aloud, voice parameters, Kaldi, GMM-HMM acoustic model}, PAGES = {145-148}, URL = {https://publications.cnr.it/doc/461393}, DOI = {10.36253/978-88-5518-449-6}, PUBLISHER = {Firenze University Press (Firenze, ITA)}, ISBN = {978-88-5518-449-6}, CONFERENCE_NAME = {12th International Workshop on Models and Analysis of Vocal Emissions for Biomedical Applications (MAVEBA'21)}, CONFERENCE_PLACE = {Firenze (Italy)}, CONFERENCE_DATE = {14-16/12/2021}, BOOKTITLE = {Proceedings of the 12th International Workshop on Models and Analysis of Vocal Emissions for Biomedical Applications (MAVEBA'21)}, EDITOR = {Manfredi, C.}, } @INPROCEEDINGS{CALAMAI_2021_INPROCEEDINGS_CPSPCBM_466824, AUTHOR = {Calamai, S. and Pretto, N. and Stamuli, M. F. and Piccardi, D. and Candeo, G. and Bianchi, S. and Monachini, M.}, TITLE = {COMMUNITY-BASED SURVEY AND ORAL ARCHIVE INFRASTRUCTURE IN THE ARCHIVIO VI.VO. PROJECT}, YEAR = {2021}, ABSTRACT = {Audio and audiovisual archives are at the crossroads of different fields of knowledge, yet they require common solutions for both their long-term preservation and their description, availability, use and reuse. Archivio Vi.Vo. is an Italian project financed by the Tuscany Region, aiming to (i) explore methods for long-term preservation and secure access to oral sources and (ii) develop an infrastructure under the CLARIN-IT umbrella offering several services for scholars from different domains interested in oral sources. 
This paper describes the project's infrastructure and its methodology through a case study on the Caterina Bueno's audio archive.}, KEYWORDS = {inglese}, URL = {https://publications.cnr.it/doc/466824}, VOLUME = {180}, DOI = {10.3384/ecp180}, ISBN = {978-91-7929-609-4}, CONFERENCE_NAME = {CLARIN2020 Annual Conference}, CONFERENCE_PLACE = {virtual}, CONFERENCE_DATE = {5/10/2020-7/10/2020}, BOOKTITLE = {SELECTED PAPERS FROM THE CLARIN ANNUAL CONFERENCE 2020}, EDITOR = {Navarretta, C. and Eskevich, M.}, } @INPROCEEDINGS{DEMATTEI_2021_INPROCEEDINGS_DLDN_472158, AUTHOR = {De Mattei, L. and Lai, H. and Dell'Orletta, F. and Nissim, M.}, TITLE = {Human Perception in Natural Language Generation}, YEAR = {2021}, ABSTRACT = {We take a collection of short texts, some of which are human-written, while others are automatically generated, and ask subjects, who are unaware of the texts' source, whether they perceive them as human-produced. We use this data to fine-tune a GPT-2 model to push it to generate more human-like texts, and observe that the production of this fine-tuned model is indeed perceived as more human-like than that of the original model. Contextually, we show that our automatic evaluation strategy correlates well with human judgements. We also run a linguistic analysis to unveil the characteristics of human- vs machine-perceived language.}, KEYWORDS = {Natural Language Generation, Neural Language Models, Evaluation}, PAGES = {15-23}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85123713456\&origin=inward}, DOI = {10.18653/v1/2021.gem-1.2}, ISBN = {978-1-954085-67-1}, CONFERENCE_NAME = {First Workshop on Generation Evaluation and Metrics (GEM 2021)}, CONFERENCE_PLACE = {Online}, CONFERENCE_DATE = {05/08/2021}, BOOKTITLE = {Proceedings of the First Workshop on Generation Evaluation and Metrics (GEM 2021)}, } @INPROCEEDINGS{DELGRATTA_2021_INPROCEEDINGS_DBDB_472299, AUTHOR = {Del Gratta, R. and Boschetti, F. and Del Grosso, A. M. 
and Bambaci, L.}, TITLE = {La Filologia come sistema dinamico: qualche considerazione preliminare}, YEAR = {2021}, ABSTRACT = {In questo articolo introduciamo un approccio formale all'evoluzione dei documenti con particolare attenzione alla prospettiva filologica e alle problematiche tipiche connesse. Proponiamo un modello/framework matematico in grado di formalizzare diversi fenomeni complessi in vari ambiti di ricerca quali la Linguistica Computazionale, la Filologia Digitale e l'Ingegneria del Software, in particolare quando questa viene applicata all'analisi di documenti e testi di interesse storico-letterario.}, KEYWORDS = {approccio evoluzionistico, modello formale, analisi documentale e testuale, sistema dinamico, filologia computazionale}, PAGES = {484-490}, URL = {http://amsacta.unibo.it/6712/1/AIUCD2021_BOA-versione3A.pdf#page=500}, VOLUME = {aiucd2021}, DOI = {10.6092/unibo/amsacta/6712}, ISBN = {9788894253559}, CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale-DHs for society: e-quality, participation, rights and values in the Digital Age}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {19/01/2021-22/01/2021}, BOOKTITLE = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale-DHs for society: e-quality, participation, rights and values in the Digital Age. Book of extended abstracts of the 10th national conference}, } @INPROCEEDINGS{DIDONATO_2021_INPROCEEDINGS_DDMP_461475, AUTHOR = {Di Donato, F. and Dumouchel, S. and Monachini, M. and Pohle, S.}, TITLE = {The discovery platform GOTRIPLE: An EOSC service for social sciences and humanities research}, YEAR = {2021}, ABSTRACT = {In this paper we present TRIPLE - Transforming Research through Innovative Practices for Linked Interdisciplinary Exploration - an on-going project funded as part of the European Horizon 2020 programme INFRAEOSC-02-2019 "Prototyping new innovative services" (2019-2023). 
The project's main objective is to develop a multilingual and multicultural discovery solution for the social sciences and humanities (SSH), which will provide a single access point that allows users to explore, find, access and reuse materials such as literature, data, projects and researcher profiles at European scale. The paper first provides an overview of TRIPLE's main goals and impacts. It then describes the methodology adopted for the design and development of the project platform, GOTRIPLE. Finally, it contextualises the project within the European research landscape, and more specifically in the European Open Science Cloud (EOSC) ecosystem. In the conclusion, some current challenges and open issues are presented.}, KEYWORDS = {EOSC, Open Science, scholarly communication, discoverability, OPERAS, TRIPLE}, PAGES = {31-38}, URL = {http://amsacta.unibo.it/6712/1/AIUCD2021_BOA-versione3A.pdf}, DOI = {10.6092/unibo/amsacta/6712}, CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale}, CONFERENCE_PLACE = {on-line}, CONFERENCE_DATE = {19-22/01/2021}, BOOKTITLE = {AIUCD 2021-Book of Extended Abstracts. p. 624}, EDITOR = {Boschetti, F. and Del Grosso, A. M. and Salvatori, E.}, } @INPROCEEDINGS{FERRETTI_2021_INPROCEEDINGS_FCCMDCM_456166, AUTHOR = {Ferretti, M. and Chiarella, D. and Cutugno, P. and Marconi, L. and Di Feo, G. and Cerniglia, A. and Magrini, A.}, TITLE = {A linguistic and psychoacoustic study for questionnaire analysis: first results}, YEAR = {2021}, ABSTRACT = {This research is inspired by the consideration that language represents a significant dimension of perception and description of the soundscape. A language is a strategic tool that can consistently describe the qualities of physical acoustic environments, introducing an efficient metric to describe the sound perception by people. 
In this paper, we explore the possibility of using sentiment analysis for extracting the emotional impact of noise from lists of adjectives that describe sounds. To address this, an investigation campaign was conducted to identify the Italian adjectives that best describe the noise sources associated with port and retro-port infrastructures. 402 Italian university students responded to a listening experiment. They were asked to associate adjectives with audios. The association of these adjectives with the sources of sound formed the basis for sentiment analysis. Psychoacoustic parameters were calculated following measurements and processed for each of the sounds administered to look for possible correlations between the different perceptual aspects. The first results show how industrial or traffic sounds are as expected associated with negative sentiment whereas natural sounds evoke positive emotions. The proposed sentiment analysis, in connection with the psychoacoustic investigations developed, provides a framework for future research in the investigation of sound perception.}, KEYWORDS = {Sound perception, Sentiment Analysis, Psychoacoustics, Acoustics, Synesthesia}, PAGES = {8}, URL = {https://publications.cnr.it/doc/456166}, ISBN = {978-83-7880-799-5}, CONFERENCE_NAME = {27th International Congress on Sound and Vibration}, CONFERENCE_PLACE = {Praga}, CONFERENCE_DATE = {11-16/07/2021}, BOOKTITLE = {Proceedings of the 27th International Congress on Sound and Vibration}, } @INPROCEEDINGS{GIOVANNETTI_2021_INPROCEEDINGS_GABMPS_463795, AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Marchi, S. and Papini, M. and Sciolette, F.}, TITLE = {The role of a computational lexicon for query expansion in full-text search}, YEAR = {2021}, ABSTRACT = {This work describes the first experiments conducted with a computational lexicon of Italian in a context of query expansion for full-text search. 
An application, composed of a graphical user interface and backend services to access the lexicon and the database containing the corpus to be queried, was developed. The text was morphologically analysed to improve the precision of the search process. Some examples of queries are given to show the potential of a text search approach supported by a complex and stratified lexical resource.}, KEYWORDS = {full-text search, computational lexicon, query expansion}, PAGES = {162-168}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85121247840\&origin=inward}, VOLUME = {3033}, DOI = {10.4000/books.aaccademia.10417}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {9791280136947}, CONFERENCE_NAME = {CLiC-it 2021 Italian Conference on Computational Linguistics 2021}, CONFERENCE_PLACE = {Milan, Italy}, CONFERENCE_DATE = {January 26-28, 2022}, BOOKTITLE = {Proceedings of the Eighth Italian Conference on Computational Linguistics (CLiC-it 2021)}, EDITOR = {Fersini, E. and Passarotti, M. and Patti, V.}, } @INPROCEEDINGS{IAVARONE_2021_INPROCEEDINGS_IMBGSVDG_472155, AUTHOR = {Iavarone, B. and Morelli, M. S. and Brunato, D. and Ghiasi, S. and Scilingo, E. P. and Vanello, N. and Dell'Orletta, F. and Greco, A.}, TITLE = {Analyzing the Interaction between the Reader's Voice and the Linguistic Structure of the Text: a Preliminary Study}, YEAR = {2021}, ABSTRACT = {In this study, we present a preliminary analysis of the relationship between the linguistic profile of a text and the voice properties of the reader aiming to improve the speech-based emotion recognition systems. To this aim, we recorded the speech signals from a group of 32 healthy volunteers reading aloud neutral and affective texts and used the BioVoice toolbox to compute some of the main speech features. The selected texts were analyzed to quantify their lexical, morpho-syntactic, and syntactic content. 
Correlation and Support Vector Regressor analyses between linguistic and speech features have shown a significant modulation of some voice acoustic properties performed by the linguistic structure of the text. Particularly, a significant effect was shown on some specific speech features often used for the assessment of human emotional state (e.g., F0). This suggests that the lexical, morpho-syntactic, and syntactic properties could play an important role in the emotional dynamics of a person.}, KEYWORDS = {Natural Language Processing, Speech analysis, linguistic profile}, URL = {https://publications.cnr.it/doc/472155}, DOI = {10.36253/978-88-5518-449-6}, ISBN = {978-88-5518-448-9}, CONFERENCE_NAME = {12th INTERNATIONAL WORKSHOP "MODELS AND ANALYSIS OF VOCAL EMISSIONS FOR BIOMEDICAL APPLICATIONS"}, CONFERENCE_PLACE = {Firenze, Italia}, CONFERENCE_DATE = {14-16/12/2021}, BOOKTITLE = {Proceedings of 12th INTERNATIONAL WORKSHOP "MODELS AND ANALYSIS OF VOCAL EMISSIONS FOR BIOMEDICAL APPLICATIONS"}, } @INPROCEEDINGS{KELLI_2021_INPROCEEDINGS_KLKVLBMHDVTV_462357, AUTHOR = {Kelli, A. and Lindén, K. and Kamocki, P. and Vider, K. and Labropoulou, P. and Birštonas, R. and Mantrov, V. and Hannesschläger, V. and Del Gratta, R. and Värv, A. and Tavits, G. and Vutt, A.}, TITLE = {The Interplay of Legal Regimes of Personal Data, Intellectual Property and Freedom of Expression in Language Research}, YEAR = {2021}, ABSTRACT = {Sometimes legal scholars get relevant but baffling questions from laypersons like: "The reference to a work is personal data, so does the GDPR actually require me to anonymise it? Or, as my voice data is personal data, does the GDPR automatically give me access to a speech recognizer using my voice sample? Or, can I say anything about myself without the GDPR requiring the web host to anonymise or remove the post? What can I say about others like politicians? And, what can researchers say about patients in a research report?" 
Based on these questions, the authors address the interaction of intellectual property and data protection law in the context of data minimisation and attribution rights, access rights, trade secret protection, and freedom of expression.}, KEYWORDS = {Legal aspects, research data}, PAGES = {154-159}, URL = {https://office.clarin.eu/v/CE-2021-1923-CLARIN2021_ConferenceProceedings.pdf}, CONFERENCE_NAME = {CLARIN Annual Conference 2021}, CONFERENCE_DATE = {27-29/09/2021}, EDITOR = {Monachini, M. and Eskevich, M.}, } @INPROCEEDINGS{MARTELLI_2021_INPROCEEDINGS_MNKTKGKNPOLKKDUSLVGLQMFTCSIM_461705, AUTHOR = {Martelli, F. and Navigli, R. and Krek, S. and Tiberius, C. and Kallas, J. and Gantar, P. and Koeva, S. and Nimb, S. and Pedersen, B. S. and Olsen, S. and Langemets, M. and Koppel, K. and Üksik, T. and Dobrovoljc, K. and Ureña Ruiz, R. and Sancho Sánchez, J. and Lipp, V. and Varadi, T. and Györffy, A. and László, S. and Quochi, V. and Monachini, M. and Frontini, F. and Tempelaars, R. and Costa, R. and Salgado, A. and Čibej, J. and Munda, T.}, TITLE = {Designing the ELEXIS Parallel Sense-Annotated Dataset in 10 European Languages}, YEAR = {2021}, ABSTRACT = {Over the course of the last few years, lexicography has witnessed the burgeoning of increasingly reliable automatic approaches supporting the creation of lexicographic resources such as dictionaries, lexical knowledge bases and annotated datasets. In fact, recent achievements in the field of Natural Language Processing and particularly in Word Sense Disambiguation have widely demonstrated their effectiveness not only for the creation of lexicographic resources, but also for enabling a deeper analysis of lexical-semantic data both within and across languages. Nevertheless, we argue that the potential derived from the connections between the two fields is far from exhausted. In this work, we address a serious limitation affecting both lexicography and Word Sense Disambiguation, i.e. 
the lack of high-quality sense-annotated data and describe our efforts aimed at constructing a novel entirely manually annotated parallel dataset in 10 European languages. For the purposes of the present paper, we concentrate on the annotation of morpho-syntactic features. Finally, unlike many of the currently available sense-annotated datasets, we will annotate semantically by using senses derived from high-quality lexicographic repositories.}, KEYWORDS = {Digital lexicography, Natural Language Processing, Computational Linguistics, Corpus Linguistics, Word Sense Disambiguation}, PAGES = {377-396}, URL = {https://static-curis.ku.dk/portal/files/279888836/eLex_2021_22_pp377_395.pdf}, CONFERENCE_NAME = {eLex 2021}, CONFERENCE_DATE = {05/07/2021-07/07/2021}, BOOKTITLE = {Proceedings of the eLex 2021 conference}, } @INPROCEEDINGS{MIASCHI_2021_INPROCEEDINGS_MABDV_463833, AUTHOR = {Miaschi, A. and Alzetta, C. and Brunato, D. and Dell'Orletta, F. and Venturi, G.}, TITLE = {Probing tasks under pressure}, YEAR = {2021}, ABSTRACT = {Probing tasks are frequently used to evaluate whether the representations of Neural Language Models (NLMs) encode linguistic information. However, it is still questioned if probing classification tasks really enable such investigation or they simply hint for surface patterns in the data. We present a method to investigate this question by comparing the accuracies of a set of probing tasks on gold and automatically generated control datasets. Our results suggest that probing tasks can be used as reliable diagnostic methods to investigate the linguistic information encoded in NLMs representations.}, KEYWORDS = {Neural Language Models, Linguistic probing, Treebanks}, PAGES = {1-7}, URL = {http://ceur-ws.org/Vol-3033/paper29.pdf}, VOLUME = {3033}, PUBLISHER = {M.
Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {8th Italian Conference on Computational Linguistics (CLIC-it 2021)}, CONFERENCE_PLACE = {Milano}, CONFERENCE_DATE = {29/06-01/07/2022}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{MIASCHI_2021_INPROCEEDINGS_MBDV_454441, AUTHOR = {Miaschi, A. and Brunato, D. and Dell'Orletta, F. and Venturi, G.}, TITLE = {What Makes My Model Perplexed? A Linguistic Investigation on Neural Language Models Perplexity}, YEAR = {2021}, ABSTRACT = {This paper presents an investigation aimed at studying how the linguistic structure of a sentence affects the perplexity of two of the most popular Neural Language Models (NLMs), BERT and GPT-2. We first compare the sentence-level likelihood computed with BERT and the GPT-2's perplexity showing that the two metrics are correlated. In addition, we exploit linguistic features capturing a wide set of morpho-syntactic and syntactic phenomena showing how they contribute to predict the perplexity of the two NLMs.}, KEYWORDS = {nlp, interpretability, deep learning}, PAGES = {40-47}, URL = {https://www.aclweb.org/anthology/2021.deelio-1.5}, ISBN = {978-1-954085-30-5}, CONFERENCE_NAME = {2nd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures}, CONFERENCE_DATE = {10/06/2021}, } @INPROCEEDINGS{MIASCHI_2021_INPROCEEDINGS_MRD_469731, AUTHOR = {Miaschi, A. and Ravelli, A. A. and Dell'Orletta, F.}, TITLE = {Evaluating Transformer Models for Punctuation Restoration in Italian}, YEAR = {2021}, ABSTRACT = {In this paper, we propose an evaluation of a Transformer-based punctuation restoration model for the Italian language. Experimenting with a BERT-base model, we perform several fine-tuning with different training data and sizes and tested them in an in- and cross-domain scenario. Moreover, we offer a comparison in a multilingual setting with the same model fine-tuned on English transcriptions.
Finally, we conclude with an error analysis of the main weaknesses of the model related to specific punctuation marks.}, KEYWORDS = {transformer models, nlp, punctuation restoration}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85121647978\&origin=inward}, VOLUME = {3015}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {5th Workshop on Natural Language for Artificial Intelligence (NL4AI 2021)}, CONFERENCE_DATE = {29/11/2021}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{PROIETTI_2021_INPROCEEDINGS_PC_463554, AUTHOR = {Proietti, C. and Chiarella, D.}, TITLE = {Measuring bi-polarization with argument graphs}, YEAR = {2021}, ABSTRACT = {Multi-agent models play a significant role in testing hypotheses about the unfolding of opinion dynamics in complex social networks. The model of the Argument Communication Theory of Bi-polarization (ACTB), developed by Maes and Flache (2013), shows that simple circulation of arguments among individuals in a group can determine strong differentiation of opinions (bi-polarization effects) even with a small degree of homophily. The ACTB model and similar ones have nevertheless one limitation: given a topic of discussion, only direct pro and con arguments for it are considered. This does not allow to account for the topology of a more complex debate, where arguments may also interact indirectly with the topic at stake. This gap can be filled by using Quantitative Bipolar Argument Frameworks (QBAF). More specifically, by applying measures of argument strength for QBAFs in order to calculate the agents' opinion. In the present paper we generalize the ACTB measure of opinion strength to acyclic bipolar graphs and compare it with other measures from the literature. 
We then present a revised version of the ACTB model, where the agents' knowledge bases are structured as subgraphs of an underlying global knowledge base (described as a QBAF). We first test that the predictions of the ACTB model are confirmed when the underlying QBAF contains only direct pro and con arguments for a topic. We then explore more complex topologies of debate with two additional batches of simulations. Our first results show that changing the topology, while keeping the same number of pro and con arguments, has no significant impact on bi-polarization dynamics.}, KEYWORDS = {bi-polarization, abstract argumentation, opinion dynamics, multi-agent modelling}, PAGES = {13}, URL = {https://publications.cnr.it/doc/463554}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {20th International Conference Italian Association for Artificial Intelligence-5th Workshop on Advances in Argumentation in Artificial Intelligence}, CONFERENCE_PLACE = {Milano}, CONFERENCE_DATE = {29/11/2021}, BOOKTITLE = {Advances in Argumentation in Artificial Intelligence 2021}, } @INPROCEEDINGS{PUCCETTI_2021_INPROCEEDINGS_PMD_454440, AUTHOR = {Puccetti, G. and Miaschi, A. and Dell'Orletta, F.}, TITLE = {How Do BERT Embeddings Organize Linguistic Knowledge?}, YEAR = {2021}, ABSTRACT = {Several studies investigated the linguistic information implicitly encoded in Neural Language Models. Most of these works focused on quantifying the amount and type of information available within their internal representations and across their layers. In line with this scenario, we proposed a different study, based on Lasso regression, aimed at understanding how the information encoded by BERT sentence-level representations is arranged within its hidden units.
Using a suite of several probing tasks, we showed the existence of a relationship between the implicit knowledge learned by the model and the number of individual units involved in the encodings of this competence. Moreover, we found that it is possible to identify groups of hidden units more relevant for specific linguistic properties.}, KEYWORDS = {nlp, interpretability, deep learning}, PAGES = {48-57}, URL = {https://www.aclweb.org/anthology/2021.deelio-1.6}, ISBN = {978-1-954085-30-5}, CONFERENCE_NAME = {2nd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures}, CONFERENCE_DATE = {10/06/2021}, } @INPROCEEDINGS{RICCUCCI_2021_INPROCEEDINGS_RDVC_472300, AUTHOR = {Riccucci, M. and Del Grosso, A. M. and Valecchi, F. and Causarano, G.}, TITLE = {Testimoniare il Lager: l'informatica al servizio della memoria}, YEAR = {2021}, ABSTRACT = {Il contributo illustra il lavoro condotto in seno al progetto di ricerca Voci dall'inferno: le parole per dirlo, condotto e coordinato da Marina Riccucci (Università di Pisa) con il supporto del Dott. Angelo Mario Del Grosso (Università di Pisa) e della Prof.ssa Frida Valecchi.
Nel caso specifico sarà dedicata particolare attenzione alla rappresentazione digitale XML-TEI di una testimonianza manoscritta inedita di una donna sopravvissuta alla deportazione in Lager, nonché allo sviluppo di un componente innovativo per l'elaborazione dei documenti digitali, integrato nell'applicazione web Memoriarchivio - software creato specificamente per Voci dall'inferno e che rappresenta un fondamentale strumento per archiviare e analizzare le risorse testuali oggetto d'analisi.}, KEYWORDS = {italianistica digitale, codifica del testo, testimonianze, lager, lessico dantesco, XML-TEI, Saxon-js}, PAGES = {567-572}, URL = {http://amsacta.unibo.it/6712/1/AIUCD2021_BOA-versione3A.pdf#page=583}, VOLUME = {aiucd2021}, DOI = {10.6092/unibo/amsacta/6712}, ISBN = {9788894253559}, CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale-DHs for society: e-quality, participation, rights and values in the Digital Age}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {19/01/2021-22/01/2021}, BOOKTITLE = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale-DHs for society: e-quality, participation, rights and values in the Digital Age. Book of extended abstracts of the 10th national conference}, } @INPROCEEDINGS{SARTI_2021_INPROCEEDINGS_SBD_464972, AUTHOR = {Sarti, G. and Brunato, D. and Dell'Orletta, F.}, TITLE = {That Looks Hard: Characterizing Linguistic Complexity in Humans and Language Models}, YEAR = {2021}, ABSTRACT = {This paper investigates the relationship between two complementary perspectives in the human assessment of sentence complexity and how they are modeled in a neural language model (NLM). The first perspective takes into account multiple online behavioral metrics obtained from eye-tracking recordings. The second one concerns the offline perception of complexity measured by explicit human judgments. 
Using a broad spectrum of linguistic features modeling lexical, morpho-syntactic, and syntactic properties of sentences, we perform a comprehensive analysis of linguistic phenomena associated with the two complexity viewpoints and report similarities and differences. We then show the effectiveness of linguistic features when explicitly leveraged by a regression model for predicting sentence complexity and compare its results with the ones obtained by a fine-tuned neural language model. We finally probe the NLM's linguistic competence before and after fine-tuning, highlighting how linguistic information encoded in representations changes when the model learns to predict complexity.}, KEYWORDS = {linguistic complexity, eyetracking, human evaluation}, PAGES = {48-60}, URL = {https://aclanthology.org/2021.cmcl-1.5}, DOI = {10.18653/v1/2021.cmcl-1.5}, ISBN = {978-1-954085-35-0}, CONFERENCE_NAME = {Proceedings of Workshop on Cognitive Modeling and Computational Linguistics (CMCL 2021)}, CONFERENCE_DATE = {10/06/2021}, } @INPROCEEDINGS{SASSOLINI_2021_INPROCEEDINGS_SBDGM_455303, AUTHOR = {Sassolini, E. and Biffi, M. and De Blasi, F. and Guadagnini, E. and Montemagni, S.}, TITLE = {La digitalizzazione del GDLI: un approccio linguistico per la corretta acquisizione del testo?}, YEAR = {2021}, ABSTRACT = {In questo articolo sono discussi metodi e strategie in via di elaborazione per la correzione (propedeutica alla successiva strutturazione) dei contenuti del Grande dizionario della lingua italiana (GDLI) fondato da Salvatore Battaglia, estratti da un formato digitale non standard. La presenza, in questo formato, di errori distribuiti di vario tipo ha condizionato la scelta dell'approccio all'estrazione e messo in luce tutte le difficoltà dell'operazione. 
Le sperimentazioni fatte sino a oggi portano a privilegiare una strategia di correzione multilivello, che procede scomponendo in sezioni distinte l'individuazione e la correzione degli errori, in modo da rendere gestibili interventi complessi di correzione semi-automatica, altrimenti improponibili, e consentire un loro raffinamento progressivo. Parallelamente alla definizione di regole di riconoscimento di struttura e formato, stiamo analizzando metodi e procedure in grado di migliorare la qualità dell'input e specializzare i moduli di estrazione per i singoli campi della voce a partire dal "lemma". Le finalità del lavoro sono duplici: l'estrazione e strutturazione dei contenuti e la produzione di un formato standard di rappresentazione dei dati. Si tratta di un percorso difficile perché il formato dei dati rende l'uso di strumenti reperibili in letteratura non applicabile. Solamente al termine del lavoro potremo capire se esistono le condizioni per trasformare l'approccio adottato in un protocollo di intervento replicabile.}, KEYWORDS = {dizionari digitali, risorse linguistiche, estrazione dell'informazione, correzione del testo post OCR}, PAGES = {159-166}, URL = {https://aiucd2021.labcd.unipi.it/wp-content/uploads/2021/05/AIUCD2021_BOA-versione3A.pdf}, DOI = {10.6092/unibo/amsacta/6712}, ISBN = {9788894253559}, CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {19-22/01/2021}, } @INPROCEEDINGS{TAXITARI_2021_INPROCEEDINGS_TCFMNP_441870, AUTHOR = {Taxitari, L. and Cappa, C. and Ferro, M. and Marzi, C. and Nadalini, A. 
and Pirrelli, V.}, TITLE = {Using mobile technology for reading assessment}, YEAR = {2021}, ABSTRACT = {The enormous potential of Information and Communication Technologies (ICT) for addressing critical educational issues is generally acknowledged, but its use in the assessment of the complex skills of reading and understanding a text has been very limited to date. The paper contrasts traditional reading assessment protocols with ReadLet, an ICT platform with a tablet front-end, designed to support online monitoring of silent and oral reading abilities in early graders. ReadLet makes use of cloud computing and mobile technology for large-scale data collection and allows the time alignment of the child's reading behaviour with texts tagged using Natural Language Processing (NLP) tools. Initial findings replicate established benchmarks from the psycholinguistic literature on reading in both typically and atypically developing children, making the application a new ground-breaking approach in the evaluation of reading skills. Index Terms--reading assessment, reading research, mobile technology, NLP, cloud computing, special education needs.}, KEYWORDS = {reading assessment, reading research, mobile technology, NLP, cloud computing, special education needs}, PAGES = {1-6}, URL = {http://www.ieee.ma/cist20/component/content/?id=26\&Itemid=185}, ISBN = {9781728166469}, CONFERENCE_NAME = {6th IEEE Congress on Information Science \& Technology (IEEE CIST'20)}, CONFERENCE_PLACE = {online}, CONFERENCE_DATE = {05/06/2021}, } @INPROCEEDINGS{VAGIONAKIS_2021_INPROCEEDINGS_VDBBDMM_461540, AUTHOR = {Vagionakis, I. and Del Gratta, R. and Boschetti, F. and Baroni, P. and Del Grosso, A. M. and Mancinelli, T. and Monachini, M.}, TITLE = {'Cretan Institutional Inscriptions' Meets CLARIN-IT}, YEAR = {2021}, ABSTRACT = {This paper describes a project in the domain of Digital Epigraphy named 'Cretan Institutional Inscriptions' and developed at the Ca' Foscari University of Venice. 
The project is supported by CLARIN-IT as part of the actions addressed to initiatives, projects and events in the field of Humanities and Social Sciences. The main goal is to make the project visible through CLARIN channels with the hope that it will be a forerunner for other digital epigraphy projects in CLARIN. The article illustrates also the dockerization process applied to the 'Cretan Institutional Inscriptions' project, currently hosted on the CLARIN-IT servers.}, KEYWORDS = {Digital Epigraphy, Digital Classics, Ancient Greek, Crete, Institutions, Text Encoding Initiative, TEI, EpiDoc, EpiDoc Front-End Services, EFES, Virtual Language Observatory, Dockerization, ILC4CLARIN, CLARIN-IT, CLARIN}, PAGES = {48-53}, URL = {https://office.clarin.eu/v/CE-2021-1923-CLARIN2021_ConferenceProceedings.pdf}, CONFERENCE_NAME = {CLARIN Annual Conference 2021}, CONFERENCE_PLACE = {Virtual Edition}, CONFERENCE_DATE = {27-29/09/2021}, BOOKTITLE = {Proceedings of CLARIN Annual Conference 2021 (Virtual Edition)}, EDITOR = {Monachini, M. and Eskevich, M.}, } @INPROCEEDINGS{AIOLA_2021_INPROCEEDINGS_AADL_461479, AUTHOR = {Aiola, C. and Andreini, G. and Di Donato, F. and Lombardo, T.}, TITLE = {Sharing Knowledge Digitally. The Muruca case study}, YEAR = {2021}, ABSTRACT = {In this poster we present the evolution of Muruca, a platform that has been conceived as a framework to allow Digital Humanities researchers and research teams to create, curate and share their own Digital Editions. 
The poster describes the main features of the framework, its evolution over the years in synergy with the researchers' needs and with the progress of technologies and policies, and the open challenges and opportunities of the framework in a European context.}, KEYWORDS = {open culture, digital humanities, digital libraries, digital editions, electronic objects}, PAGES = {428-431}, URL = {https://aiucd2021.labcd.unipi.it/wp-content/uploads/2021/01/27_Aiola-AIUCD2021_presentation_46.pdf}, DOI = {10.6092/unibo/amsacta/6712}, ISBN = {9788894253559}, CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale}, CONFERENCE_PLACE = {on-line}, CONFERENCE_DATE = {19-22/01/2021}, } @INPROCEEDINGS{DELGROSSO_2021_INPROCEEDINGS_D_484396, AUTHOR = {Del Grosso, A. M.}, TITLE = {How to leverage Domain-Driven Design to foster Digital Scholarly Editing and DSL}, YEAR = {2021}, ABSTRACT = {The field of digital scholarly editing is deemed to be particularly challenging from a software engineering point of view. Indeed, after decades of research activities, computational philology still lacks effective tools and efficient procedures organized in common interfaces, decoupled application services and domain-specific commodities. As a matter of fact, there is neither convergence on how to model software applications to meet philological requirements nor traditional philologists fully satisfied with the current digital solutions. By adopting the Domain Driven Design approach, we start our modeling activities by defining the problem space of our domain which, in turn, is broken into small components (called sub-domains) to progressively refine the ongoing digital models.
In this way, together with the experts, we are able to identify the main capabilities of the field we want to model and strive to design coherent domain-specific solutions for such capabilities (bounded contexts).}, KEYWORDS = {Digital Humanities, Domain-Driven Design, Digital Philology, Euporia, Domain-Specific Languages}, URL = {https://easychair.org/cfp/EUPORIA2021}, CONFERENCE_NAME = {EUPORIA2021: Webinars in Theories and Practices of the Annotation through Domain-Specific Languages}, CONFERENCE_PLACE = {online}, CONFERENCE_DATE = {15/03/2021}, } @INPROCEEDINGS{DELGROSSO_2021_INPROCEEDINGS_D_484397, AUTHOR = {Del Grosso, A. M.}, TITLE = {La codifica testuale delle testimonianze inedite, manoscritte, audio e video}, YEAR = {2021}, ABSTRACT = {Il contributo intende illustrare gli aspetti più significativi della rappresentazione digitale delle testimonianze sia scritte sia orali, inquadrando la proposta in una prospettiva infrastrutturale e di sostenibilità delle risorse digitali prodotte.}, KEYWORDS = {Digital Humanities, Informatica Umanistica, Archivi Digitali, Voci dall'Inferno, XML/TEI}, URL = {https://www.unipi.it/index.php/archivio/event/6081-voci-dall-inferno-le-testimonianze-dei-sopravvissuti-ai-lager}, CONFERENCE_NAME = {Voci dall'Inferno. Le testimonianze dei sopravvissuti ai Lager}, CONFERENCE_PLACE = {Pisa, Palazzo "La Sapienza"}, CONFERENCE_DATE = {10/12/2021}, } @INPROCEEDINGS{DELGROSSO_2021_INPROCEEDINGS_DT_484390, AUTHOR = {Del Grosso, A. M. 
and Tessarolo, L.}, TITLE = {Lavori in corso: trasferimento di competenze per una re-ingegnerizzazione del sistema la "galassia MQDQ"}, YEAR = {2021}, ABSTRACT = {L'intervento illustra le attività in corso presso il centro VeDPH dell'Università Ca' Foscari di Venezia informate dal progetto di visiting scholar tra il dipartimento di Scienze Umane e l'Istituto di Linguistica Computazionale CNR-ILC volto alla reingegnerizzazione della tecnologia su cui poggiano i progetti della galassia Musisque Deoque (MQDQ).}, KEYWORDS = {Digital Humanities, Archivi Digitali Latini, MQDQ, XML/TEI, Filologia Collaborativa}, URL = {https://publications.cnr.it/doc/484390}, CONFERENCE_NAME = {Biblioteche digitali di testi latini in Italia. Per un progetto di archivio comune}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {28 giugno 2021}, } @INPROCEEDINGS{ESKEVICH_2021_INPROCEEDINGS_EF_455136, AUTHOR = {Eskevich, M. and Frontini, F.}, TITLE = {SSHOC'ing drama in the cloud}, YEAR = {2021}, ABSTRACT = {At LIBER 2021 Online Conference, CLARIN and SSHOC presented a webinar showcasing how SSH researchers can benefit from the resources and services offered by SSH research infrastructures in order to produce and exploit highly encoded historical textual data. After the webinar, the participants were able to successfully guide and advise SSH researchers (with a particular focus on literature studies) in their choice amongst existing resources and tools, based on their research question.}, KEYWORDS = {CLARIN, infrastrutture, scienze umane e sociali}, URL = {https://zenodo.org/record/5082522#.YOgETBMzb0s}, CONFERENCE_NAME = {LIBER annual conference}, CONFERENCE_PLACE = {virtual event}, CONFERENCE_DATE = {08/07/2021}, } @INPROCEEDINGS{FRONTINI_2021_INPROCEEDINGS_FK_443609, AUTHOR = {Frontini, F. and Khan, A.
F.}, TITLE = {Di cosa parliamo quando parliamo di FAIR?}, YEAR = {2021}, ABSTRACT = {Nel 2016 un consorzio di scienziati afferenti a diverse istituzioni e discipline enuncia i principi FAIR; in questi quattro anni l'importanza e la portata del programma FAIR è divenuta sempre più evidente. L'adesione a tali principi nelle discipline umanistiche sembra farsi largo, ma non senza difficoltà e interrogativi. Questo lavoro propone una riflessione sulle implicazioni della proposta FAIR per la gestione dei dati scientifici, confrontandola con la sua effettiva ricezione nella comunità delle DH in Italia e in Europa.}, KEYWORDS = {Principi FAIR, Open Data, dati della ricerca, politiche della ricerca, EOSC}, PAGES = {19-24}, URL = {https://aiucd2021.labcd.unipi.it/en/book-of-abstracts-conference/}, ISBN = {9788894253559}, CONFERENCE_NAME = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale}, CONFERENCE_DATE = {19-22/01/2021}, BOOKTITLE = {AIUCD 2021-DH per la società: e-guaglianza, partecipazione, diritti e valori nell'era digitale. Raccolta degli abstract estesi della 10a conferenza nazionale, Pisa, 2021}, EDITOR = {Del Grosso, A. M. and Boschetti, F. and Salvatori, E.}, } @INPROCEEDINGS{MARINETTI_2021_INPROCEEDINGS_MMQBBDPRS_461529, AUTHOR = {Marinetti, A. and Murano, F. and Quochi, V. and Ballerini, M. and Boschetti, F. and Del Grosso, A. M. and Piccini, S. and Rigobianco, L. and Solinas, P.}, TITLE = {Languages and Cultures of Ancient Italy. Historical Linguistics and Digital Models}, YEAR = {2021}, ABSTRACT = {The abstract accompanies a poster presenting an overview of the project "Languages and cultures of Ancient Italy", which had just started. The project brings together competences from Historical Linguistics, Computational Lexicography and Digital Humanities. The main objective of the project is to investigate the cultures of ancient Italy on the basis of their linguistic documentation (7th - 1st c. B.C.)
by means of digital tools specifically tailored for their peculiarities.}, KEYWORDS = {digital epigraphy, computational lexicons, text-lexicon linking, restsprachen, digital models, digital humanities}, PAGES = {528-532}, URL = {https://aiucd2021.labcd.unipi.it/en/book-of-abstracts-conference/}, CONFERENCE_NAME = {10th National Conference of Associazione per l'Informatica Umanistica e la Cultura Digitale}, CONFERENCE_PLACE = {Pisa (Virtuale)}, CONFERENCE_DATE = {19-22 gennaio 2021}, } @INPROCEEDINGS{MARZI_2021_INPROCEEDINGS_MTFNP_445743, AUTHOR = {Marzi, C. and Taxitari, L. and Ferro, M. and Nadalini, A. and Pirrelli, V.}, TITLE = {Valutare la lettura "in tempo reale": un esempio di integrazione tra linguistica computazionale e linguistica applicata}, YEAR = {2021}, ABSTRACT = {In anni recenti, linguistica computazionale e linguistica applicata hanno ampliato i loro rispettivi ambiti d'indagine, utilizzando l'ontologia formale della linguistica teorica e i modelli cognitivi della psicolinguistica per studiare le difficoltà che i parlanti incontrano nello svolgimento di "compiti" linguistici specifici. Nell'ambito della lettura, le tecnologie per il Trattamento Automatico del Linguaggio (TAL) si sono dimostrate capaci di classificare il livello di leggibilità di un testo, basandosi sulla distribuzione di alcuni parametri linguistici in testi pre-classificati per età dei lettori destinatari, o per grado di scolarità, o per livello di sviluppo cognitivo. Ad esempio, parole o frasi più lunghe, o parole più rare tendono a distribuirsi in testi di più difficile comprensione, o destinati a lettori più maturi. E' possibile così assegnare a un testo, o a ogni singola frase, un punteggio di leggibilità in funzione (inversa) della complessità lessicale, morfologica, sintattica o pragmatica dell'unità testuale analizzata. In Linguistica Applicata (LA) la valutazione della difficoltà di lettura ha seguito un approccio funzionale. 
Nel modello semplice di lettura, ad esempio, la capacità di leggere un testo è analizzata come il prodotto dell'interazione tra decodifica e comprensione. Attraverso l'osservazione di un campione di bambini impegnati nella lettura, è possibile valutare la loro fluenza in decodifica, gli errori di decodifica e comprensione, e l'efficacia di percorsi educativi personalizzati. La piattaforma ReadLet è stata sviluppata con l'obiettivo di integrare l'approccio classificatorio del TAL con quello funzionale della LA. Il bambino legge un breve testo visualizzato sullo schermo di un tablet, ad alta voce o in modalità silente. In entrambi i casi, al bambino viene chiesto di "tenere il segno" con il dito sullo schermo nel corso della lettura. La traccia tattile è registrata e allineata con il testo visualizzato sullo schermo mediante un algoritmo di convoluzione. Al contempo, il testo è annotato automaticamente per tratti linguistici. Alla fine della sessione di lettura silente, il bambino risponde ad alcune semplici domande sul contenuto del testo. I dati raccolti consentono di valutare le difficoltà (rallentamenti o errori) che il bambino incontra nella lettura, e di mettere in relazione "in tempo reale" queste difficoltà con aspetti linguistici specifici del testo. Un'analisi preliminare dei dati raccolti da ReadLet su oltre 400 allievi di alcune scuole elementari toscane e della Svizzera italiana, ha evidenziato il differente "passo" di lettura tra lettori con sviluppo tipico e atipico, e il peso che variabili come lunghezza, frequenza e lessicalità hanno su profili di lettura individuali e aggregati. 
La possibilità di "controllare" automaticamente la distribuzione di queste variabili nel testo e di correlarle con le difficoltà del singolo bambino consente, infine, di somministrare testi con livelli di difficoltà gradualmente crescenti, rendendo possibili percorsi personalizzati di potenziamento.}, KEYWORDS = {reading assessment, reading strategies, NLP, ICT mobile technologies}, PAGES = {5-5}, URL = {https://publications.cnr.it/doc/445743}, VOLUME = {2021}, CONFERENCE_NAME = {XXI Congresso Internazionale di AItLA}, CONFERENCE_PLACE = {Bergamo (I)}, CONFERENCE_DATE = {11-12/02/2021}, BOOKTITLE = {FARE LINGUISTICA APPLICATA CON LE DIGITAL HUMANITIES}, } @INPROCEEDINGS{ZENZARO_2021_INPROCEEDINGS_ZBD_472303, AUTHOR = {Zenzaro, S. and Boschetti, F. and Del Grosso, A. M.}, TITLE = {Domain Specific Languages on editing papyri: the GreekSchools case study}, YEAR = {2021}, ABSTRACT = {Within the ERC AdG 885222-GreekSchools we aim to manage the edit of multiple papyrologic texts: diplomatic and literary editions and the corresponding apparatuses and their translations. To endow scholars with automatic consistency and coherence of editorial choices and to support the whole editing process, we leverage Domain Specific Languages (DSLs): a formal language definition in a bounded domain. Digital text editing can be handled in multiple ways depending on the editorial purpose. We identify four possible editing approaches to digital textual scholarship: (1) word processor; (2) structured text (e.g. XML); (3) GUI-centric; (4) domain specific language (DSL). Each of them has pros and cons. In particular we analyse five dimensions: familiarity, compactness, completeness, data elaboration support, and the need of technical training. With familiarity we refer to how much the scholar can avoid shifting his established working paradigm/environment. Compactness is the ratio between quantity of information and formalisation size. 
Completeness refers to the information the content represents. The capability to extract or deduce information from the data is addressed by data elaboration support. Finally, we consider important to evaluate the amount of technical training for text editing. For example, structured texts grant completeness of information, while requiring extensive technical training. In this context only the DSL approach encompasses all these dimensions while the other approaches compromises on some of them. We propose a DSL-based editor that will support and improve the editing workflow in the context of the ERC project.}, KEYWORDS = {Digital Humanities, Digital Papyrology, GreekSchools, Computational philology}, URL = {https://www.clarin.eu/content/clarin-bazaar-2021#data-curation-using-nlp}, CONFERENCE_NAME = {CLARIN Annual Conference 2021}, CONFERENCE_DATE = {27-29/09/2021}, } @TECHREPORT{ALBANESI_2021_TECHREPORT_ABCPGS_463868, AUTHOR = {Albanesi, D. and Bellandi, A. and Colombo, M. and Papini, M. and Giovannetti, E. and Sciolette, F.}, TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-Rapporto tecnico 19}, YEAR = {2021}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto del Progetto Traduzione Talmud Babilonese nel periodo novembre 2020 - aprile 2021. Le principali attività tecniche svolte sul sistema Traduco sono state la risoluzione di bug, l'implementazione di nuove funzionalità richieste e lo sviluppo di una nuova interfaccia grafica utente. 
Le attività di ricerca sono state condotte, in continuità a quelle descritte nel rapporto precedente, nella rappresentazione del lessico, della terminologia e della conoscenza del Talmud.}, KEYWORDS = {Linguistica Computazionale, Traduzione di Testi Religiosi, Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Lessici elettronici, rappresentazione della conoscenza}, PAGES = {28}, URL = {https://publications.cnr.it/doc/463868}, } @TECHREPORT{ALBANESI_2021_TECHREPORT_ABCPGS_463870, AUTHOR = {Albanesi, D. and Bellandi, A. and Colombo, M. and Papini, M. and Giovannetti, E. and Sciolette, F.}, TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-Rapporto tecnico 20}, YEAR = {2021}, ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto del Progetto Traduzione Talmud Babilonese nel periodo maggio 2021 - ottobre 2021. Le principali attività tecniche svolte sul sistema Traduco sono state la risoluzione di bug e l'implementazione di nuove funzionalità richieste. Parallelamente, sono proseguite le attività volte alla visualizzazione di risorse testuali e linguistiche tramite grafi e per la conversione e l'utilizzo del lessico computazionale PSC a supporto di ricerca "full-text" sul testo talmudico italiano.}, KEYWORDS = {Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Lessici elettronici, rappresentazione della conoscenza, Linguistica Computazionale, traduzione di testi religiosi}, PAGES = {20}, URL = {https://publications.cnr.it/doc/463870}, } @TECHREPORT{ALBANESI_2021_TECHREPORT_AGPS_463871, AUTHOR = {Albanesi, D. and Giovannetti, E. and Papini, M. 
and Sciolette, F.},
  TITLE = {Traduco: l'Applicazione Web Linguistico-Computazionale per il Progetto di Traduzione del Talmud Babilonese-rapporto integrativo 3},
  YEAR = {2021},
  ABSTRACT = {La presente relazione tecnico-scientifica descrive l'attività svolta da ILC-CNR nel contesto delle attività di progetto previste dalla convenzione integrativa stipulata tra PTTB e ILC-CNR in data 10/07/2018 e condotte nel periodo febbraio 2020 - gennaio 2021.},
  KEYWORDS = {Linguistica Computazionale, Traduzione di Testi Religiosi, Traduzione Assistita dal Calcolatore, Traduzione Collaborativa, Sviluppo front-end, lessici computazionali},
  PAGES = {17},
  URL = {https://publications.cnr.it/doc/463871},
}

@TECHREPORT{DIDONATO_2021_TECHREPORT_DDLKOTCCEMDT_459083,
  AUTHOR = {Di Donato, F. and Dumouchel, S. and Lombardo, T. and Katsaloulis, I. and Ocansey, J. T. and Thiel, C. and Capelli, L. and Chen, Y. and Eskevich, M. and Moranville, Y. and De Santis, L. and Tóth Czifra, E.},
  TITLE = {TRIPLE Deliverable: D6.2 Report on Procedure to Follow to Be Part of the EOSC Catalogue},
  YEAR = {2021},
  ABSTRACT = {The 6.2 Deliverable presents the procedure to onboard the future GoTriple platform into the EOSC catalogue. This deliverable is supposed to guide the TRIPLE consortium in the purpose of adding a SSH discovery platform to the EOSC catalogue but it can also guide other service providers for their own purposes, especially services that are made with different components. Part 1 of this deliverable provides an overview of the GoTriple platform and the five innovative services that are integrated into it, e.g. ScaR, MEOH App, Visualisation components, Pundit, and Head Start. As those innovative services are independent tools, the perspectives, challenges and potential solutions of their onboarding into the EOSC catalogue are discussed in detail on a case-by-case basis. Part 2 contains the core information of this deliverable.
First, an overview of the three main steps in the onboarding process (2.1.) provides the overall context of the task. Second, the timeline of all needed steps (already taken and planned closer to the end of the project) to define the final federation that will serve as GoTriple provider is outlined (2.2). Essentially, all project partners that are committed to continue their support of the developed service after the project will be identified as GoTriple providers in the EOSC portal, while the GoTriple discovery service will be part of the OPERAS Research Infrastructure's catalogue. Third, the details of the resource profile that are mandatory and optional, are listed according to the set of fields in the EOSC portal. Part 3 summarises the two main steps in the roadmap for integration which are planned to take place in September 2021 and March 2023. In conclusion, the authors highlight the fact that the EOSC development is an ongoing process, and therefore, the current report reflects the procedures and planning steps that are valid at this point of time and fit the current requirements.},
  KEYWORDS = {EOSC, GoTRIPLE},
  URL = {https://zenodo.org/record/5702705#.YZYgApDMKHs},
}

@TECHREPORT{FRONTINI_2021_TECHREPORT_FGM_463461,
  AUTHOR = {Frontini, F. and Gamba, F. and Monachini, M.},
  TITLE = {D3.9 Report on Ontology and Vocabulary Collection and Publication},
  YEAR = {2021},
  ABSTRACT = {This deliverable pertains to SSHOC Task 3.1 which was responsible for investigating and providing resources and tools to support the multilingual aspects of the future pan-EU SSH infrastructure. Making data and services accessible and usable in SSH is very much also a matter of providing relevant translations, translation of metadata concepts, multilingual vocabularies, terminology extraction across languages, multilingual databases. The deliverable offers a detailed report on the gathering and translation of relevant SSH metadata, ontologies and vocabularies for the use-cases indicated in the task's topics: multilingual metadata concepts and vocabularies, the multilingual occupation ontology, with cross-country female occupational titles. In accordance with SSHOC and the EOSC FAIR recommendations and requirements, the metadata vocabularies and ontologies have been published via several different formats and facilities. Section 1. The introduction sets the landscape and describes the need of multilingual vocabularies both for classification and discovery in the context of a cloud-based infrastructure that will offer access to research data and related services adapted to the needs of the SSH community. Section 2. "Multilingual metadata" investigates the possibility to use and test Natural Language Processing (NLP) approaches and Machine Translation (MT) to make the metadata more accessible using national languages other than English. A selected case study was the recommended metadata set of the CLARIN Concept Registry (CCR): the whole set of metadata and definitions were translated into French, Greek, and Italian. The section describes the machine-translation and evaluation process, also comparing different technologies. Section 3. "Multilingual vocabularies and ontologies" introduces two other typical case-studies. The first one addresses one of the pressing needs in social sciences research. Many surveys, indeed, ask respondents to specify their occupation and the occupational ontology is used for the survey questions. For many languages the occupational titles for males and females are not identical. In section 3.1 the enrichment of the occupational ontology with lists for male and female titles, is described for many languages, namely for Dutch, German, Slovenian and French. The second case study focuses on the automatic extraction of terminology from texts: a list of domain-specific terms was automatically extracted from a corpus of Data Curation and Stewardship, validated by domain experts, automatically translated into multiple languages (Dutch, French, German, Greek, Italian, Slovenian) and linked to other existing terminologies. Section 4. describes the SKOS-ification and publication process of the results, together with the challenges posed by multilinguality. Section 5. offers an overview of the exploitation and sustainability of the results and how these are made available to the community. Finally the Conclusions provide some reflections on Machine Translation approaches adopted for translating the vocabularies into multiple languages, the advantages in terms of time saving and some first recommendations to the community.},
  KEYWORDS = {Terminologies, Infrastructures, Social Sciences and Humanities, Data Curation, Data Stewardship, vocabularies, Translations, Metadata},
  URL = {https://doi.org/10.5281/zenodo.5913485},
}

@TECHREPORT{LAZZERI_2021_TECHREPORT_LTPAABBCCCCCCCDDDFFFGGGKLLMNOOPPPPRRRSSSSTTVVZ_466613,
  AUTHOR = {Lazzeri, E. and Tanlongo, F. and Pavone, G. and Alpi, F. and Ansuini, A. and Bertazzon, E. and Bonaccorsi, D. and Cappelluti, F. and Casati, S. and Castelli, D. and Cippitani, R. and Colcelli, V. and Costantini, A. and Cozzini, S. and Degl'Innocenti, E. and Di Donato, F. and Di Giorgio, S. and Fava, I. and Fiore, S. and Forni, M. and Galimberti, G. and Giglia, E. and Giorgetti, A. and Kurapati, S. and Landoni, M. and Lavitrano, M. and Marras, C. and Niccolucci, F. and Occioni, M. and Osmenaj, E. and Paolini, G. and Pasquale, V. and Petrillo, C. and Pugliese, R. and Ripepi, E. and Rivoira, G. and Rossi, G. and Salon, S. and Sarretta, A. and Sartori, A. and Spiga, D. and Tamagno, D. and Tammaro, A. M. and Vellico, M. and Vignocchi, M.
and Zane, D.},
  TITLE = {Competence Centre ICDI per Open Science, FAIR, ed EOSC-Mission, strategia e piano d'azione},
  YEAR = {2021},
  ABSTRACT = {This document presents the mission and strategy of the Italian Competence Centre on Open Science, FAIR, and EOSC. The Competence Centre is an initiative born within the Italian Computing and Data Infrastructure (ICDI), a forum created by representatives of major Italian Research Infrastructures and e-Infrastructures, with the aim of promoting synergies at the national level, and optimising the Italian participation to European and global challenges in this field, including the European Open Science Cloud (EOSC), the European Data Infrastructure (EDI) and HPC. This working paper depicts the mission and objectives of the ICDI Competence Centre, a network of experts with various skills and competences that are supporting the national stakeholders on topics related to Open Science, FAIR principles application and participation to the EOSC. The different actors and roles are described in the document as well as the activities and services offered, and the added value each stakeholder can find in the Competence Centre. The tools and services provided, in particular the concept for the portal, through which the Centre will connect to the national landscape and users, are also presented. An English translation of this document is provided in Zenodo: Lazzeri, Emma, et al. (2021). ICDI Competence Centre for Open Science, FAIR and EOSC - Mission, Strategy and Action Plan. Zenodo. https://doi.org/10.5281/zenodo.5512638},
  KEYWORDS = {EOSC, Competence Centre, ICDI, Open Science, FAIR, EDI, HPC},
  PAGES = {13},
  URL = {https://doi.org/10.32079/ISTI-TR-2022/022},
}

@TECHREPORT{LAZZERI_2021_TECHREPORT_LTPAABBCCCCCCCDDDFFFGGGKLLMNOOPPPPRRRSSSSTTVVZ_466609,
  AUTHOR = {Lazzeri, E. and Tanlongo, F. and Pavone, G. and Alpi, F. and Ansuini, A. and Bertazzon, E. and Bonaccorsi, D. and Cappelluti, F. and Casati, S. and Castelli, D. and Cippitani, R.
and Colcelli, V. and Costantini, A. and Cozzini, S. and Degl'Innocenti, E. and Di Donato, F. and Di Giorgio, S. and Fava, I. and Fiore, S. and Forni, M. and Galimberti, G. and Giglia, E. and Giorgetti, A. and Kurapati, S. and Landoni, M. and Lavitrano, M. and Marras, C. and Niccolucci, F. and Occioni, M. and Osmenaj, E. and Paolini, G. and Pasquale, V. and Petrillo, C. and Pugliese, R. and Ripepi, E. and Rivoira, G. and Rossi, G. and Salon, S. and Sarretta, A. and Sartori, A. and Spiga, D. and Tamagno, D. and Tammaro, A. M. and Vellico, M. and Vignocchi, M. and Zane, D.},
  TITLE = {ICDI Competence Centre for Open Science, FAIR and EOSC-Mission, strategy and action plan},
  YEAR = {2021},
  ABSTRACT = {This document presents the mission and strategy of the Italian Competence Centre on Open Science, FAIR, and EOSC. The Competence Centre is an initiative born within the Italian Computing and Data Infrastructure (ICDI), a forum created by representatives of major Italian Research Infrastructures and e-Infrastructures, with the aim of promoting synergies at the national level, and optimising the Italian participation to European and global challenges in this field, including the European Open Science Cloud (EOSC), the European Data Infrastructure (EDI) and HPC. This working paper depicts the mission and objectives of the ICDI Competence Centre, a network of experts with various skills and competencies that are supporting the national stakeholders on topics related to Open Science, FAIR principles application and participation to the EOSC. The different actors and roles are described in the document as well as the activities and services offered, and the added value each stakeholder can find in the Competence Centre. The tools and services provided, in particular the concept for the portal, through which the Centre will connect to the national landscape and users, are also presented. This record is the English translation of the original Italian (2021).
Competence Centre ICDI per Open Science, FAIR, ed EOSC - Mission, Strategia e piano d'azione. Zenodo. https://doi.org/10.5281/zenodo.5071055},
  KEYWORDS = {EOSC, Competence Centre, ICDI, Open Science, FAIR, EDI, HPC},
  PAGES = {7},
  URL = {https://doi.org/10.32079/ISTI-TR-2021/023},
}

@TECHREPORT{SAYERS_2021_TECHREPORT_SSHAAABBBCECDDDDDFFGGGGGGHLLJJKKMMMMMNRPSASSSTYBCCLKRP_472131,
  AUTHOR = {Sayers, D. and Sousa Silva, R. and Höhn, S. and Ahmedi, L. and Allkivi Metsoja, K. and Anastasiou, D. and Beňuš, Š. and Bowker, L. and Bytyçi, E. and Catala, A. and Çepani, A. and Chacón Beltrán, R. and Dadi, S. and Dalipi, F. and Despotovic, V. and Doczekalska, A. and Drude, S. and Fort, K. and Fuchs, R. and Galinski, C. and Gobbo, F. and Gungor, T. and Guo, S. and Höckner, K. and Láncos, P. and Libal, T. and Jantunen, T. and Jones, D. and Klimova, B. and Korkmaz, E. and Maučec, M. S. and Melo, M. and Meunier, F. and Migge, B. and Mititelu, V. B. and Névéol, A. and Rossi, A. and Pareja Lora, A. and Sanchez Stockhammer, C. and Şahin, A. and Soltan, A. and Soria, C. and Shaikh, S. and Turchi, M. and Yildirim Yayilgan, S. and Bessa, M. and Cabral, L. and Coler, M. and Liebeskind, C. and Kernerman, I. and Rousi, R. and Prys, C.},
  TITLE = {The Dawn of the Human-Machine Era: A forecast of new and emerging language technologies},
  YEAR = {2021},
  ABSTRACT = {New language technologies are coming, thanks to the huge and competing private investment fuelling rapid progress; we can either understand and foresee their effects, or be taken by surprise and spend our time trying to catch up. This report sketches out some transformative new technologies that are likely to fundamentally change our use of language.
Some of these may feel unrealistically futuristic or far-fetched, but a central purpose of this report - and the wider LITHME network - is to illustrate that these are mostly just the logical development and maturation of technologies currently in prototype. But will everyone benefit from all these shiny new gadgets? Throughout this report we emphasise a range of groups who will be disadvantaged and issues of inequality. Important issues of security and privacy will accompany new language technologies. A further caution is to re-emphasise the current limitations of AI. Looking ahead, we see many intriguing opportunities and new capabilities, but a range of other uncertainties and inequalities. New devices will enable new ways to talk, to translate, to remember, and to learn. But advances in technology will reproduce existing inequalities among those who cannot afford these devices, among the world's smaller languages, and especially for sign language. Debates over privacy and security will flare and crackle with every new immersive gadget. We will move together into this curious new world with a mix of excitement and apprehension - reacting, debating, sharing and disagreeing as we always do. Plug in, as the human-machine era dawns.},
  KEYWORDS = {language technologies, human-machine communication},
  URL = {https://doi.org/10.17011/jyx/reports/20210518/1},
}

@MISC{ALRAHABI_2021_MISC_ABFPJBKG_453820,
  AUTHOR = {Alrahabi, M. and Brando, C. and Frontini, F. and Provenier, A. and Jalabert, R. and Bordry, M. and Koskas, C. and Gawley, J.},
  TITLE = {Guide d'annotation manuelle d'entités nommées dans des corpus littéraires},
  YEAR = {2021},
  ABSTRACT = {Guide d'annotation manuelle d'entités nommées dans des corpus littéraires Campagne d'annotation OBVIL 2019-2021},
  KEYWORDS = {NER},
  URL = {https://hal.archives-ouvertes.fr/hal-03156278},
}

@MISC{BARONI_2021_MISC_B_483770,
  AUTHOR = {Baroni, P.},
  TITLE = {DiPText-KC Web Site},
  YEAR = {2021},
  ABSTRACT = {Sito Web del CLARIN Knowledge Centre for Digital and Public Textual Scholarship, realizzato con WordPress, sviluppato in inglese},
  KEYWORDS = {CLARIN, Knowledge Centre, Digital and Public Textual Scholarship},
  URL = {https://diptext-kc.clarin-it.it},
}

@MISC{BOSCHETTI_2021_MISC_BD_484394,
  AUTHOR = {Boschetti, F. and Del Grosso, A. M.},
  TITLE = {Problemi tecnici e questioni teoriche nella gestione degli archivi digitali di testi},
  YEAR = {2021},
  ABSTRACT = {Il seminario illustra alcune potenzialità offerte dalla rappresentazione digitale di testi d'interesse umanistico. Si prenderà spunto dall'archivio di testi latini conservato e fruibile dalla piattaforma Musisque Deoque per poi passare ad alcuni esempi di indagine condotti mediante il linguaggio di interrogazione XQuery.},
  KEYWORDS = {Digital Humanities, Filologia Digitale, Filologia Computazionale, MQDQ, Archivi Digitali di Testi, XML/TEI},
  URL = {https://publications.cnr.it/doc/484394},
}

@MISC{DELGROSSO_2021_MISC_D_484395,
  AUTHOR = {Del Grosso, A. M.},
  TITLE = {Preparing the XML-TEI text for Euporia},
  YEAR = {2021},
  ABSTRACT = {Seminario introduttivo alla codifica XML/TEI per la rappresentazione digitale di testi d'interesse storico, letterario e umanistico.},
  KEYWORDS = {Digital Humanities, Euporia, Filologia Digitale, XML/TEI, CoPhiLab},
  URL = {https://cophilab.ilc.cnr.it/euporia-2021/},
}

@MISC{DIDONATO_2021_MISC_DL_465157,
  AUTHOR = {Di Donato, F.
and Lazzeri, E.},
  TITLE = {Data Management},
  YEAR = {2021},
  ABSTRACT = {The Data Management course has been organised by AREA Science Park and tailored to their researchers and support research staff. The course is focused on FAIR data management and introduces to all aspects of Open Science and provides the skills, tools and standards required to embed Open Science in the research workflow. It has been structured in 4 on-line training modules, each one built on 20 mins sub-modules of frontal lessons and on several interactions. The structure of the 4 modules is the following: Module 1: An Introduction to Open Science and Open Data (2hrs) Module 2: Open Science in Horizon Europe, the FAIR principles and Open data (4 hrs) Module 3: Data Management Plans (3hrs) Module 4: Data Management Plans and Metadata schemas (3hrs) Materials of each module start with MX.0. A readme file is associated with each module.},
  KEYWORDS = {open science, research data management},
  URL = {https://doi.org/10.5281/zenodo.5575096},
}

@MISC{DIDONATO_2021_MISC_DL_465158,
  AUTHOR = {Di Donato, F. and Lazzeri, E.},
  TITLE = {Horizon Europe: L'Open Science e la nuova normalità},
  YEAR = {2021},
  ABSTRACT = {Nel programma quadro Horizon Europe, l'adozione dell'Open Science e delle sue pratiche è un requisito fondamentale. Questo evento, rivolto a ricercatori, tecnologi e personale di supporto alla ricerca, intende fornire gli elementi introduttivi per comprendere come rispondere in modo adeguato alle nuove richieste introdotte dalla Commissione europea. In particolare ci concentreremo sugli elementi fondamentali della comunicazione scientifica, sulla definizione di scienza aperta e dei suoi elementi fondanti, e su come l'Open Science sta cambiando il paradigma di valutazione della ricerca.
Programma - Modulo 1.0: Intro e warm-up - Modulo 1.1: La valutazione della ricerca e l'Open Science - Modulo 1.2: Comunicazione scientifica e Open Science - Domande - Modulo 1.3: Open Science e Horizon Europe - Domande - Chiusura lavori},
  KEYWORDS = {open science, Horizon Europe, valutazione},
  URL = {https://doi.org/10.5281/zenodo.5604541},
}

@MISC{ERJAVEC_2021_MISC_EOOLSGRPKBSVDDJHNCDVMLCAFMQVRMBSRDUPBKMDLR_463861,
  AUTHOR = {Erjavec, T. and Ogrodniczuk, M. and Osenova, P. and Ljubešić, N. and Simov, K. and Grigorova, V. and Rudolf, M. and Pančur, A. and Kopp, M. and Barkarson, S. and Steingrímsson, S. and Van Der Pol, H. and Depoorter, G. and De Does, J. and Jongejan, B. and Haltrup Hansen, D. and Navarretta, C. and Calzada Pérez, M. and De Macedo, L. D. and Van Heusden, R. and Marx, M. and Çöltekin, Ç. and Coole, M. and Agnoloni, T. and Frontini, F. and Montemagni, S. and Quochi, V. and Venturi, G. and Ruisi, M. and Marchetti, C. and Battistoni, R. and Sebők, M. and Ring, O. and Darģis, R. and Utka, A. and Petkevičius, M. and Briedienė, M. and Krilavičius, T. and Morkevičius, V. and Diwersy, S. and Luxardo, G. and Rayson, P.},
  TITLE = {Linguistically annotated multilingual comparable corpora of parliamentary debates ParlaMint.ana 2.1},
  YEAR = {2021},
  ABSTRACT = {ParlaMint 2.1 is a multilingual set of 17 comparable corpora containing parliamentary debates mostly starting in 2015 and extending to mid-2020, with each corpus being about 20 million words in size. The sessions in the corpora are marked as belonging to the COVID-19 period (from November 1st 2019), or being "reference" (before that date). The corpora have extensive metadata, including aspects of the parliament; the speakers (name, gender, MP status, party affiliation, party coalition/opposition); are structured into time-stamped terms, sessions and meetings; with speeches being marked by the speaker and their role (e.g. chair, regular speaker). The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. Note that some corpora have further information, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The corpora are encoded according to the Parla-CLARIN TEI recommendation (https://clarin-eric.github.io/parla-clarin/), but have been validated against the compatible, but much stricter ParlaMint schemas. This entry contains the linguistically marked-up version of the corpus, while the text version is available at http://hdl.handle.net/11356/1432. The ParlaMint.ana linguistic annotation includes tokenization, sentence segmentation, lemmatisation, Universal Dependencies part-of-speech, morphological features, and syntactic dependencies, and the 4-class CoNLL-2003 named entities. Some corpora also have further linguistic annotations, such as PoS tagging or named entities according to language-specific schemes, with their corpus TEI headers giving further details on the annotation vocabularies and tools.},
  KEYWORDS = {dibattiti parlamentari, covid-19, ParlaCLARIN, parlamenti, discorso politico, CLARIN, linguistic annotation, pos-tagging, ner, linguistic dependency annotation, UD},
  URL = {http://hdl.handle.net/11356/1431},
}

@MISC{ERJAVEC_2021_MISC_EOOLSGRPKBSVDDJHNCDVMLCAFMQVRMBSRDUPBKMDLR_463865,
  AUTHOR = {Erjavec, T. and Ogrodniczuk, M. and Osenova, P. and Ljubešić, N. and Simov, K. and Grigorova, V. and Rudolf, M. and Pančur, A. and Kopp, M. and Barkarson, S. and Steingrímsson, S. and Van Der Pol, H. and Depoorter, G. and De Does, J. and Jongejan, B. and Haltrup Hansen, D. and Navarretta, C. and Calzada Pérez, M. and De Macedo, L. D. and Van Heusden, R. and Marx, M. and Çöltekin, Ç. and Coole, M. and Agnoloni, T. and Frontini, F. and Montemagni, S. and Quochi, V. and Venturi, G. and Ruisi, M. and Marchetti, C. and Battistoni, R. and Sebők, M. and Ring, O. and Darģis, R. and Utka, A. and Petkevičius, M. and Briedienė, M. and Krilavičius, T. and Morkevičius, V. and Diwersy, S. and Luxardo, G. and Rayson, P.},
  TITLE = {Multilingual comparable corpora of parliamentary debates ParlaMint 2.1},
  YEAR = {2021},
  ABSTRACT = {ParlaMint 2.1 is a multilingual set of 17 comparable corpora containing parliamentary debates mostly starting in 2015 and extending to mid-2020, with each corpus being about 20 million words in size. The sessions in the corpora are marked as belonging to the COVID-19 period (after November 1st 2019), or being "reference" (before that date). The corpora have extensive metadata, including aspects of the parliament; the speakers (name, gender, MP status, party affiliation, party coalition/opposition); are structured into time-stamped terms, sessions and meetings; with speeches being marked by the speaker and their role (e.g. chair, regular speaker). The speeches also contain marked-up transcriber comments, such as gaps in the transcription, interruptions, applause, etc. Note that some corpora have further information, e.g. the year of birth of the speakers, links to their Wikipedia articles, their membership in various committees, etc. The corpora are encoded according to the Parla-CLARIN TEI recommendation (https://clarin-eric.github.io/parla-clarin/), but have been validated against the compatible, but much stricter ParlaMint schemas. This entry contains the ParlaMint TEI-encoded corpora with the derived plain text version of the corpus along with TSV metadata on the speeches. Also included is the 2.0 release of the data and scripts available at the GitHub repository of the ParlaMint project. Note that there also exists the linguistically marked-up version of the corpus, which is available at http://hdl.handle.net/11356/1431.},
  KEYWORDS = {dibattiti parlamentari, covid-19, discorso politico, CLARIN, parlamenti, ParlaCLARIN},
  URL = {http://hdl.handle.net/11356/1432},
}

@MISC{FRONTINI_2021_MISC_FGMB_463503,
  AUTHOR = {Frontini, F. and Gamba, F. and Monachini, M. and Broeder, D.},
  TITLE = {SSHOC Multilingual Data Stewardship Terminology},
  YEAR = {2021},
  ABSTRACT = {The SSHOC Multilingual Data Stewardship Terminology is a multilingual terminology that collects terms specific to the domain of Data Stewardship, as well as their definitions. A list of domain-specific terms was automatically extracted from a corpus pertaining to the domain of Data Stewardship and Curation, validated by domain experts, assigned a definition, and linked to other existing terminologies (Loterre Open Science Thesaurus, terms4FAIRskills, Linked Open Vocabularies, ISO terms and definitions). Each term-definition pair was then automatically translated into multiple languages (Dutch, French, German, Greek, Italian, Slovenian) by employing Deep-L. The Multilingual Data Stewardship Terminology thus consists of 210 concepts available in Dutch, French, German, Greek, Italian, Slovenian. This resource was created within the frame of the SSHOC (Social Sciences and Humanities Open Cloud) project (H2020-INFRAEOSC-2018-2-823782). It is the result of the work of Task 3.1.2 "extraction of terminology from technical documentation about standards and interoperability", as described in D3.9, carried out jointly by ILC-CNR and CLARIN ERIC.},
  KEYWORDS = {terminology, data stewardship},
  URL = {http://hdl.handle.net/20.500.11752/ILC-567},
}

@MISC{FRONTINI_2021_MISC_FGMB_463504,
  AUTHOR = {Frontini, F. and Gamba, F. and Monachini, M.
and Broeder, D.},
  TITLE = {SSHOC Multilingual Metadata},
  YEAR = {2021},
  ABSTRACT = {SSHOC Multilingual Metadata is based on the metadata set of the CLARIN Concept Registry (CCR). The CCR 232 approved metadata concepts, as well as their definitions, were automatically translated into several languages (Dutch, French, Greek, Italian) thanks to the support of Machine Translation tools, and eventually validated by native speakers who were also expert of the domain. This resource was created within the frame of the SSHOC (Social Sciences and Humanities Open Cloud) project (H2020-INFRAEOSC-2018-2-823782). It is the result of the work of Task 3.1.3 "creating Multilingual metadata and taxonomies for discovery", as described in D3.9, carried out jointly by ILC-CNR and CLARIN ERIC.},
  KEYWORDS = {metadata, terminology},
  URL = {http://hdl.handle.net/20.500.11752/ILC-568},
}

@MISC{GIGLIA_2021_MISC_GLD_465155,
  AUTHOR = {Giglia, E. and Lazzeri, E. and Di Donato, F.},
  TITLE = {Scienza aperta e gestione dei dati per le scienze umane e del patrimonio culturale},
  YEAR = {2021},
  ABSTRACT = {Corso tenuto per gli istituti CNR di area umanistica, maggio-giugno 2021. Modulo1: Open Science, perché serve; il diritto d'autore; come siamo arrivati alla comunicazione scientifica attuale Modulo 2: Open Access e politiche europee Modulo 3: Gestione dei dati e dati FAIR Modulo 4: Data management plan ed esempi concreti Moduli 5-8: presentazione delle infrastrutture di ricerca, CLARIN, DARIAH, OPERAS, ERHIS, Parthenos, Ariadne+},
  KEYWORDS = {open science, fair data, humanities},
  URL = {https://zenodo.org/record/5497914#.YjCP05PMJfU},
}

@MISC{MATHIAK_2021_MISC_MJHDJWFCBCK_465241,
  AUTHOR = {Mathiak, B. and Juty, N. and Heger, T. and Di Donato, F. and Jeschke, J. and Widmann, H. and Flügel, A. and Culina, A. and Bardi, A. and Colomb, J. and Kraker, P.},
  TITLE = {Stocktaking GO FAIR Discovery IN-Use cases, infrastructure (0.95)},
  YEAR = {2021},
  ABSTRACT = {In order to build a better ecosystem for data discovery tools the Data Discovery Implementation Group of GO Fair (https://www.go-fair.org/implementation-networks/overview/discovery) collected use cases between 2019 and 2020 from a variety of sources. We also detail the 'Actors' for these use cases and the 'Source' providing links, whenever possible. Since we found over a hundred individual use cases, we decided to cluster them to provide a better overview. The clustering, as well as the results of a small survey among data infrastructure specialists to find how they rate the importance of the clusters are detailed in the documentation to this dataset, a draft of which can currently be found here. The code and data to produce the figures in the documentation are available as R code in the GO_FAIR_Discovery_Use_case-master.zip file. The use cases themselves are available as Excel sheet and csv.},
  KEYWORDS = {discovery, gofair},
  URL = {https://doi.org/10.5281/zenodo.5211196},
}

@MISC{TESSAROLO_2021_MISC_TDB_484490,
  AUTHOR = {Tessarolo, L. and Del Grosso, A. M. and Boschetti, F.},
  TITLE = {florilegiasyriaca},
  YEAR = {2021},
  ABSTRACT = {Il portale di produzione e fruizione delle edizioni dei testi critici restituiti al pubblico durante il lavoro del team del progetto ERC 758732-FLOS},
  KEYWORDS = {ERC, Digital Humanities, Digital Philology, FLOS, DSL-based DSE},
  URL = {https://www.florilegiasyriaca.eu/},
}

@MISC{ZENZARO_2021_MISC_ZDB_472307,
  AUTHOR = {Zenzaro, S. and Del Grosso, A. M. and Boschetti, F.},
  TITLE = {CophiEditor A collaborative web platform for the creation of scholarly digital editions},
  YEAR = {2021},
  ABSTRACT = {Relazione di avanzamento della piattaforma per il progetto GreekSchools},
  KEYWORDS = {Digital Humanities, Digital papyrology, GreekSchools},
  URL = {https://publications.cnr.it/doc/472307},
}

@ARTICLE{ALZETTA_2020_ARTICLE_ADMV_463828,
  AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S.
and Venturi, G.},
  TITLE = {Linguistically-driven Selection of Difficult-to-Parse Dependency Structures},
  YEAR = {2020},
  ABSTRACT = {The paper illustrates a novel methodology meeting a twofold goal, namely quantifying the reliability of automatically generated dependency relations without using gold data on the one hand, and identifying which are the linguistic constructions negatively affecting the parser performance on the other hand. These represent objectives typically investigated in different lines of research, with different methods and techniques. Our methodology, at the crossroads of these perspectives, allows not only to quantify the parsing reliability of individual dependency types but also to identify and weight the contextual properties making relation instances more or less difficult to parse. The proposed methodology was tested in two different and complementary experiments, aimed at assessing the degree of parsing difficulty across (a) different dependency relation types, and (b) different instances of the same relation. The results show that the proposed methodology is able to identify difficult-to-parse dependency relations without relying on gold data and by taking into account a variety of intertwined linguistic factors. These findings pave the way to novel applications of the methodology, both in the direction of defining new evaluation metrics based purely on automatically parsed data and towards the automatic creation of challenge sets.},
  KEYWORDS = {Linguistic Complexity, Syntactic Parsing, Evaluation metrics},
  PAGES = {37--60},
  URL = {https://journals.openedition.org/ijcol/719},
  VOLUME = {6},
  DOI = {10.4000/ijcol.719},
  PUBLISHER = {aAccademia University Press, Torino (Italia)},
  ISSN = {2499-4553},
  JOURNAL = {Italian Journal of Computational Linguistics},
}

@ARTICLE{BOSCHETTI_2020_ARTICLE_BD_472293,
  AUTHOR = {Boschetti, F. and Del Grosso, A.},
  TITLE = {L'annotazione di testi storico-letterari al tempo dei social media},
  YEAR = {2020},
  ABSTRACT = {The annotation of historical and literary texts is approached differently by traditional philologists and digital philologists. The former are concentrated on the detailed study of a given text (close reading) while the latter are focused on the study of large quantities of texts (distant reading). A structured and collaborative annotation makes it possible both to add information to particular passages of individual texts, as in a traditional linear comment, and to connect data from entire textual collections through rigorous protocols. However, the standards developed by digital philologists are not highly appreciated by traditional academics, since the effort necessary to apply the proposed technologies allegedly diverts researchers' attention from the object of study. As opposed to this objection, we intend to highlight that it is indeed possible to maintain the precision requisite for the application of computational tools to digital resources without renouncing the annotation practices established in traditional contexts. In support of the method, we report a number of case studies of digital scientific editions whose goals include both reconstructing respective texts and encouraging the dissemination of contents and public participation in the academic debate. In particular, we will discuss the following projects: a) the stylistic annotation of three different editions of Giacomo Leopardi's translation of the Batracomiomachia; b) the scientific edition of Bellini's letters; c) the multi-level annotated edition of Bassani; and d) the comparison of Umberto Eco's variants of his Il nome della rosa.},
  KEYWORDS = {digital philology, collaborative annotation, communities, digital scholarly edition, formalisation},
  PAGES = {65--99},
  URL = {https://publications.cnr.it/doc/472293},
  VOLUME = {11},
  DOI = {10.15804/IW.2020.11.1.03},
  PUBLISHER = {Wydawnictwo Adam Marszalek (Torun, Polonia)},
  ISSN = {2084-4514},
  JOURNAL = {Italica Wratislaviensia},
}

@ARTICLE{DEROSIS_2020_ARTICLE_DGZVF_435971,
  AUTHOR = {De Rosis, S. and Guidotti, E. and Zuccarino, S. and Venturi, G. and Ferre, F.},
  TITLE = {Waiting time information in the Italian NHS: A citizen perspective},
  YEAR = {2020},
  ABSTRACT = {Public involvement in the management and communication of waiting times is known to support initiatives to reduce waiting times, as well as increase fairness and promote transparency and accountability. In order to improve transparency and communication to citizens, Italy recently updated the National Regulatory Plan for Waiting Lists (2019-2021), which calls for the disclosure of waiting time information on healthcare provider webpages. This study analyses waiting time information for outpatient visits and digital services available on the institutional website pages of 144 public healthcare organisations in nine regions and two autonomous provinces of Italy. Web pages were analysed both in terms of the available information/services, using a grid, and in terms of the quality of the text using an advanced readability assessment tool (READ-IT). This information was complemented and validated by regional healthcare key informants during research-specific workshops.
Waiting time information disclosure, digital services and text readability varied both within and between the regional healthcare systems and organisations. The types and characteristics of waiting time information and statistics vary considerably with a negative impact on their use for benchmarking and their readability and usability for booking purposes. Overall, communication weaknesses due to low harmonization and clarity of information can undermine efforts in effectively informing and involving the public through online waiting time data disclosure. (C) 2020 The Author(s). Published by Elsevier B.V.}, KEYWORDS = {Waiting times, Healthcare, Online information, Readability, Italy}, PAGES = {796-804}, URL = {https://www.sciencedirect.com/science/article/pii/S0168851020301111?via%3Dihub}, VOLUME = {124}, DOI = {10.1016/j.healthpol.2020.05.012}, PUBLISHER = {Elsevier (Amsterdam, Paesi Bassi)}, ISSN = {0168-8510}, JOURNAL = {Health policy (Amst. Print)}, } @ARTICLE{DUMOUCHEL_2020_ARTICLE_DBBCDEFGGGMPDPT_437781, AUTHOR = {Dumouchel, S. and Blotière, E. and Breitfuss, G. and Chen, Y. and Di Donato, F. and Eskevich, M. and Forbes, P. and Georgiadis, H. and Gingold, A. and Gorgaini, E. and Moranville, Y. and Pohle, S. and De Paoli, S. and Petitfils, C. and Toth Czifra, E.}, TITLE = {GOTRIPLE: A User-Centric Process to Develop a Discovery Platform}, YEAR = {2020}, ABSTRACT = {Social sciences and humanities (SSH) research is divided across a wide array of disciplines, sub-disciplines and languages. While this specialization makes it possible to investigate the extensive variety of SSH topics, it also leads to a fragmentation that prevents SSH research from reaching its full potential. The TRIPLE project brings answers to these issues by developing an innovative discovery platform for SSH data, researchers' projects and profiles. 
Having started in October 2019, the project has already three main achievements that are presented in this paper: (1) the definition of main features of the GOTRIPLE platform; (2) its interoperability; (3) its multilingual, multicultural and interdisciplinary vocation. These results have been achieved thanks to different methodologies such as a co-design process, market analysis and benchmarking, monitoring and co-building. These preliminary results highlight the need for respecting diversity of practices and communities through coordination and harmonization.}, KEYWORDS = {user-centric approach, user research, social sciences and humanities, open science, European Open Science Cloud (EOSC), FAIR principles, discovery, research data}, PAGES = {563}, URL = {https://www.mdpi.com/2078-2489/11/12/563}, VOLUME = {11}, NUMBER = {12}, DOI = {10.3390/info11120563}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2078-2489}, JOURNAL = {Information (Basel)}, } @ARTICLE{GIOVANNETTI_2020_ARTICLE_GBDDMPP_453583, AUTHOR = {Giovannetti, E. and Bellandi, A. and Dattilo, D. and Del Grosso, A. M. and Marchi, S. and Pecchioli, A. and Piccini, S.}, TITLE = {The Terminology of the Babylonian Talmud: Extraction, Representation and Use in the Context of Computational Linguistics}, YEAR = {2020}, ABSTRACT = {A formal digital structuring of the terminology of the Talmud is being carried out in the context of the Project for the Translation of the Babylonian Talmud in Italian. The terminological resource was encoded in the form of a multi-language Explanatory Combinatorial Dictionary (Hebrew-Aramaic-Italian) according to the principles of the Meaning-Text Theory. The construction of such a resource was supported by text processing and computational linguistics techniques aimed at automatically extracting terms from the Italian translation of the Talmud and aligning them with the corresponding Hebrew/Aramaic source terms.
The paper describes the process that was set up for the construction of the terminological resource with the ultimate goal of illustrating the advantages of the adoption of a formal linguistic model. The terminological resource aims, indeed, to be a useful tool to deepen the characteristics of the languages of the Talmud, to help translators in their work and more generally scholars in their study of the Talmud itself.}, KEYWORDS = {Babylonian Talmud, Computational Linguistics, Explanatory and Combinatorial Lexicology}, PAGES = {61-74}, URL = {https://publications.cnr.it/doc/453583}, VOLUME = {XXV}, DOI = {10.1400/283235}, PUBLISHER = {Giuntina (Firenze, Italia)}, ISSN = {2282-4499}, JOURNAL = {Materia giudaica Print}, } @ARTICLE{GUADAGNINI_2020_ARTICLE_G_426575, AUTHOR = {Guadagnini, E.}, TITLE = {Alessandro, la 'cautela' e altri latinismi: un esercizio traduttologico su un estratto della tradizione latino-romanza del "Secretum secretorum" (SS/B)}, YEAR = {2020}, ABSTRACT = {This paper takes into consideration several translation and linguistic issues concerning the Secretum secretorum and its various Romance versions, including the Franco-Italian Amaestramens (ms. Paris BnF fr. 821). Romance inflections of the Latin words cautela, custodia, munimen, providentia, and salus are considered as well.}, KEYWORDS = {Secretum secretorum, Traductology, Lexicology, Latinism}, PAGES = {243-278}, URL = {https://www.francigena-unipd.com/index.php/francigena/article/view/50}, VOLUME = {6}, DOI = {10.25430/2420-9767/V6-239-278}, PUBLISHER = {Università degli Studi di Padova (Italia, Italia)}, ISSN = {2420-9767}, JOURNAL = {Francigena}, } @ARTICLE{KHALFI_2020_ARTICLE_KZN_438041, AUTHOR = {Khalfi, M. and Zarghili, A.
and Nahli, O.}, TITLE = {A New Rich Lexical Resource For Classical Arabic}, YEAR = {2020}, ABSTRACT = {Currently, large lexical resources are getting a high potential relevance for information systems and need of Lexical resources in Natural Language Processing (NLP) fields is paramount. To help meet these needs, we build a lexical resource from the famous dictionary al=qāmūs al=muḥīṭ (AQAM). Using a rule based approach, we have designed a system that allows extracting morpho-syntactical, semantics and lexical information from the famous dictionary. So, we obtained a digitized and structured version of AQAM, enriched by morpho-syntactical and lexical explicit information. In addition, the obtained resource is enriched by English translations of lemma and accompanying senses using a bilingual English-Arabic dictionary. Then we present an overview of an experiment alignment of the section of the letter bāʾ on Princeton's WordNet (PWN) and Suggested Upper Merged Ontology (SUMO). This experience turned out to be interesting because it revealed that mapping an Arabic lexical resource on an English resource shows commonality between the two languages, but it allows especially to emphasize the non-equivalences between them.
All obtained resources are represented in XML format and distributed under free license}, KEYWORDS = {Information Extraction, Arabic Lexicon, Al Qamus Al Muhit, Machine-readable dictionary, Arabic Lexical Resource}, PAGES = {3863-3884}, URL = {https://www.ijact.in/index.php/ijact/article/view/1196}, VOLUME = {IX}, NUMBER = {X}, PUBLISHER = {Research India Publications (New Delhi, India)}, ISSN = {2249-3123}, JOURNAL = {International journal of advanced computer science and technology}, } @ARTICLE{MARZI_2020_ARTICLE_M_424281, AUTHOR = {Marzi, C.}, TITLE = {Modeling Word Learning and Processing with Recurrent Neural Networks}, YEAR = {2020}, ABSTRACT = {The paper focuses on what two different types of Recurrent Neural Networks, namely a recurrent Long Short-Term Memory and a recurrent variant of self-organizing memories, a Temporal Self-Organizing Map, can tell us about speakers' learning and processing a set of fully inflected verb forms selected from the top-frequency paradigms of Italian and German. Both architectures, due to the re-entrant layer of temporal connectivity, can develop a strong sensitivity to sequential patterns that are highly attested in the training data. The main goal is to evaluate learning and processing dynamics of verb inflection data in the two neural networks by focusing on the effects of morphological structure on word production and word recognition, as well as on word generalization for untrained verb forms. For both models, results show that production and recognition, as well as generalization, are facilitated for verb forms in regular paradigms.
However, the two models are differently influenced by structural effects, with the Temporal Self-Organizing Map more prone to adaptively find a balance between processing issues of learnability and generalization, on the one side, and discriminability on the other side.}, KEYWORDS = {word-learning, serial word processing, recurrent neural networks, long short-term memories, temporal self-organizing memories}, PAGES = {320}, PAGETOTAL = {14}, URL = {https://www.mdpi.com/2078-2489/11/6/320}, VOLUME = {11}, NUMBER = {6}, DOI = {10.3390/info11060320}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2078-2489}, JOURNAL = {Information (Basel)}, } @ARTICLE{MARZI_2020_ARTICLE_M_424627, AUTHOR = {Marzi, C.}, TITLE = {Modelling the interaction of regularity and morphological structure: the case of Russian verb inflection}, YEAR = {2020}, ABSTRACT = {The main focus of this paper is to investigate how aspects of morphological regularity may have an impact on early stages of word processing, prior to full lexical access. Here I explore the interaction of regularity and morphological structure by using a computational simulation of the process of learning Russian verb forms, without any morpho-syntactic or morphosemantic additional information. With a recurrent variant of self-organising memories, namely a Temporal Self-Organising Map, or TSOM, experimental results allow an investigation of the impact of incremental learning and online processing principles on paradigm organisation, by assessing the differential impact of several aspects of regularity, ranging from formal transparency and predictability to allomorphy, on the processing/learning behaviour in a connectionist framework.
The proposed analysis suggests a performance-oriented account of inflectional regularity in morphology, whereby perception of morphological structure is not the by-product of the design of the human word processor, with rules separated from exceptions, but rather an emergent property of the dynamic self-organisation of stored lexical representations, dependent on the adaptive processing history of inflected word forms, intrinsically graded and probabilistic.}, KEYWORDS = {morphological complexity, discriminative learning, recurrent neural networks, self-organisation, Russian verb inflection}, PAGES = {131-156}, URL = {https://www.mulino.it/riviste/issn/1720-9331}, VOLUME = {XIX}, DOI = {10.1418/97534}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{MASINI_2020_ARTICLE_MP_444782, AUTHOR = {Masini, F. and Pirrelli, V.}, TITLE = {L'evidenza morfologica nell'era digitale: per un'integrazione di teoria e computazione}, YEAR = {2020}, ABSTRACT = {This article proposes a research perspective on morphological and lexical data based on an integrated approach that merges linguistic theory and computational analyses of a large quantity of textual data. Starting from a description of the units and processes of morphology, and of the issues they raise, we discuss to what extent these theoretical notions can be translated into the algorithmic procedures of Natural Language Processing (NLP) and what resources and methods are nowadays available to make morphological and lexical knowledge explicit within texts.
At the same time, we explore the repercussions that the application of computational (but also psycho-/neuro-linguistic) techniques may have on our theoretical representations and on their plausibility.}, KEYWORDS = {morphology-lexicon-categories-Natural Language Processing-Italian}, PAGES = {77-126}, URL = {https://publications.cnr.it/doc/444782}, VOLUME = {VI}, PUBLISHER = {Editrice CLUEB; [poi] Edizioni dell'Orso (Bologna; [poi] Alessandria, Italia)}, ISSN = {0393-1226}, JOURNAL = {Quaderni di semantica (Testo stampato)}, } @ARTICLE{NICOLOSI_2020_ARTICLE_NMN_429366, AUTHOR = {Nicolosi, A. and Monachini, M. and Nava, B.}, TITLE = {CLARIN-IT and the Definition of a Digital Critical Edition for Ancient Greek Poetry}, YEAR = {2020}, ABSTRACT = {Ancient Greek studies, and Classics in general, is a perfect field of investigation in Digital Humanities. Indeed, DH approaches could become a means of building models for complex realities, analyzing them with computational methods and sharing the results with a broader public. Ancient texts have a complex tradition, which includes many witnesses (texts that handed down other texts) and different typologies of supports (papyri, manuscripts, and epigraphs). These texts are the basis of all European Literatures and it is crucial to spread their knowledge, in a reliable and easy way. Our project on ancient Greek fragmentary poetry (DEA - Digital Edition of Archilochus: New models and tools for authoring, editing and indexing an ancient Greek fragmentary author), growing out of the existing experience, tries to define a TEI-based digital critical edition combined with NLP techniques and semantic web technologies. Our goal is to provide a complete and reliable tool for scholars, suitable for critical studies in Classics, and a user-friendly environment also for non-specialist users. 
The project represents one of the attempts within the context of CLARIN-IT to contribute to the wider impact of CLARIN on the specific Italian community interested in Digital Classics. It is intended to improve services in fostering new knowledge in SSH digital research and sustaining the existing one.}, KEYWORDS = {Digital edition, Ancient Greek, research infrastructures, digital humanities, digital classics}, PAGES = {85-93}, URL = {https://ep.liu.se/ecp/172/011/ecp20172011.pdf}, VOLUME = {172}, DOI = {10.3384/ecp2020172011}, PUBLISHER = {Linköping University Electronic Press (Linköping, Svezia)}, ISSN = {1650-3740}, JOURNAL = {Linköping electronic conference proceedings (Online)}, } @ARTICLE{VENTURI_2020_ARTICLE_VDMMS_441971, AUTHOR = {Venturi, G. and Dell'Orletta, F. and Montemagni, S. and Morini, E. and Sagri, M. T.}, TITLE = {Metodi e Tecniche di Trattamento Automatico della Lingua per l'Estrazione di Conoscenza dalla Documentazione Scolastica}, YEAR = {2020}, ABSTRACT = {Il contributo riguarda la creazione di un sistema integrato di "knowledge management", per la gestione e condivisione della conoscenza prodotta e utilizzata dalla scuola.}, KEYWORDS = {Estrazione di informazione, Documenti scolastici, Indicizzazione, Terminology extraction}, PAGES = {49-68}, URL = {https://publications.cnr.it/doc/441971}, VOLUME = {2}, DOI = {10.3280/CAD2020-002005}, PUBLISHER = {Franco Angeli (Napoli, Italia)}, ISSN = {1122-5165}, JOURNAL = {Cadmo (Testo stamp.)}, } @ARTICLE{VERTECCHI_2020_ARTICLE_VADMV_441967, AUTHOR = {Vertecchi, B. and Agrusti, F. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Verba et Acta. Un esperimento per promuovere l'evoluzione delle competenze linguistiche degli studenti degli istituti professionali}, YEAR = {2020}, ABSTRACT = {Ricerche in corso. Verba et Acta.
Un esperimento per promuovere l'evoluzione delle competenze linguistiche degli studenti degli istituti professionali}, KEYWORDS = {Evoluzione competenze linguistiche, Annotazione linguistica, Previsione dello sviluppo delle competenze di scrittura}, PAGES = {109-117}, URL = {https://publications.cnr.it/doc/441967}, DOI = {10.3280/CAD2020-001008}, PUBLISHER = {Franco Angeli (Napoli, Italia)}, ISSN = {1122-5165}, JOURNAL = {Cadmo (Testo stamp.)}, } @INCOLLECTION{GUADAGNINI_2020_INCOLLECTION_G_444062, AUTHOR = {Guadagnini, E.}, TITLE = {Les Métamorphoses d'Ovide et le Moyen Âge italien: une esquisse du cadre général}, YEAR = {2020}, ABSTRACT = {Cette contribution propose un cadre d'ensemble de la réception des Métamorphoses ovidiennes dans la production vernaculaire italienne du Moyen Âge (XIIe-XIVe siècles). Trois typologies de réception sont isolées : le poème en tant que source de contenus, le poème en tant que source textuelle et les traductions du poème. Une attention particulière est accordée à la tradition manuscrite des oeuvres citées, ainsi qu'à la recherche des sources vraisemblablement employées par les auteurs.}, KEYWORDS = {Volgarizzamenti, eredità dei classici, Ovidio, Filologia romanza}, PAGES = {209-236}, URL = {https://classiques-garnier.com/traire-de-latin-et-espondre-etudes-sur-la-reception-medievale-d-ovide.html}, PUBLISHER = {Editions Classiques Garnier (Paris, FRA)}, ISBN = {9782406105077}, BOOKTITLE = {« Traire de latin et espondre ». Études sur la réception médiévale d'Ovide}, EDITOR = {Baker, C. and Cavagna, M. and Guadagnini, E.}, } @INCOLLECTION{MARZI_2020_INCOLLECTION_MBBP_421742, AUTHOR = {Marzi, C. and Blevins, J. P. and Booij, G. and Pirrelli, V.}, TITLE = {Inflection at the morphology-syntax interface}, YEAR = {2020}, ABSTRACT = {What is inflection? Is it part of language morphology, syntax or both? What are the basic units of inflection and how do speakers acquire and process them? How do they vary across languages?
Are some inflection systems somewhat more complex than others, and does inflectional complexity affect the way speakers process words? This chapter addresses these and other related issues from an interdisciplinary perspective. Our main goal is to map out the place of inflection in our current understanding of the grammar architecture. In doing that, we will embark on an interdisciplinary tour, which will touch upon theoretical, psychological, typological, historical and computational issues in morphology, with a view to looking for points of methodological and substantial convergence from a rather heterogeneous array of scientific approaches and theoretical perspectives. The main upshot is that we can learn more from this than just an additive medley of domain-specific results. In the end, a cross-domain survey can help us look at traditional issues in a surprisingly novel light.}, KEYWORDS = {inflection, paradigmatic relations, word processing, word learning, inflectional complexity, family size, entropy}, PAGES = {228-294}, URL = {https://www.degruyter.com/view/book/9783110440577/10.1515/9783110440577-007.xml}, VOLUME = {337}, DOI = {10.1515/9783110440577-007}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {9783110440577}, BOOKTITLE = {Word Knowledge and Word Usage. A Cross-Disciplinary Guide to the Mental Lexicon}, EDITOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.}, } @INCOLLECTION{PIRRELLI_2020_INCOLLECTION_PMFCBM_421741, AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M. and Cardillo, F. A. and Baayen, H. R. and Milin, P.}, TITLE = {Psycho-computational modelling of the mental lexicon}, YEAR = {2020}, ABSTRACT = {Over the last decades, a growing body of evidence on the mechanisms governing lexical storage, access, acquisition and processing has questioned traditional models of language architecture and word usage based on the hypothesis of a direct correspondence between modular components of grammar competence (lexicon vs.
rules), processing correlates (memory vs. computation) and neuro-anatomical localizations (prefrontal vs. temporo-parietal perisylvian areas of the left hemisphere). In the present chapter, we explore the empirical and theoretical consequences of a distributed, integrative model of the mental lexicon, whereby words are seen as emergent properties of the functional interaction between basic, language-independent processing principles and the language- specific nature and organization of the input. From this perspective, language learning appears to be inextricably related to the way language is processed and internalized by the speakers, and key to an interdisciplinary understanding of such a way, in line with Tomaso Poggio's suggestion that the development of a cognitive skill is causally and ontogenetically prior to its execution (and sits "on top of it"). In particular, we discuss conditions, potential and prospects of the epistemological continuity between psycholinguistic and computational modelling of word learning, and illustrate the yet largely untapped potential of their integration. We use David Marr's hierarchy to clarify the complementarity of the two viewpoints. Psycholinguistic models are informative about how speakers learn to use language (interfacing Marr's levels 1 and 2). When we move from the psycholinguistic analysis of the functional operations involved in language learning to an algorithmic description of how they are computed, computer simulations can help us explore the relation between speakers' behavior and general learning principles in more detail. 
In the end, psycho-computational models can be instrumental to bridge Marr's levels 2 and 3, bringing us closer to understanding the nature of word knowledge in the brain.}, KEYWORDS = {mental lexicon, word storage and processing, psycholinguistics, computational linguistics, connectionist models, discriminative learning}, PAGES = {23-82}, URL = {https://www.degruyter.com/view/book/9783110440577/10.1515/9783110440577-002.xml}, VOLUME = {337}, DOI = {10.1515/9783110440577-002}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {9783110440577}, BOOKTITLE = {Word Knowledge and Word Usage. A Cross-Disciplinary Guide to the Mental Lexicon}, EDITOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.}, } @INCOLLECTION{PIRRELLI_2020_INCOLLECTION_PPD_423388, AUTHOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.}, TITLE = {Word knowledge in a cross-disciplinary world}, YEAR = {2020}, ABSTRACT = {This editorial project stemmed from a 4-year period of intense interdisciplinary research networking funded by the European Science Foundation within the framework of the NetWordS project (09-RNP-089). The project mission was to bring together experts of various research fields (from brain sciences and computing to cognition and linguistics) and of different theoretical inclinations, to advance the current awareness of theoretical, typological, psycholinguistic, computational and neurophysiological evidence on the structure and processing of words, with a view to promoting novel methods of research and assessment for grammar architecture and language usage.
The unprecedented cross-disciplinary fertilization prompted by a wide range of scientific and educational initiatives (three international workshops, two summer schools, one main conference and over a hundred grants supporting short visits and multilateral exchanges) persuaded us to pursue this effort beyond the project lifespan, spawning the idea of an interdisciplinary handbook, where a wide range of central topics on word knowledge and usage are dealt with by teams of authors with common interests and different backgrounds. Unsurprisingly (with the benefit of the hindsight), the project turned out to be more challenging and time-consuming than initially planned. Cross-boundary talking and mutual understanding are neither short-term, nor immediately rewarding efforts, but part of a long-sighted, strategic vision, where stamina, motivation and planning ahead play a prominent role. We believe that this book, published as an open access volume, significantly sharpens the current understanding of issues of word knowledge and usage, and has a real potential for promoting novel research paradigms, and bringing up a new generation of language scholars.}, KEYWORDS = {interdisciplinarity, word knowledge, word usage, language units, statistical and computer modeling, levels of understanding, between-level mapping, linking hypotheses, scale effects}, PAGES = {1-20}, URL = {https://doi.org/10.1515/9783110440577}, VOLUME = {337}, DOI = {10.1515/9783110440577}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {9783110440577}, BOOKTITLE = {Word Knowledge and Word Usage. A Cross-Disciplinary Guide to the Mental Lexicon}, EDITOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.}, } @EDITORIAL{BEERMANN_2020_EDITORIAL_BBSS_472133, AUTHOR = {Beermann, D. and Besacier, L. and Sakti, S.
and Soria, C.}, TITLE = {Proceedings of 1st Joint SLTU and CCURL Workshop (SLTU-CCURL 2020)}, YEAR = {2020}, ABSTRACT = {Proceedings of the 1st Joint SLTU and CCURL Workshop (SLTU-CCURL 2020)}, KEYWORDS = {less-resourced languages, NLP, language resources}, URL = {https://aclanthology.org/events/lrec-2020/#2020-sltu-1}, ISBN = {979-10-95546-35-1}, } @EDITORIAL{BROEDER_2020_EDITORIAL_BEM_472326, AUTHOR = {Broeder, D. and Eskevich, M. and Monachini, M.}, TITLE = {Proceedings of the Workshop about Language Resources for the SSH Cloud}, YEAR = {2020}, ABSTRACT = {This workshop was envisaged to focus on the goals and aims of realising the SSHOC part of the EOSC, where SSH data, language processing tools, and services are made available, adjusted and accessible for users across SSH domain. It provides a forum to discuss common requirements, challenges and opportunities for developing, enhancing, integrating tools and services for managing and processing SSH research data. Such SSH scenarios based implementations of currently existing language tools and services demonstrate their multidisciplinary usability and stimulate further multidisciplinary collaboration across the various subfields of SSH and beyond, which will increase the potential for societal impact.}, KEYWORDS = {Social Science and Humanities Open Cloud, EOSC, Language Resource Infrastructure}, PAGES = {1-46}, URL = {https://aclanthology.org/2020.lr4sshoc-1}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-43-6}, } @EDITORIAL{DISEGNI_2020_EDITORIAL_D_441369, AUTHOR = {Di Segni, R.}, TITLE = {Talmud Babilonese, Trattato Chaghigà (Sacrificio festivo)}, YEAR = {2020}, ABSTRACT = {Coordinamento della traduzione in italiano del trattato del Talmud Babilonese "Chaghigà", con commento esplicativo e note scientifiche, tramite il software Traduco messo a punto dall'ILC-CNR di Pisa}, KEYWORDS = {software Traduco, talmud babilonese}, PAGES = {299}, URL =
{https://publications.cnr.it/doc/441369}, VOLUME = {12}, PUBLISHER = {La Giuntina (Firenze, ITA)}, ISBN = {978-88-8057-858-1}, } @EDITORIAL{PIRRELLI_2020_EDITORIAL_PPD_424203, AUTHOR = {Pirrelli, V. and Plag, I. and Dressler, W. U.}, TITLE = {Word knowledge and word usage: a cross-disciplinary guide to the mental lexicon}, YEAR = {2020}, ABSTRACT = {This editorial project stemmed from a 4-year period of intense interdisciplinary research networking funded by the European Science Foundation within the framework of the NetWordS project (09-RNP-089).}, KEYWORDS = {interdisciplinarity, word knowledge, word usage, language units, statistical and computer modeling, levels of understanding, between-level mapping, linking hypotheses, scale effects}, PAGES = {1-717}, URL = {https://doi.org/10.1515/9783110440577}, VOLUME = {337}, DOI = {10.1515/9783110440577}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {978-3-11-051748-4}, } @INPROCEEDINGS{ALZETTA_2020_INPROCEEDINGS_ADMOSV_444113, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Osenova, P. and Simov, K. and Venturi, G.}, TITLE = {Quantitative linguistic investigations across universal dependencies treebanks}, YEAR = {2020}, ABSTRACT = {The paper illustrates a case study aimed at identifying cross-lingual quantitative trends in the distribution of dependency relations in treebanks for typologically different languages. Preliminary results show interesting differences rooted either in language-specific peculiarities or cross-lingual annotation inconsistencies, with a potential impact on different application scenarios.}, KEYWORDS = {Universal Dependencies Treebanks, Cross-linguistic analysis, Typology}, PAGES = {1-7}, URL = {http://ceur-ws.org/Vol-2769/paper_59.pdf}, VOLUME = {2769}, PUBLISHER = {M. 
Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, ISBN = {979-12-80136-28-2}, CONFERENCE_NAME = {7th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Online}, CONFERENCE_DATE = {1-3/03/2021}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{ALZETTA_2020_INPROCEEDINGS_AMDKT_442044, AUTHOR = {Alzetta, C. and Miaschi, A. and Dell'Orletta, F. and Koceva, F. and Torre, I.}, TITLE = {PRELEARN @ EVALITA 2020: Overview of the Prerequisite Relation Learning Task for Italian}, YEAR = {2020}, ABSTRACT = {The Prerequisite Relation Learning (PRELEARN) task is the EVALITA 2020 shared task on concept prerequisite learning, which consists of classifying prerequisite relations between pairs of concepts distinguishing between prerequisite pairs and non-prerequisite pairs. Four sub-tasks were defined: two of them define different types of features that participants are allowed to use when training their model, while the other two define the classification scenarios where the proposed models would be tested. In total, 14 runs were submitted by 3 teams comprising 9 total individual participants.}, KEYWORDS = {nlp, prerequisite learning, shared task}, URL = {http://ceur-ws.org/Vol-2765/paper164.pdf}, CONFERENCE_NAME = {Seventh Evaluation Campaign of Natural Language Processing and Speech Tools for Italian (EVALITA)}, CONFERENCE_DATE = {17/12/2020}, } @INPROCEEDINGS{BELLANDI_2020_INPROCEEDINGS_BG_427282, AUTHOR = {Bellandi, A. and Giovannetti, E.}, TITLE = {Involving Lexicographers in the LLOD Cloud with LexO, an Easy-to-use Editor of Lemon Lexical Resources}, YEAR = {2020}, ABSTRACT = {In this contribution, we show LexO, a user-friendly web collaborative editor of lexical resources based on the lemon model. 
LexO has been developed in the context of Digital Humanities projects, in which a key point in the design of an editor was the ease of use by lexicographers with no skill in Linked Data or Semantic Web technologies. Though the tool already allows creating a lemon lexicon from scratch and lets a team of users work on it collaboratively, many developments are possible. The involvement of the LLOD community appears now crucial both to find new users and application fields where to test it, and, even more importantly, to understand in which way it should evolve.}, KEYWORDS = {lexO, lexical resources editor, semantic web, llod}, PAGES = {70-74}, URL = {https://www.aclweb.org/anthology/2020.ldl-1.10.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-36-8}, CONFERENCE_NAME = {7th Workshop on Linked Data in Linguistics (LDL-2020)}, CONFERENCE_PLACE = {Marseille, France}, CONFERENCE_DATE = {22-23/06/2020}, EDITOR = {Ionov, M. and McCrae, J. P. and Chiarcos, C. and Declerck, T. and Bosque Gil, J. and Gracia, J.}, } @INPROCEEDINGS{BOSCHETTI_2020_INPROCEEDINGS_BDMBMD_462360, AUTHOR = {Boschetti, F. and Del Gratta, R. and Monachini, M. and Buzzoni, M. and Monella, P. and Del Turco, R. R.}, TITLE = {"Tea for two": the Archive of the Italian Latinity of the Middle Ages meets the CLARIN infrastructure}, YEAR = {2020}, ABSTRACT = {This paper presents the Archive of the Italian Latinity of the Middle Ages (ALIM) and focuses, particularly, on its structure and metadata for its integration into the ILC4CLARIN repository. 
Access to this archive of Latin texts produced in Italy during the Middle Ages is of great importance in providing CLARIN-IT and the CLARIN community, at large, with critically reliable texts for the use of philologists, historians of literature, historians of institutions, culture and science of the Middle Ages.}, KEYWORDS = {Latin resources, CLARIN, corpus, repository}, PAGES = {121-125}, URL = {https://office.clarin.eu/v/CE-2020-1738-CLARIN2020_ConferenceProceedings.pdf}, CONFERENCE_NAME = {CLARIN Annual Conference 2020}, CONFERENCE_DATE = {05-07/10/2020}, } @INPROCEEDINGS{BROEDER_2020_INPROCEEDINGS_BEM_472328, AUTHOR = {Broeder, D. and Eskevich, M. and Monachini, M.}, TITLE = {LR4SSHOC: The Future of Language Resources in the Context of the Social Sciences and Humanities Open Cloud}, YEAR = {2020}, ABSTRACT = {This paper outlines the future of language resources and identifies their potential contribution for creating and sustaining the social sciences and humanities (SSH) component of the European Open Science Cloud (EOSC).}, KEYWORDS = {EOSC, Social Science and Humanities Open Cloud}, PAGES = {33-36}, URL = {https://aclanthology.org/2020.lr4sshoc-1.6}, ISBN = {979-10-95546-43-6}, CONFERENCE_NAME = {LREC}, CONFERENCE_PLACE = {virtual}, CONFERENCE_DATE = {10/5/2020}, BOOKTITLE = {Proceedings of the Workshop about Language Resources for the SSH Cloud}, EDITOR = {Broeder, D. and Eskevich, M. and Monachini, M.}, } @INPROCEEDINGS{BRUNATO_2020_INPROCEEDINGS_BCDMVZ_444114, AUTHOR = {Brunato, D. and Chesi, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G. and Zamparelli, R.}, TITLE = {AcCompl-it @ EVALITA2020: Overview of the acceptability \& complexity evaluation task for Italian}, YEAR = {2020}, ABSTRACT = {The Acceptability and Complexity evaluation task for Italian (AcCompl-it) was aimed at developing and evaluating methods to classify Italian sentences according to Acceptability and Complexity.
It consists of two independent tasks asking participants to predict either the acceptability or the complexity rate (or both) of a given set of sentences previously scored by native speakers on a 1-to-7 points Likert scale. In this paper, we introduce the datasets distributed to the participants, we describe the different approaches of the participating systems and provide a first analysis of the obtained results.}, KEYWORDS = {Shared Task, Linguistic Complexity, Acceptability}, PAGES = {1-8}, URL = {http://ceur-ws.org/Vol-2765/paper163.pdf}, VOLUME = {2765}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {EVALITA '20, Evaluation of NLP and Speech Tools for Italian}, CONFERENCE_PLACE = {Online}, CONFERENCE_DATE = {17/12/2020}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{BRUNATO_2020_INPROCEEDINGS_BCDMV_435966, AUTHOR = {Brunato, D. and Cimino, A. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Profiling-UD: a Tool for Linguistic Profiling of Texts}, YEAR = {2020}, ABSTRACT = {In this paper, we introduce Profiling-UD, a new text analysis tool inspired to the principles of linguistic profiling that can support language variation research from different perspectives. It allows the extraction of more than 130 features, spanning across different levels of linguistic description. Beyond the large number of features that can be monitored, a main novelty of Profiling-UD is that it has been specifically devised to be multilingual since it is based on the Universal Dependencies framework. 
In the second part of the paper, we demonstrate the effectiveness of these features in a number of theoretical and applicative studies in which they were successfully used for text and author profiling.}, KEYWORDS = {Computational Language Variation Analysis, Linguistic Profiling, Universal Dependencies}, PAGES = {7145-7151}, URL = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.883.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-34-4}, CONFERENCE_NAME = {Conference on Language Resources and Evaluation (LREC)}, CONFERENCE_DATE = {11-16/05/2020}, } @INPROCEEDINGS{CALAMAI_2020_INPROCEEDINGS_CPMSBB_466823, AUTHOR = {Calamai, S. and Pretto, N. and Monachini, M. and Stamuli, M. F. and Bianchi, S. and Bonazzoli, P.}, TITLE = {Building a Home for Italian Audio Archives}, YEAR = {2020}, ABSTRACT = {Audio and audiovisual archives are at the crossroads of different fields of knowledge, yet they require common solutions for both their long-term preservation and their description, availability, use and reuse. Archivio Vi.Vo. is an Italian project financed by the Tuscany Region, aiming to (i) explore methods for long-term preservation and secure access to oral sources and (ii) develop an infrastructure under the CLARIN-IT umbrella offering several services for scholars from different domains interested in oral sources. This paper describes the project's infrastructure and its methodology through a case study on the Caterina Bueno's audio archive.}, KEYWORDS = {oral archives, infrastructures}, PAGES = {112-116}, URL = {https://publications.cnr.it/doc/466823}, CONFERENCE_NAME = {CLARIN2020 Annual Conference Proceedings ISSN 2773-2177 (online)}, CONFERENCE_PLACE = {virtual}, CONFERENCE_DATE = {5/10/2020-7/10/2020}, } @INPROCEEDINGS{DEMATTEI_2020_INPROCEEDINGS_DDIMPR_442042, AUTHOR = {De Mattei, L. and De Martino, G. and Iovine, A. and Miaschi, A. and Polignano, M. 
and Rambelli, G.}, TITLE = {ATE\_ABSITA @ EVALITA2020: Overview of the Aspect Term Extraction and Aspect-based Sentiment Analysis Task}, YEAR = {2020}, ABSTRACT = {Over the last years, the rise of novel sentiment analysis techniques to assess aspect-based opinions on product reviews has become a key component for providing valuable insights to both consumers and businesses. To this extent, we propose ATE\_ABSITA: the EVALITA 2020 shared task on Aspect Term Extraction and Aspect-Based Sentiment Analysis. In particular, we approach the task as a cascade of three subtasks: Aspect Term Extraction (ATE), Aspect-based Sentiment Analysis (ABSA) and Sentiment Analysis (SA). Therefore, we invited participants to submit systems designed to automatically identify the "aspect terms" in each review and to predict the sentiment expressed for each aspect, along with the sentiment of the entire review. The task received broad interest, with 27 teams registered and more than 45 participants. However, only three teams submitted their working systems. The results obtained underline the task's difficulty, but they also show how it is possible to deal with it using innovative approaches and models. Indeed, two of them are based on large pre-trained language models as typical in the current state of the art for the English language.}, KEYWORDS = {nlp, sentiment analysis, shared task}, URL = {http://ceur-ws.org/Vol-2765/paper153.pdf}, CONFERENCE_NAME = {Seventh Evaluation Campaign of Natural Language Processing and Speech Tools for Italian (EVALITA)}, CONFERENCE_DATE = {17/12/2020}, } @INPROCEEDINGS{DELGRATTA_2020_INPROCEEDINGS_DBBS_462341, AUTHOR = {Del Gratta, R. and Boschetti, F. and Bambaci, L. and Sarnari, F.}, TITLE = {Approaching document analysis with a formal model}, YEAR = {2020}, ABSTRACT = {We introduce a formal approach to document and text analysis.
The method proposed herein results in a mathematical and physical model/framework which can formalize different challenges in research fields such as computational linguistics, digital philology, and software engineering, principally if applied to document and text analysis. We examine texts and documents from an evolutionary perspective, where both corruption and correction are involved. We describe document evolution via fiber bundles formalism.}, KEYWORDS = {Formal model, document analysis, evolutionary approach}, PAGES = {208-214}, URL = {https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=\&arnumber=9357202\&tag=1}, DOI = {10.1109/CIST49399.2021.9357202}, CONFERENCE_NAME = {6th International IEEE Colloquium on Information Science and Technology}, CONFERENCE_PLACE = {Agadir, Morocco}, CONFERENCE_DATE = {5-12/06/2021}, } @INPROCEEDINGS{DELGROSSO_2020_INPROCEEDINGS_DFENT_439862, AUTHOR = {Del Grosso, A. M. and Fassi, F. D. and El Mohajir, M. and Nahli, O. and Tonazzini, A.}, TITLE = {Digital safeguard of laminated historical manuscripts: the treatise "Poem in Rajaz on medicine" as a case study}, YEAR = {2020}, ABSTRACT = {In this paper, we analyze and discuss the characteristics of a system for the effective digital preservation and fruition of historical manuscripts degraded by the process of lamination. As a case study, we will make reference to the "Poem in Rajaz on medicine", written by Abubacer in the XII century, and conserved in the Al Quaraouiyine Library located in Fez, Morocco. The conceived system should have at least four main functionalities: image acquisition (i.e. digitization), image enhancement, text encoding, and linguistic analysis. Based on the evaluation of the manuscript damages, the acquisition set up should be designed in such a way to be able to avoid reflections as much as possible. Suitable digital image processing techniques should also be devised to correct the residual degradations and enhance the text for an easier legibility. 
Finally, semi-automatic transcription, scholarly encoding and linguistic analysis, to be performed on the virtually restored pages, should adapt existing tools to the specificity of the primary source writing system and language. The feasibility study for the realization of such a system is of general utility, in that it can provide guidelines for the digitization, the enhancement and the text encoding of the many laminated manuscripts conserved in other historical archives. On the other hand, from the cultural heritage point of view, the experimentation on the "Poem in Rajaz on medicine" could foster the systematic philological and ontological study of a unique piece of our documental heritage: the longest poem of medieval Islamic medical literature.}, KEYWORDS = {Cultural Heritage, Digital Safeguard, Historical Manuscript Digitization, Document Image Processing, Linguistic Analysis, Ontological Analysis}, PAGES = {192-197}, URL = {https://ieeexplore.ieee.org/document/9357192}, DOI = {10.1109/CiSt49399.2021.9357192}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-7281-6646-9}, CONFERENCE_NAME = {CiSt'2020-6th IEEE Congress on Information Science \& Technology}, CONFERENCE_PLACE = {Agadir-Essaouira, Morocco}, CONFERENCE_DATE = {June 5-12, 2021}, } @INPROCEEDINGS{DELGROSSO_2020_INPROCEEDINGS_DGM_427281, AUTHOR = {Del Grosso, A. M. and Giovannetti, E. and Marchi, S.}, TITLE = {Enriching a Multilingual Terminology Exploiting Parallel Texts: An Experiment on the Italian Translation of the Babylonian Talmud}, YEAR = {2020}, ABSTRACT = {Parallel texts can represent an extremely useful source of information in a number of text and linguistic processing tasks. In this work we show an experiment conducted on the Italian translation of the Babylonian Talmud, a text we have analyzed and processed to support in the construction of a multilingual Hebrew/Aramaic/Italian terminological resource. 
The approach we adopted comprised: i) the TEI encoding of the text, ii) the automatic extraction of the Italian terms, iii) the addition of Hebrew/Aramaic terms via word-by-word alignment, iv) the revision of the obtained results.}, KEYWORDS = {multilingual terminology, parallel text, text alignment, babylonian talmud}, PAGES = {119-124}, URL = {http://amsacta.unibo.it/6316/1/AIUCD_2020_volume_FINAL.pdf}, DOI = {10.6092/unibo/amsacta/6316}, ISBN = {978-88-942535-4-2}, CONFERENCE_NAME = {IX Convegno Annuale AIUCD}, CONFERENCE_PLACE = {Milano: Università Cattolica del Sacro Cuore}, CONFERENCE_DATE = {15-17/01/2020}, } @INPROCEEDINGS{DIDONATO_2020_INPROCEEDINGS_DMEPMD_425644, AUTHOR = {Di Donato, F. and Monachini, M. and Eskevich, M. and Pohle, S. and Moranville, Y. and Dumouchel, S.}, TITLE = {Social Sciences and Humanities Pathway. Towards the European Open Science Cloud}, YEAR = {2020}, ABSTRACT = {The paper describes a journey which starts from various social sciences and humanities (SSH) Research Infrastructures (RI) in Europe and arrives at the comprehensive "ecosystem of infrastructures", namely the European Open Science Cloud (EOSC). We highlight how the SSH Open Science infrastructures contribute to the goal of establishing the EOSC. First, through the example of OPERAS, the European Research Infrastructure for Open Scholarly Communication in the SSH, to see how its services are conceived to be part of the EOSC and to address the communities' needs. The next two sections highlight collaboration practices between partners in Europe to build the SSH component of the EOSC and a SSH discovery platform, as a service of OPERAS and the EOSC. 
The last two sections focus on an implementation network dedicated to SSH data fairification.}, KEYWORDS = {EOSC, Open science, SSH, Infrastructures}, PAGES = {5-9}, URL = {https://www.aclweb.org/anthology/2020.lr4sshoc-1.2.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-43-6}, CONFERENCE_NAME = {Language Resources and Evaluation Conference (LREC 2020)}, CONFERENCE_PLACE = {Marseille}, CONFERENCE_DATE = {11-16/05/2020}, BOOKTITLE = {Proceedings of the Workshop about Language Resources for the SSH Cloud}, EDITOR = {Broeder, D. and Eskevich, M. and Monachini, M.}, } @INPROCEEDINGS{DUVAL_2020_INPROCEEDINGS_DG_442467, AUTHOR = {Duval, F. and Guadagnini, E.}, TITLE = {La rappresentazione lessicale del teatro antico nel Medioevo francese e italiano: per una lessicologia storica tra "transferts culturels" e comparatismo}, YEAR = {2020}, ABSTRACT = {Studio della ricezione italiana e francese medievale del lessico e dei concetti del teatro classico.}, KEYWORDS = {Lessicologia, teatro}, PAGES = {21-44}, URL = {http://www.brepols.net/Pages/ShowProduct.aspx?prod_id=IS-9782503587714-1}, VOLUME = {8}, PUBLISHER = {Brepols (Turnhout, BEL)}, ISBN = {9782503587714}, CONFERENCE_NAME = {Transferts culturels franco-italiens au Moyen Âge, Colloque organisé par la «Società italiana di Filologia romanza», la «Société de langues et littératures médiévales d'oc et d'oïl» et la «Société de Linguistique romane», sous le haut patronage de l'Académie des Inscriptions et Belles-Lettres et de la Fondation Primoli}, CONFERENCE_PLACE = {Paris}, CONFERENCE_DATE = {20-22 septembre 2018}, BOOKTITLE = {Transferts culturels franco-italiens au Moyen Âge-Trasferimenti culturali italo francesi}, EDITOR = {Galderisi, C. and Antonelli, R. and Punzi, A. and Ducos, J.}, } @INPROCEEDINGS{FERRO_2020_INPROCEEDINGS_FGC_441873, AUTHOR = {Ferro, M. and Giulivi, S. 
and Cappa, C.}, TITLE = {The AEREST reading database}, YEAR = {2020}, ABSTRACT = {Aerest is a reading assessment protocol for the concurrent evaluation of a child's decoding and comprehension skills. Reading data complying with the Aerest protocol were automatically collected and structured with the ReadLet web-based platform in a pilot study, to form the Aerest Reading Database. The content, structure and potential of the database are described here, together with the main directions of current and future developments.}, KEYWORDS = {reading database, reading efficiency, decoding, comprehension, multimodal analysis}, PAGES = {1-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85097912116\&origin=inward}, VOLUME = {2769}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {9791280136282}, CONFERENCE_NAME = {7th Italian Conference on Computational Linguistics (CLIC-IT'20)}, CONFERENCE_PLACE = {Bologna, Italy}, CONFERENCE_DATE = {01-03/03/2021}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{LENCI_2020_INPROCEEDINGS_LMBDDDDMPPVL_435958, AUTHOR = {Lenci, A. and Montemagni, S. and Boschetti, F. and De Felice, I. and Dei Rossi, S. and Dell'Orletta, F. and Di Giorgio, M. and Miliani, M. and Passaro, L. C. and Puddu, A. and Venturi, G. and Labanca, N.}, TITLE = {Voices of the Great War: A Richly Annotated Corpus of Italian Texts on the First World War}, YEAR = {2020}, ABSTRACT = {Voci della Grande Guerra ("Voices of the Great War") is the first large corpus of Italian historical texts dating back to the period of First World War. This corpus differs from other existing resources in several respects. First, from the linguistic point of view it gives account of the wide range of varieties in which Italian was articulated in that period, namely from a diastratic (educated vs. uneducated writers), diaphasic (low/informal vs. high/formal registers) and diatopic (regional varieties, dialects) points of view. 
From the historical perspective, through a collection of texts belonging to different genres it represents different views on the war and the various styles of narrating war events and experiences. The final corpus is balanced along various dimensions, corresponding to the textual genre, the language variety used, the author type and the typology of conveyed contents. The corpus is annotated with lemmas, part-of-speech, terminology, and named entities. Significant corpus samples representative of the different "voices" have also been enriched with meta-linguistic and syntactic information. The layer of syntactic annotation forms the first nucleus of an Italian historical treebank complying with the Universal Dependencies standard. The paper illustrates the final resource, the methodology and tools used to build it, and the Web Interface for navigating it.}, KEYWORDS = {Historical Corpora, Linguistic and Meta-linguistic Annotation, Information Extraction}, PAGES = {911-918}, URL = {https://www.aclweb.org/anthology/2020.lrec-1.114.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-34-4}, CONFERENCE_NAME = {Conference on Language Resources and Evaluation (LREC)}, CONFERENCE_DATE = {11-16/05/2020}, } @INPROCEEDINGS{MARZI_2020_INPROCEEDINGS_MRNTP_438979, AUTHOR = {Marzi, C. and Rodella, A. and Nadalini, A. and Taxitari, L. and Pirrelli, V.}, TITLE = {Does finger-tracking point to child reading strategies?}, YEAR = {2020}, ABSTRACT = {The movement of a child's index finger that points to a printed text while (s)he is reading may provide a proxy for the child's eye movements and attention focus. We validated this correlation by showing a quantitative analysis of patterns of "finger-tracking" of Italian early graders engaged in reading a text displayed on a tablet. A web application interfaced with the tablet monitors the reading behaviour by modelling the way the child points to the text while reading. 
The analysis found significant developmental trends in reading strategies, marking an interesting contrast between typically developing and atypically developing readers.}, KEYWORDS = {reading assessment, reading strategies, mobile technology, special education needs}, PAGES = {1-7}, URL = {http://ceur-ws.org/Vol-2769/paper_60.pdf}, VOLUME = {2769}, PUBLISHER = {CEUR-WS. org (Aachen, DEU)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Italian Conference on Computational Linguistics 2020}, CONFERENCE_PLACE = {Bologna}, CONFERENCE_DATE = {1-3/03/2021}, BOOKTITLE = {Proceedings of the Seventh Italian Conference on Computational Linguistics}, EDITOR = {Monti, J. and Dell'Orletta, F. and Tamburini, F.}, } @INPROCEEDINGS{MIASCHI_2020_INPROCEEDINGS_MABDV_442040, AUTHOR = {Miaschi, A. and Alzetta, C. and Brunato, D. and Dell'Orletta, F. and Venturi, G.}, TITLE = {Is Neural Language Model Perplexity Related to Readability?}, YEAR = {2020}, ABSTRACT = {This paper explores the relationship between Neural Language Model (NLM) perplexity and sentence readability. Starting from the evidence that NLMs implicitly acquire sophisticated linguistic knowledge from a huge amount of training data, our goal is to investigate whether perplexity is affected by linguistic features used to automatically assess sentence readability and if there is a correlation between the two metrics. Our findings suggest that this correlation is actually quite weak and the two metrics are affected by different linguistic phenomena.}, KEYWORDS = {nlp, neural language models, readability}, URL = {http://ceur-ws.org/Vol-2769/paper_57.pdf}, ISBN = {979-12-80136-28-2}, CONFERENCE_NAME = {Seventh Italian Conference on Computational Linguistics}, CONFERENCE_DATE = {01-03/03/2021}, } @INPROCEEDINGS{MIASCHI_2020_INPROCEEDINGS_MBDV_438491, AUTHOR = {Miaschi, A. and Brunato, D. and Dell'Orletta, F.
and Venturi, G.}, TITLE = {Linguistic Profiling of a Neural Language Model}, YEAR = {2020}, ABSTRACT = {In this paper we investigate the linguistic knowledge learned by a Neural Language Model (NLM) before and after a fine-tuning process and how this knowledge affects its predictions during several classification problems. We use a wide set of probing tasks, each of which corresponds to a distinct sentence-level feature extracted from different levels of linguistic annotation. We show that BERT is able to encode a wide range of linguistic characteristics, but it tends to lose this information when trained on specific downstream tasks. We also find that BERT's capacity to encode different kind of linguistic properties has a positive influence on its predictions: the more it stores readable linguistic information of a sentence, the higher will be its capacity of predicting the expected label assigned to that sentence.}, KEYWORDS = {Linguistic Profiling, Neural Language Model, Interpretability}, PAGES = {745-756}, URL = {https://www.aclweb.org/anthology/2020.coling-main.65/}, ISBN = {978-1-952148-27-9}, CONFERENCE_NAME = {International Conference on Computational Linguistics (COLING)}, CONFERENCE_PLACE = {Online}, CONFERENCE_DATE = {8-13/12/2020}, } @INPROCEEDINGS{MIASCHI_2020_INPROCEEDINGS_MDBDSSV_435969, AUTHOR = {Miaschi, A. and Davidson, S. and Brunato, D. and Dell'Orletta, F. and Sagae, K. and Sanchez Gutierrez, C. H. and Venturi, G.}, TITLE = {Tracking the Evolution of Written Language Competence in L2 Spanish Learners}, YEAR = {2020}, ABSTRACT = {In this paper we present an NLP-based approach for tracking the evolution of written language competence in L2 Spanish learners using a wide range of linguistic features automatically extracted from students' written productions. 
Beyond reporting classification results for different scenarios, we explore the connection between the most predictive features and the teaching curriculum, finding that our set of linguistic features often reflects the explicit instruction that students receive during each course.}, KEYWORDS = {Evolution of Language Competence, Natural Language Processing, Linguistic Profiling}, PAGES = {92-101}, URL = {https://www.aclweb.org/anthology/2020.bea-1.9.pdf}, DOI = {10.18653/v1/2020.bea-1.9}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-941643-83-9}, CONFERENCE_NAME = {15th Workshop on Innovative Use of NLP for Building Educational Applications}, CONFERENCE_DATE = {10/07/2020}, } @INPROCEEDINGS{MIASCHI_2020_INPROCEEDINGS_MD_442036, AUTHOR = {Miaschi, A. and Dell'Orletta, F.}, TITLE = {Contextual and Non-Contextual Word Embeddings: an in-depth Linguistic Investigation}, YEAR = {2020}, ABSTRACT = {In this paper we present a comparison between the linguistic knowledge encoded in the internal representations of a contextual Language Model (BERT) and a contextual-independent one (Word2vec). We use a wide set of probing tasks, each of which corresponds to a distinct sentence-level feature extracted from different levels of linguistic annotation. We show that, although BERT is capable of understanding the full context of each word in an input sequence, the implicit knowledge encoded in its aggregated sentence representations is still comparable to that of a contextual-independent model.
We also find that BERT is able to encode sentence-level properties even within single-word embeddings, obtaining comparable or even superior results than those obtained with sentence representations.}, KEYWORDS = {nlp, interpretability, representation learning}, PAGES = {110-119}, URL = {https://www.aclweb.org/anthology/2020.repl4nlp-1.15}, DOI = {10.18653/v1/2020.repl4nlp-1.15}, ISBN = {978-1-952148-15-6}, CONFERENCE_NAME = {5th Workshop on Representation Learning for NLP}, CONFERENCE_DATE = {09/07/2020}, } @INPROCEEDINGS{MIASCHI_2020_INPROCEEDINGS_MSBDV_442038, AUTHOR = {Miaschi, A. and Sarti, G. and Brunato, D. and Dell'Orletta, F. and Venturi, G.}, TITLE = {Italian Transformers Under the Linguistic Lens}, YEAR = {2020}, ABSTRACT = {In this paper we present an in-depth investigation of the linguistic knowledge encoded by the transformer models currently available for the Italian language. In particular, we investigate whether and how using different architectures of probing models affects the performance of Italian transformers in encoding a wide spectrum of linguistic features. Moreover, we explore how this implicit knowledge varies according to different textual genres.}, KEYWORDS = {nlp, neural language models, interpretability}, URL = {http://ceur-ws.org/Vol-2769/paper_56.pdf}, ISBN = {979-12-80136-28-2}, CONFERENCE_NAME = {Seventh Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_DATE = {01-03/03/2021}, } @INPROCEEDINGS{NAHLI_2020_INPROCEEDINGS_ND_439789, AUTHOR = {Nahli, O. and Del Grosso, A. M.}, TITLE = {Creating Arabic Lexical Resources in TEI; A Schema for Discontinuous Morphology Encoding}, YEAR = {2020}, ABSTRACT = {This article aims at formally grouping lexical and morphological information in order to obtain an electronic resource with respect to the Arabic language starting from the classical dictionary al=qāmūs al=muḥīṭ.
This contribution examines practical aspects about the adoption of the guidelines provided by the Text Encoding Initiative (TEI) to encode the Arabic dictionary as a primary source. Moreover, the contribution points out a possible way to integrate semantic, morphological and syntactic information characterizing word patterns within the same TEI document. Specifically, the formalization of word patterns allows us to emphasize additional morphosyntactic regularities mainly concerning word distribution within sentences. Consequently, the obtained digital object represents both the medieval Arabic dictionary and a suitable resource that can be exploited for a number of Natural Language Processing tasks.}, KEYWORDS = {classical Arabic dictionary, digital lexicography, al=qāmūs al=muḥīṭ, word patterns, TEI}, PAGES = {9}, URL = {https://publications.cnr.it/doc/439789}, DOI = {10.1109/CiSt49399.2021.9357273}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-7281-6646-9}, CONFERENCE_NAME = {IEEE-CIST2020 DPWH}, CONFERENCE_PLACE = {Agadir-Essaouira, Morocco}, CONFERENCE_DATE = {5/06/2021-12/06/2021}, } @INPROCEEDINGS{RICCI_2020_INPROCEEDINGS_RMBSD_430888, AUTHOR = {Ricci, L. and Melighetti, F. and Boschetti, F. and Salvatori, E. and Del Grosso, A. M.}, TITLE = {DH as an Ideal Educational Environment: the Ethnographic Museum of La Spezia}, YEAR = {2020}, ABSTRACT = {The authors present the outcomes of an educational experimentation that took place in the academic year 2018-2019 at the degree course in Informatica Umanistica at the University of Pisa. The first objective of the project concerned the digitization of a corpus of postcards from the period of the First World War owned by the ethnographic Museum of La Spezia "G. Podenzana".
The aims of the work are not only the historical study of the corpus, but also the organization of a public history project with the Museum.}, KEYWORDS = {digital humanities}, PAGES = {222-226}, URL = {http://amsacta.unibo.it/id/eprint/6316}, DOI = {10.6092/unibo/amsacta/6316}, ISBN = {978-88-942535-4-2}, CONFERENCE_NAME = {La svolta inevitabile: sfide e prospettive per l'Informatica Umanistica}, CONFERENCE_PLACE = {Milano}, CONFERENCE_DATE = {15/01/2020-17/01/2020}, BOOKTITLE = {Quaderni di Umanistica Digitale}, EDITOR = {Marras, C. and Passarotti, M. and Franzini, G. and Litta, E.}, } @INPROCEEDINGS{SANNA_2020_INPROCEEDINGS_SCBN_439796, AUTHOR = {Sanna, A. and Cinerari, R. and Boschetti, F. and Nahli, O.}, TITLE = {Digitizing and Encoding a Multilingual Literary Review: Commerce Numerique}, YEAR = {2020}, ABSTRACT = {Commerce was an important literary review founded in Paris by Princess Margherita Caetani, Prince Roffredo Caetani's wife. Born in America, she was polyglot and maecenas. Between 1924 and 1932 she surrounded herself with three prestigious collaborators: Paul Valéry, Léon-Paul Fargue, Valéry Larbaud. The review promoted the translation of World and European literature in French, translating some of the most important authors like Joyce, T.S. Eliot, Pirandello, Ungaretti, Saint-John Perse, Rilke, Hofmannsthal. The aim of this project is to promote by digitizing the dissemination of the review, to develop studies and research concerning the Caetani family's cultural activities in Europe. All the volumes of the literary review Commerce have been scanned, acquired by OCR and encoded in TEI-XML. 
The cultural value of the operation is discussed and the work-flow to create the digital textual corpus is described in detail.}, KEYWORDS = {Review Commerce, OCR, TEI encoding, literary review, digital resources}, PAGES = {4}, URL = {https://publications.cnr.it/doc/439796}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-7281-6646-9}, CONFERENCE_NAME = {IEEE-CIST2020 DPWH}, CONFERENCE_PLACE = {Agadir-Essaouira, Morocco}, CONFERENCE_DATE = {5/06/2021-12/06/2021}, } @INPROCEEDINGS{SASSOLINI_2020_INPROCEEDINGS_SB_455300, AUTHOR = {Sassolini, E. and Biffi, M.}, TITLE = {Strategie e metodi per il recupero di dizionari storici}, YEAR = {2020}, ABSTRACT = {L'articolo descrive un approccio sperimentale all'estrazione, da formato digitale non standard, della completa struttura delle entrate lessicali del Grande Dizionario storico della Lingua Italiana (GDLI) di S. Battaglia. Sono riportati i risultati preliminari di una collaborazione tra l'Accademia della Crusca e Istituto di Linguistica Computazionale "A. Zampolli" del CNR, che mira a convertire i contenuti testuali in dati digitali strutturati per offrirli alla consultazione e allo studio degli utenti e/o per la successiva integrazione con altre risorse linguistiche, sia dizionari che corpora. Il processo di estrazione si articola da un lato nella definizione di procedure di estrazione dei dati, dall'altro nell'adozione di strategie finalizzate al supporto alla correzione degli errori.}, KEYWORDS = {Archivi digitali, recupero e conservazione, estrazione dell'informazione}, PAGES = {235-239}, URL = {https://publications.cnr.it/doc/455300}, DOI = {10.6092/unibo/amsacta/6316}, ISBN = {978-88-942535-4-2}, CONFERENCE_NAME = {IX Convegno annuale AIUCD: LA SVOLTA INEVITABILE: SFIDE E PROSPETTIVE PER L'INFORMATICA UMANISTICA}, CONFERENCE_PLACE = {Università Cattolica del Sacro Cuore, Milano}, CONFERENCE_DATE = {15-17/01/2020}, } @INPROCEEDINGS{BIANCHI_2020_INPROCEEDINGS_BCMPS_466816, AUTHOR = {Bianchi, S. and Calamai, S. 
and Monachini, M. and Pretto, N. and Stamuli, M. F.}, TITLE = {The grey-side of audio archives}, YEAR = {2020}, ABSTRACT = {Archives often include documents that can hardly be considered publications or grey literature as such, yet they maintain their documentary value and play a role of primary sources for the specialists. These documents, indeed, can help archivists to reveal the sedimentation process of the archive itself and to preserve the authentic context of the documentary production. They also appear to be very useful for the community of researchers and scholars. This happens more frequently with oral archives which include 'non-conventional sources', thus bringing together audio documents, fieldworks notes, correspondence, slipcases, analogic compact cassettes or open reels. At the cross-road of two disciplines, Archival Science and Grey Literature, this paper aims to argue the applicability of the concept of grey literature to this wide range of documentary materials, by showing the experience of Archivio Vi.Vo, a regional project aiming at building a model for archiving, preserving, managing and disseminating audio documents.}, KEYWORDS = {archivi orali}, URL = {https://publications.cnr.it/doc/466816}, VOLUME = {2020-November}, PUBLISHER = {TransAtlantic (Amsterdam, Paesi Bassi)}, ISSN = {1386-2316}, CONFERENCE_NAME = {GL2020 22nd International Conference on Grey Literature}, CONFERENCE_DATE = {20/11/2020}, BOOKTITLE = {The GL-conference series. Conference proceedings}, } @INPROCEEDINGS{BOSCHETTI_2020_INPROCEEDINGS_BDM_416444, AUTHOR = {Boschetti, F. and Del Gratta, R. 
and Monachini, M.}, TITLE = {Latin digital archives and research infrastructures: just a trendy option or a substantive need?}, YEAR = {2020}, ABSTRACT = {Latin digital archives and research infrastructures: just a trendy option or a substantive need?}, KEYWORDS = {Research Infrastructure, Digital Libraries, Latin}, URL = {https://publications.cnr.it/doc/416444}, CONFERENCE_NAME = {ALIM and beyond}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {27-28/01/2020}, } @INPROCEEDINGS{DIDONATO_2020_INPROCEEDINGS_DP_437795, AUTHOR = {Di Donato, F. and Pohle, S.}, TITLE = {GOTRIPLE: Building an innovative discovery platform for the social sciences and humanities}, YEAR = {2020}, ABSTRACT = {The proposed poster presents the goals of the TRIPLE project and the ways the project is addressing them both through the work of its 8 intertwined work packages, and via the collaboration with existing research infrastructures in SSH, i.e. mainly CESSDA, CLARIN and DARIAH.}, KEYWORDS = {GOTRIPLE, discovery, EOSC}, URL = {https://www.clarin.eu/sites/default/files/clarin2020_bazaar_didonato_triple.pdf}, CONFERENCE_NAME = {Clarin Annual Conference 2020}, CONFERENCE_DATE = {05/10/2020}, } @INPROCEEDINGS{FRONTINI_2020_INPROCEEDINGS_F_437563, AUTHOR = {Frontini, F.}, TITLE = {Dans les coulisses des infrastructures européennes en SHS. Rôle et opportunités pour les acteurs de la recherche (ingénieurs et chercheurs)}, YEAR = {2020}, ABSTRACT = {La composante technologique prend une dimension de jour en jour plus importante en LLASHS. Les projets de recherche sont de plus en plus nombreux à mobiliser de gros volumes de données exigeant des services adaptés garants de formes de méthodologies augmentées (exploitation, interopérabilité, accessibilité, archivage). Afin de partager les savoirs et de garantir l'interopérabilité et la préservation à long terme de ces ressources et services, de grandes infrastructures informatiques se mettent en place aux niveaux national et international. 
Dans cette présentation, vous allez découvrir le panorama, en la matière, des e-infrastructures et des grands projets européens à caractère infrastructurel, avec un accent particulier sur les technologies utilisées, les principaux services offerts, et les aspects les plus intéressants en termes de synergie entre approches et disciplines différentes. La présentation portera sur des ERICs (European Research Infrastructure Consortium) établis, comme CLARIN et DARIAH, et sur des projets récents ou en cours de développement, comme PARTHENOS, SSHOC, ELEXIS et TRIPLE. Concernant les aspects techniques, on abordera les questions liées au dépôt, au stockage, à l'identification (single sign on), aux formats et choix des métadonnées et de modélisation formelle, à la recherche fédérée des sources. Nous soulignerons en particulier l'interaction de ces projets avec les infrastructures nationales, notamment Huma-Num, ainsi qu'avec la récemment constituée European Open Science Cloud (EOSC). La présentation aura une visée pratique, avec l'objectif de fournir des indications concrètes aux acteurs de la recherche (chercheurs, ingénieurs...) qui souhaitent participer à ces initiatives et aux groupes de travail qui les animent, ou plus largement favoriser l'accès des chercheurs français aux nombreux services et opportunités offerts.}, KEYWORDS = {Infrastrutture di ricerca, Scienze umane e sociali}, URL = {https://ja-mate2020.sciencesconf.org/data/pages/Resume_Frontini_Nov.pdf}, CONFERENCE_NAME = {Journées annuelles du réseau Mate-shs (JA2020)}, CONFERENCE_PLACE = {Montpellier}, CONFERENCE_DATE = {10/11/2020}, } @INPROCEEDINGS{PIRRELLI_2020_INPROCEEDINGS_PCCDFGMNT_442758, AUTHOR = {Pirrelli, V. and Cappa, C. and Crepaldi, D. and Del Pinto, V. and Ferro, M. and Giulivi, S. and Marzi, C. and Nadalini, A. 
and Taxitari, L.}, TITLE = {Tracking the pace of reading with finger movements}, YEAR = {2020}, ABSTRACT = {Recent experimental evidence in visual perception analysis shows that eye and finger movements strongly correlate during scene exploration, at both individual and group levels. A familiar context which exploits this synergistic behaviour is when children learn to read, with the practice of finger-pointing to text as a support for their attention focus, directional movement and voice-print match. Using a tablet to display short texts, we collected evidence on the finger-pointing behaviour of 3rd-6th Italian graders engaged in both silent and oral reading. "Finger-tracking" data, sampled by the tablet and aligned with the text, made it possible to time a child's reading pace at word and sentence level. Results are shown to replicate established benchmarks in the reading literature, such as the difference in reading pace between age-matched typical and atypical readers as a function of word frequency and length, and neighbourhood entropy and Old20. Atypical readers show increasing difficulty with longer words, with a steeper time increment for word length > 6, integrating previous evidence. In addition, neighbourhood density plays a sparse facilitative role in atypical reading, with no significant interaction with neighbourhood entropy, pointing to a non trivial developmental interplay between sublexical reading and the richness of the Italian orthographic-phonological lexicon. 
Despite their different dynamics, optical and tactile strategies for text exploration prove to be highly congruent: this suggests that finger-tracking can be used as an ecological proxy for eye-tracking in reading assessment.}, KEYWORDS = {Reading, Finger tracking, Mental Lexicon, Word frequency, Word Length, Neighbourhood entropy}, PAGES = {1}, URL = {https://osf.io/hr62g/}, CONFERENCE_NAME = {Words in the World International Conference}, CONFERENCE_PLACE = {Montreal (Canada)}, CONFERENCE_DATE = {16-18/10/2020}, } @TECHREPORT{BARTOLINI_2020_TECHREPORT_BQMA_453502, AUTHOR = {Bartolini, R. and Quochi, V. and Monachini, M. and Affé, F.}, TITLE = {Relazione di fine progetto "PIM-Piattaforma Integrata Monitoraggio"}, YEAR = {2020}, ABSTRACT = {Il documento presenta l'attività svolta dal CNR-ILC nel ruolo di subcontraente di COMDATA per la realizzazione di moduli di trattamento automatico del linguaggio e la consulenza per l'integrazione di metodi di clustering automatico di documenti nella Digital Library del progetto PIM.}, KEYWORDS = {accesso intelligente al testo, digital library, natural language processing}, PAGES = {156}, URL = {https://publications.cnr.it/doc/453502}, } @TECHREPORT{CARDILLO_2020_TECHREPORT_CS_428576, AUTHOR = {Cardillo, F. A. and Straccia, U.}, TITLE = {Fuzzy OWL-BOOST: Learning Fuzzy Concept Inclusions via Real-Valued Boosting}, YEAR = {2020}, ABSTRACT = {OWL ontologies are nowadays a quite popular way to describe structured knowledge in terms of classes, relations among classes and class instances. In this paper, given a target class T of an OWL ontology, we address the problem of learning fuzzy concept inclusion axioms that describe sufficient conditions for being an individual instance of T. To do so, we present Fuzzy OWL-BOOST that relies on the Real AdaBoost boosting algorithm adapted to the (fuzzy) OWL case. We illustrate its effectiveness by means of an experimentation. 
An interesting feature is that the learned rules can be represented directly into Fuzzy OWL 2. As a consequence, any Fuzzy OWL 2 reasoner can then be used to automatically determine/classify (and to which degree) whether an individual belongs to the target class T.}, KEYWORDS = {Fuzzy Logic, Description Logics, OWL 2, Machine Learning, AdaBoost}, PAGES = {1-26}, URL = {https://arxiv.org/abs/2008.05297}, } @TECHREPORT{DIDONATO_2020_TECHREPORT_DLBCDEGGMMOTT_437796, AUTHOR = {Di Donato, F. and Lombardo, T. and Breitfuss, G. and Chen, Y. and Dumouchel, S. and Eskevich, M. and Gingold, A. and Gorgaini, E. and Monachini, M. and Moranville, Y. and Ocansey, J. T. and Thiel, C. and Tóth Czifra, E.}, TITLE = {TRIPLE D 6. 1-Report on the General Interoperability Requirements}, YEAR = {2020}, ABSTRACT = {TRIPLE - Transforming Research Through Innovative Practices for Linked Interdisciplinary Exploration is a EU funded project under the INFRAEOSC-02-2019 - Prototyping new innovative services topic, which started in October 2019 and will end in March 2023. Its main objective is to design and develop a discovery platform for SSH, called GOTRIPLE. This deliverable is the main outcome of Task 6.1 which started at M4 and ends at M36, whose aim is to deal with the definition and the set-up of general TRIPLE's interoperability requirements, considering all the components which are composing the TRIPLE ecosystem (data, resources and tools). As preliminary results, we thus present here a general overview of the main EOSC interoperability requirements and specifications, both coming from a mapping of the EOSC Working Groups outputs, and of the most relevant results of EOSC related projects related to TRIPLE. We also attempt to provide TRIPLE's answers, proposals and solutions to the above mentioned requirements. 
The final picture presents different levels of precision, which depends on the fluidity of the EOSC definition on the one hand, and on the consequent fact that some implications are still unclear, and a discussion on the measures to address EOSC requirements is still on-going. While tackling interoperability, we introduce TRIPLE in its context in order to locate the GOTRIPLE platform in the EOSC frame, and more specifically in the SSH cluster of the EOSC (section 1). Section 2 defines the general interoperability requirements, starting with the software (2.2) and then presents an analysis of the main outputs released by the EOSC Working groups (2.3), taking into consideration as a general reference, the EOSC Interoperability Framework, and, more specifically, the FAIR and Architecture WGs documents (2.3.2, 2.3.4). These are the main guiding references for the design and realization of the EOSC, considering specifically interoperability. Section 2.3.3 illustrates how TRIPLE is translating into practice the FAIR requirements, while section 2.3.5 is focused on TRIPLE current decisions regarding the integration of the TRIPLE solution into the EOSC. To present an enriched scenario, the section includes as well a brief overview of other relevant outputs released by the EOSC WGs (Landscape, RoP, Sustainability and Skills and Training) (2.3.6). With the aim to provide a comprehensive analysis of the EOSC interoperability requirements, the WP6 partners have analyzed relevant deliverables produced by the main EOSC related projects as preparatory activity. The analysis was useful to understand the EOSC environment and its evolution in terms of interoperability and at the same time to understand which external deliverables have to be taken into consideration for the overall project development in TRIPLE. Section 3 includes a synthesis of this work, which is fully presented in Annex I. 
Section 4 - Conclusions and Outlook, outlines TRIPLE's next steps to achieve interoperability and the strategies that will be adopted.}, KEYWORDS = {TRIPLE, GOTRIPLE, EOSC}, URL = {https://zenodo.org/record/4322806}, } @MISC{BOSCHETTI_2020_MISC_BD_484370, AUTHOR = {Boschetti, F. and Del Grosso, A. M.}, TITLE = {Textual markup (typographic, structural, semantic: HTML, CSS, XML)}, YEAR = {2020}, ABSTRACT = {Lezione tenuta nel contesto della Summer School organizzata nel 2020 dal centro Venice Centre for Digital and Public Humanities (VeDPH)}, KEYWORDS = {Digital Humanities, Public Humanities, XML/TEI, Digital Scholarly Edition, VeDPH}, URL = {https://vedph.github.io/summercamp/}, } @MISC{DELGROSSO_2020_MISC_D_484365, AUTHOR = {Del Grosso, A. M.}, TITLE = {Dalla Recensio all'Emendatio Digitale. Teoria, metodi e applicazioni della filologia digitale}, YEAR = {2020}, ABSTRACT = {Presentazione svolta nel contesto del ciclo di webinars di informatica umanistica a cura di G. Ferrante e A. Mazzucchi intitolato "La tecnologia informatica applicata alle scienze filologiche e librarie". Il seminario, inserito nel corso di Alta Formazione in Storia e Filologia del Manoscritto e del Libro Antico della Scuola Superiore Meridionale (Girolamini), introduce in 5 ore di lezione all'uso dei linguaggi di markup per la rappresentazione e conservazione digitale di risorse filologiche e di apparati critici.}, KEYWORDS = {Digital Humanities, Filologia Digitale, XML/TEI, Digital Scholarly Edition, Apparati Critici Digitali}, URL = {https://publications.cnr.it/doc/484365}, } @MISC{DELGROSSO_2020_MISC_D_484376, AUTHOR = {Del Grosso, A. M.}, TITLE = {Git per edizioni digitali collaborative su GitHub}, YEAR = {2020}, ABSTRACT = {This workshop will cover the basic functionalities of the software Git and the platform GitHub. Two days, each day divided into two parts of three hours each. 
Morning sessions: presentation of Git topics/features (theory and practice) Afternoon sessions: hands-on exercises (developing a dynamic and collaborative digital scholarly edition) Total hours for the workshop 12 hours. Objectives of the workshop: Learning the most relevant features regarding Git tool with references to GitHub platform. Git is a "distributed" Version Control System. It is able to handle the history of the changes made to tracked resources over a time period. Git allows users to manage the evolution of collaborative documents and to revisit and/or revert the content of these documents to a particular older state. GitHub is a development platform where it is possible to host and review Git repositories, and many more functionalities. First day topics: Introduction to systems for version control Preliminaries about command line environment Installing Git and testing the correctness of the local installation Git design model and the basic work-flow Local version control: git init, git status, git add, git commit, git log Local files: git rm, git mv Second day topics: Git help: git help and man pages Remote basics: GitHub, git remote, git push, git fetch, git pull Searching and examination: git diff, git grep Advanced log options Branching system: git branch, git checkout, git merge Handling the commit history: git rebase, git reset}, KEYWORDS = {git, github, Digital Scholarly Edition, Filologia Collaborativa, Filologia Digitale}, URL = {https://www.unive.it/data/33113/2/43767}, } @MISC{DELGROSSO_2020_MISC_DPCDFG_484385, AUTHOR = {Del Grosso, A. M. and Piccini, S. and Cosenza, G. and D'Ottavi, G. and Fadda, E. and Gambarara, D.}, TITLE = {Saussure General Linguistics Project: Beyond the Course. 
A Knowledge Site for Rethinking Saussure's General Linguistics}, YEAR = {2020}, ABSTRACT = {The project is primarily committed to establish a standard scholarly edition of Saussure's General Linguistics related texts, making use of a consistent philological methodology, i.e. aware of the profoundly different textualities of the available sources, that is: autograph unfinished notes for a book, didactic materials (students' notes and Saussure's), and an edited volume.}, KEYWORDS = {Digital Humanities, Digital Scholarly Edition, Saussure's manuscripts, Filologia Digitale, Filologia computazionale}, URL = {https://github.com/saussure-team/general-linguistics-project}, } @MISC{DELTURCO_2020_MISC_DCDDMMSZ_484347, AUTHOR = {Del Turco, R. R. and Cacioli, G. and Del Grosso, A. M. and Di Pietro, C. and Martignano, C. and Memaj, J. and Spinelli, F. and Zenzaro, S.}, TITLE = {EVT-Edition Visualization Technology 2 (v. beta 2)}, YEAR = {2020}, ABSTRACT = {EVT (Edition Visualization Technology) is a software for creating and browsing digital editions of manuscripts based on text encoded according to the TEI XML (http://www.tei-c.org/) schemas and Guidelines. This tool was born as part of the [Digital Vercelli Book] (http://vbd.humnet.unipi.it/) project in order to allow the creation of a digital edition of the Vercelli Book, a parchment codex of the late tenth century, now preserved in the Archivio e Biblioteca Capitolare of Vercelli and regarded as one of the four most important manuscripts of the Anglo-Saxon period as regards the transmission of poetic texts in the Old English language. To ensure that it will be working on all the most recent web browsers, and for as long as possible on the World Wide Web itself, EVT is built on open and standard web technologies such as HTML, CSS and JavaScript. Specific features, such as the image viewer, are entrusted to the most effective and stable ones (e.g. 
Openseadragon in the case of the image viewer), again chosen among the open source and best supported ones to reduce the risk of future incompatibilities. The general architecture of the software, in any case, is modular, so that any component which may cause trouble or turn out to be not completely up to the task can be replaced easily. This version is based on the AngularJS programming framework and implements the MVC (Model - View - Controller) design pattern.}, KEYWORDS = {edizioni scientifiche digitali, filologia digitale, critica testuale, filologia di testi medievali, edizioni diplomatiche, web-publishing, XML/TEI, HTML5, CSS, AngularJS, visualization software, manuscript viewer}, URL = {http://evt.labcd.unipi.it/}, } @ARTICLE{ALZETTA_2019_ARTICLE_ADMV_423880, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {INFERRING QUANTITATIVE TYPOLOGICAL TRENDS FROM MULTILINGUAL TREEBANKS. A CASE STUDY}, YEAR = {2019}, ABSTRACT = {In the past decades, linguistic typology went through a renewing phase that involved a significant change in the research questions and methods of the discipline, which is now interested in fine-grained features underlying language diversity. In this paper, we propose a novel approach to address the newly defined needs of linguistic typology by extracting qualitative and quantitative information about a wide range of features from multilingual annotated corpora based on Natural Language Processing methods and techniques. We tested our method in a case study focusing on word order variation in two widely investigated constructions, VERB-SUBJ(ect) and NOUN-ADJ(ective), with a specific view to structural and functional factors underlying the preference for one or the other order, both intra- and cross-linguistically, and their interaction. 
Preliminary experiments have been carried out aimed at acquiring typological evidence from a selection of linguistically annotated treebanks for three different languages, namely Italian, Spanish and English. Our results show the effectiveness of the method in letting similarities and differences also emerge from typologically close languages.}, KEYWORDS = {language typology, multilingual annotated corpora, linguistic knowledge extraction and modelling, word order variation}, PAGES = {209-242}, URL = {https://www.rivisteweb.it/doi/10.1418/95391}, VOLUME = {18}, DOI = {10.1418/95391}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{CHAVEZ_2019_ARTICLE_CRCZBB_398298, AUTHOR = {Chavez, A. G. and Ranieri, A. and Chiarella, D. and Zereik, E. and Babić, A. and Birk, A.}, TITLE = {CADDY Underwater Stereo-Vision Dataset for Human-Robot Interaction (HRI) in the Context of Diver Activities}, YEAR = {2019}, ABSTRACT = {In this article, we present a novel underwater dataset collected from several field trials within the EU FP7 project "Cognitive autonomous diving buddy (CADDY)", where an Autonomous Underwater Vehicle (AUV) was used to interact with divers and monitor their activities. To our knowledge, this is one of the first efforts to collect a large public dataset in underwater environments with the purpose of studying and boosting object classification, segmentation and human pose estimation tasks. The first part of the dataset contains stereo camera recordings (≈10 K) of divers performing hand gestures to communicate with an AUV in different environmental conditions. The gestures can be used to test the robustness of visual detection and classification algorithms in underwater conditions, e.g., under color attenuation and light backscatter. 
The second part includes stereo footage (≈12.7 K) of divers free-swimming in front of the AUV, along with synchronized measurements from Inertial Measurement Units (IMU) located throughout the diver's suit (DiverNet), which serve as ground-truth for human pose and tracking methods. In both cases, these rectified images allow the investigation of 3D representation and reasoning pipelines from low-texture targets commonly present in underwater scenarios. This work describes the recording platform, sensor calibration procedure plus the data format and the software utilities provided to use the dataset.}, KEYWORDS = {dataset, underwater imaging, image processing, marine robotics, field robotics, human-robot interaction, stereo vision, object classification, human pose estimation}, PAGES = {1-14}, URL = {https://www.mdpi.com/2077-1312/7/1/16}, VOLUME = {7}, DOI = {10.3390/jmse7010016}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2077-1312}, JOURNAL = {Journal of marine science and engineering}, } @ARTICLE{CININI_2019_ARTICLE_CCFFMMN_407448, AUTHOR = {Cinini, A. and Cutugno, P. and Ferraris, C. and Ferretti, M. and Marconi, L. and Morgavi, G. and Nerino, R.}, TITLE = {Final results of the NINFA project: impact of new technologies in the daily life of elderly people}, YEAR = {2019}, ABSTRACT = {Background: The paper presents the work carried out within NINFA (iNtelligent Integrated Network For Aged people), a project for the wellbeing of the elderly people at home. Aims: The impact of new technologies on elderly people is evaluated with respect to the three main topics faced by NINFA. 
Methods: NINFA was structured into three main topics: (1) active user engagement from the very beginning of the planning stage: the use of specially designed questionnaires to evaluate the acceptability of new technology in general and robot caregiver specifically; (2) assessment of the well-being through non-invasive techniques: natural language processing for language change monitoring in elderly subjects; (3) automated assessment of motor and cognitive functions at home: systems to deliver tests and exergames through user interfaces compliant with elderly subjects. Results: The analysis shows that there is no a priori closure to support the technology, but it must not be invasive and must allow social interactions. The study of speech transcripts shows that large variations in the number of words used to describe the same situation could be a sign of the onset of cognitive impairments. The specifically designed systems highlight, after the training period, significant improvements in the performances of the participants and a satisfaction with regards to the systems usability. Conclusions: The outcomes of NINFA project highlight some important aspects of the relationship between elderly people and new technologies concerning: engagement and acceptability, assessment of the wellbeing and of the modifications of motor, cognitive and language functions.}, KEYWORDS = {User Engagement, Wellbeing assessment, Linguistic and Cognitive Analysis, Movement Analysis, Exergames, At-Home monitoring, Postural stability}, PAGES = {1-10}, URL = {https://link.springer.com/content/pdf/10.1007/s40520-019-01357-6.pdf}, DOI = {10.1007/s40520-019-01357-6}, PUBLISHER = {Editrice Kurtis (Milano, Italia)}, ISSN = {1720-8319}, JOURNAL = {Aging Clinical and Experimental Research (Online)}, } @ARTICLE{CONNOR_2019_ARTICLE_CVCR_403045, AUTHOR = {Connor, R. and Vadicamo, L. and Cardillo, F. A. 
and Rabitti, F.}, TITLE = {Supermetric search}, YEAR = {2019}, ABSTRACT = {Metric search is concerned with the efficient evaluation of queries in metric spaces. In general, a large space of objects is arranged in such a way that, when a further object is presented as a query, those objects most similar to the query can be efficiently found. Most mechanisms rely upon the triangle inequality property of the metric governing the space. The triangle inequality property is equivalent to a finite embedding property, which states that any three points of the space can be isometrically embedded in two-dimensional Euclidean space. In this paper, we examine a class of semimetric space which is finitely four-embeddable in three-dimensional Euclidean space. In mathematics this property has been extensively studied and is generally known as the four-point property. All spaces with the four-point property are metric spaces, but they also have some stronger geometric guarantees. We coin the term supermetric(1) space as, in terms of metric search, they are significantly more tractable. Supermetric spaces include all those governed by Euclidean, Cosine,(2) Jensen-Shannon and Triangular distances, and are thus commonly used within many domains. In previous work we have given a generic mathematical basis for the supermetric property and shown how it can improve indexing performance for a given exact search structure. Here we present a full investigation into its use within a variety of different hyperplane partition indexing structures, and go on to show some more of its flexibility by examining a search structure whose partition and exclusion conditions are tailored, at each node, to suit the individual reference points and data set present there. Among the results given, we show a new best performance for exact search using a well-known benchmark. (C) 2018 Elsevier Ltd. 
All rights reserved.}, KEYWORDS = {Similarity search, Metric space, Supermetric space, Metric indexing, Four-point property, Hilbert Exclusion}, PAGES = {108-123}, URL = {https://www.sciencedirect.com/science/article/pii/S0306437917301588?via%3Dihub}, VOLUME = {80}, DOI = {10.1016/j.is.2018.01.002}, PUBLISHER = {Pergamon (Oxford, Regno Unito)}, ISSN = {0306-4379}, JOURNAL = {Information systems (Oxf.)}, } @ARTICLE{DELGROSSO_2019_ARTICLE_DCCDGMSS_427276, AUTHOR = {Del Grosso, A. M. and Capizzi, E. and Cristofaro, S. and De Luca, M. R. and Giovannetti, E. and Marchi, S. and Seminara, G. and Spampinato, D.}, TITLE = {Bellini's Correspondence: a Digital Scholarly Edition for a Multimedia Museum}, YEAR = {2019}, ABSTRACT = {Within the "Museo Virtuale della Musica BellinInRete" project, a corpus of letters, written by the renowned composer Vincenzo Bellini (1801-1835) from Catania, will be encoded and made publicly available. This contribution aims at illustrating the part of the project regarding the implementation of the prototype for the metadata and text encoding, indexing and visualisation of Bellini's correspondence. The encoding scheme has been defined according to the latest guidelines of the Text Encoding Initiative and it has been instantiated on a sample of letters. Contextually, a first environment has been implemented by customizing two open source tools: Edition Visualization Technology and Omega Scholarly platform. The main objective of the digital edition is to engage general public with the cultural heritage held by the Belliniano Civic Museum of Catania. This wide access to Bellini's correspondence has been conceived preserving the scholarly transcriptions of the letters edited by Seminara within her most recent critical edition (Olschki, 2017). The digital edition of the corpus takes care of handling the correspondence metadata by means of the correspDesc TEI tagset. 
Finally, Bellini's letters will be accessible via the Web platform as well as integrated into a forthcoming interactive and multimedia tour hosted at the museum.}, KEYWORDS = {digital scholarly edition, correspondence, Digital and Computational Philology, Software Design, Vincenzo Bellini, Music, Multimedia Museum}, PAGES = {23-47}, URL = {https://umanisticadigitale.unibo.it/article/view/9162/9918}, VOLUME = {7}, DOI = {10.6092/issn.2532-8816/9162}, ISSN = {2532-8816}, JOURNAL = {Umanistica Digitale}, } @ARTICLE{DELLORLETTA_2019_ARTICLE_DGMMRSV_423874, AUTHOR = {Dell'Orletta, F. and Greco, S. and Montemagni, S. and Morini, E. and Rossi, F. and Sagri, M. T. and Venturi, G.}, TITLE = {Le parole del miglioramento. Come le scuole descrivono il cambiamento}, YEAR = {2019}, ABSTRACT = {Il presente contributo intende illustrare i risultati di una ricerca condotta con l'uso di strumenti di trattamento automatico del linguaggio (Natural Language Processing: nlp) su quanto dichiarato dalle scuole in circa 2500 Piani di Miglioramento (modello indire ) con l'obiettivo di comprendere le scelte strategiche in un'ottica di miglioramento continuo. Il disegno d'analisi permette di restituire sia una visione complessiva dei Piani di Miglioramento che approfondimenti qualitativi di confronto tra tipologie di scuola e aree geografiche e relativi a tematiche strategiche quali formazione e innovazione.}, KEYWORDS = {Piano di Miglioramento, Natural Language Processing, Formazione, Innovazione}, PAGES = {47-68}, URL = {https://www.rivistainfanzia.it/pvw/app/default/pvw_sito.php?sede_codice=1PWPSE01\&page=2432193}, VOLUME = {1/2019}, PUBLISHER = {Edizioni Centro Studi Erickson (Gardolo (TN), Italia)}, ISSN = {1971-3711}, JOURNAL = {Psicologia dell'educazione}, } @ARTICLE{DELLAGALA_2019_ARTICLE_DCDPV_423878, AUTHOR = {Della Gala, V. and Chiriatti, G. and Dell'Orletta, F. and Pettenati, M. C. 
and Venturi, G.}, TITLE = {Analytics dei testi riflessivi scritti dai docenti neoassunti nel portfolio digitale}, YEAR = {2019}, ABSTRACT = {Presentiamo i risultati preliminari e l'analisi svolta su circa 50.000 testi scritti dai docenti neo nominati in ruolo per riflettere su due attività didattiche svolte con gli studenti, nel contesto del percorso dell'anno di formazione e prova 2016/17. Il percorso prevede attività in presenza e attività a distanza completate sul portfolio digitale, ospitato nell'ambiente online gestito dall'Indire. Nell'ambito del monitoraggio della formazione, con il fine di ottimizzare gli strumenti e il supporto fornito, abbiamo interrogato i dati testuali prodotti dai docenti nell'interazione con l'ambiente per capire se i testi presentassero evidenze riconducibili alle scritture riflessive. Obiettivi dell'indagine sono stati la definizione di uno schema per la classificazione dei testi sulla base del livello di riflessività evidenziato e l'impiego di strumenti di Trattamento Automatico del Linguaggio (TAL) per l'analisi dell'intero corpus testuale prodotto dai docenti. Descriveremo il contesto scientifico e progettuale, le caratteristiche dei dati analizzati, come questo abbia determinato il disegno d'indagine; descriveremo inoltre la sua implementazione e dunque le procedure, gli strumenti e le metriche adottate o elaborate per rappresentare il contenuto dei dati; infine discuteremo i primi risultati e alcuni vantaggi e limiti dell'approccio adottato.}, KEYWORDS = {Teacher professional development, Natural Language Processing, Reflective writing, Linguistic Profiling, Document Classification}, PAGES = {187-204}, URL = {https://ojs.pensamultimedia.it/index.php/sird/article/view/3454/3360}, VOLUME = {Special issue}, DOI = {10.7346/SIRD-2S2019-P189}, PUBLISHER = {Pensa Multimedia (Lecce, Italia)}, ISSN = {2038-9744}, JOURNAL = {Giornale italiano della ricerca educativa (Online)}, } @ARTICLE{GOGGI_2019_ARTICLE_GPBMBC_411599, AUTHOR = {Goggi, S. 
and Pardelli, G. and Bartolini, R. and Monachini, M. and Biagioni, S. and Carlesi, C.}, TITLE = {Semantic Query Analysis from the Global Science Gateway}, YEAR = {2019}, ABSTRACT = {Nowadays web portals play an essential role in searching and retrieving information in the several fields of knowledge: they are ever more technologically advanced and designed for supporting the storage of a huge amount of information in natural language originating from the queries launched by users worldwide. Given this scenario, we focused on building a corpus constituted by the query logs registered by the GreyGuide: Repository and Portal to Good Practices and Resources in Grey Literature and received by the WorldWideScience.org (The Global Science Gateway) portal: the aim is to retrieve information related to social media which as of today represent a considerable source of data more and more widely used for research ends.}, KEYWORDS = {Information Extraction, Query Log, WorldWideScience Alliance, Information gateways, Social Media}, PAGES = {147-155}, URL = {https://publications.cnr.it/doc/411599}, VOLUME = {15}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{MARZI_2019_ARTICLE_MFP_406277, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {A processing-oriented investigation of inflectional complexity}, YEAR = {2019}, ABSTRACT = {Due to the typological diversity of their inflectional processes, some languages are intuitively more difficult than other languages. Yet, finding a single measure to quantitatively assess the comparative complexity of an inflectional system proves an exceedingly difficult endeavor. In this paper we propose to investigate the issue from a processing-oriented standpoint, using data processed by a type of recurrent neural network to quantitatively model the dynamic of word processing and learning in different input conditions. 
We evaluate the relative complexity of a set of typologically different inflectional systems (Greek, Italian, Spanish, German, English and Standard Modern Arabic) by training a Temporal Self-Organizing Map (TSOM), a recurrent variant of Kohonen's Self-Organizing Maps, on a fixed set of verb forms from top-frequency verb paradigms, with no information about the morphosemantic and morphosyntactic content conveyed by the forms. After training, the behavior of each language-specific TSOM is assessed on different tasks, looking at self-organizing patterns of temporal connectivity and functional responses. Our simulations show that word processing is facilitated by maximally contrastive inflectional systems, where verb forms exhibit the earliest possible point of lexical discrimination. Conversely, word learning is favored by a maximally generalizable system, where forms are inferred from the smallest possible number of their paradigm companions. Based on evidence from the literature and our own data, we conjecture that the resulting balance is the outcome of the interaction between form frequency and morphological regularity. Big families of stem-sharing, regularly inflected forms are the productive core of an inflectional system. Such a core is easier to learn but slower to discriminate. In contrast, less predictable verb forms, based on alternating and possibly suppletive stems, are easier to process but are learned by rote. Inflection systems thus strike a balance between these conflicting processing and communicative requirements, while staying within tight learnability bounds, in line with Ackermann and Malouf's Low Conditional Entropy Conjecture. 
Our quantitative investigation supports a discriminative view of morphological inflection as a collective, emergent system, whose global self-organization rests on a surprisingly small handful of language-independent principles of word coactivation and competition.}, KEYWORDS = {Morphological complexity, Discriminative learning, Recurrent neural networks (RNNs), self-organization, emergence, processing uncertainty, stem-family size}, PAGES = {1-23}, URL = {https://www.frontiersin.org/articles/10.3389/fcomm.2019.00048/full}, VOLUME = {4}, DOI = {10.3389/fcomm.2019.00048}, PUBLISHER = {Frontiers Media (Lausanne, Svizzera)}, ISSN = {2297-900X}, JOURNAL = {Frontiers in communication}, } @ARTICLE{SAURI_2019_ARTICLE_SMRB_407031, AUTHOR = {Sauri, R. and Mahon, L. and Russo, I. and Bitinis, M.}, TITLE = {Cross-dictionary linking at sense level with a double-layer classifier}, YEAR = {2019}, ABSTRACT = {We present a system for linking dictionaries at the sense level, which is part of a wider programme aiming to extend current lexical resources and to create new ones by automatic means. One of the main challenges of the sense linking task is the existence of non one-to-one mappings among senses. Our system handles this issue by addressing the task as a binary classification problem using standard Machine Learning methods, where each sense pair is classified independently from the others. In addition, it implements a second, statistically-based classification layer to also model the dependence existing among sense pairs, namely, the fact that a sense in one dictionary that is already linked to a sense in the other dictionary has a lower probability of being linked to a further sense. 
The resulting double-layer classifier achieves global Precision and Recall scores of 0.91 and 0.80, respectively.}, KEYWORDS = {Word sense linking, word sense mapping, lexical translation, lexical resources, language data construction, multilingual data}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85068085716\&origin=inward}, VOLUME = {70}, DOI = {10.4230/OASIcs.LDK.2019.20}, PUBLISHER = {Schloss Dagstuhl, Leibniz-Zentrum für Informatik (Wadern, Germania)}, ISSN = {2190-6807}, JOURNAL = {Open access series in informatics}, } @ARTICLE{SPRUGNOLI_2019_ARTICLE_SPBD_403257, AUTHOR = {Sprugnoli, R. and Pardelli, G. and Boschetti, F. and Del Gratta, R.}, TITLE = {Un'Analisi Multidimensionale della Ricerca Italiana nel Campo delle Digital Humanities e della Linguistica Computazionale}, YEAR = {2019}, ABSTRACT = {This article proposes the first comparative study of four years of Italian conferences in the fields of Digital Humanities and Computational Linguistics. More specifically, we created a corpus consisting of the contributions presented in the AIUCD and CLiC-it conferences between 2014 and 2017 to which we applied a multidimensional analysis taking into consideration: (i) the study of collaborations between authors using social networks analysis techniques, (ii) the automatic extraction of terminology and information and (iii) the examination of citational practices. 
By combining both qualitative and quantitative methods of investigation, this paper aims to shed light on convergences and discrepancies between two research areas that historically have common origins.}, KEYWORDS = {Digital Humanities, Computational Linguistics, Comparative study}, PAGES = {59-89}, URL = {https://umanisticadigitale.unibo.it/article/view/8581}, VOLUME = {5}, DOI = {10.6092/issn.2532-8816/8581}, ISSN = {2532-8816}, JOURNAL = {Umanistica Digitale}, } @ARTICLE{STEFANINI_2019_ARTICLE_SNM_452483, AUTHOR = {Stefanini, A. E. and Nicolosi, A. and Monachini, M.}, TITLE = {A Mock-up for the Development of a Digital Edition for Ancient Greek Fragmentary Poetry: Results of Its Evaluation}, YEAR = {2019}, ABSTRACT = {Ancient Greek poetry is an essential part of the western cultural heritage; thus, it is important that people have access to its texts and whatever relates to their understanding in a reliable and easy way. Whenever user evaluation is concerned, mock-ups are used by designers to acquire feedback from users. A mock-up is defined as a model of the final product, and may be used for demonstration, evaluation and other purposes. The authors prototyped a mock-up for focusing on the requirements of a scholarly digital edition of Archilochus. This was put under evaluation to assess its usability: it was submitted to extensive use and testing by a sample of prospective users, to better focus on the requirements from a product's perspective. Experimentation involved a group of university students, attending a Greek Philology course at Parma University. More than half of the respondents considered the mock-up a useful study support. 
The evaluation also pointed out that the mock-up had to be revised, so as to guarantee better cognitive simplicity of the user interface.}, KEYWORDS = {Ancient Greek Poetry, Digital Edition, Greek Philology, Digital Humanities, Digital Philology, Didactics, Evaluation}, PAGES = {41-57}, URL = {https://www.igi-global.com/article/a-mock-up-for-the-development-of-a-digital-edition-for-ancient-greek-fragmentary-poetry/237162}, VOLUME = {8}, DOI = {10.4018/IJACDT.2019070103}, PUBLISHER = {IGI Global (Hershey, PA, Stati Uniti d'America)}, ISSN = {2155-420X}, JOURNAL = {International journal of art, culture and design technologies (Online)}, } @BOOK{BERZINS_2019_BOOK_BCGLMPRSSVV_443016, AUTHOR = {Berzins, A. and Choukri, K. and Giagkou, M. and Lösch, A. and Mazo, H. and Piperidis, S. and Rigault, M. and Schnur, E. and Smal, L. and Van Genabith, J. and Vasiljevs, A.}, TITLE = {ELRC White Paper-Sustainable Language Data Sharing to Support Language Equality in Multilingual Europe}, YEAR = {2019}, ABSTRACT = {The ELRC White Paper "Sustainable Language Data Sharing to Support Language Equality in Multilingual Europe - Why Language Data Matters" provides an analysis of European practices for sharing language data and the corresponding challenges, as well as clear recommendations for policy-level decision-makers on how to overcome these challenges.}, KEYWORDS = {Sustainable Language Data Sharing, Language Equality, Multilingual Europe}, URL = {https://www.lr-coordination.eu/sites/default/files/Documents/ELRCWhitePaper.pdf?lang=bg}, ISBN = {978-3-943853-05-6}, } @INCOLLECTION{BELLANDI_2019_INCOLLECTION_BMK_429245, AUTHOR = {Bellandi, A. and Monachini, M. and Khan, F.}, TITLE = {LexO: Where Lexicography Meets the Semantic Web}, YEAR = {2019}, ABSTRACT = {LexO is a collaborative web editor used for the creation and management of (multilingual) lexical and terminological resources as linked data resources. 
The editor makes use of Semantic Web technologies (which enrich web data with semantic information in order to make them machine readable) and the linked data publishing paradigm in order to ensure that lexical resources can be more easily shared and reused by the scientific community.}, KEYWORDS = {Semantic Web technologies, multilingual lexical resources, collaborative web editor}, PAGES = {43-47}, URL = {https://publications.cnr.it/doc/429245}, VOLUME = {2}, BOOKTITLE = {Tour de CLARIN volume two}, EDITOR = {Fiser, D. and Lenardic, J.}, } @INCOLLECTION{GIOVANNETTI_2019_INCOLLECTION_G_457777, AUTHOR = {Giovannetti, E.}, TITLE = {Traduzione Talmud Babilonese}, YEAR = {2019}, ABSTRACT = {Traduzione Talmud Babilonese è un progetto di ricerca che ha come obiettivo la traduzione in lingua italiana del Talmud Babilonese, un testo fondamentale della cultura ebraica non solo in campo religioso ma che tocca anche ogni aspetto della conoscenza umana, dalla giurisprudenza alla scienza, dalla filosofia alla vita di tutti i giorni}, KEYWORDS = {traduzione assistita dal calcolatore, talmud babilonese}, PAGES = {126-126}, URL = {https://publications.cnr.it/doc/457777}, VOLUME = {1}, DOI = {10.36173/PLURIMI-2019-1}, PUBLISHER = {CNR EDIZIONI (ROMA, ITA)}, ISBN = {9788880803775}, BOOKTITLE = {Linguaggi, ricerca, comunicazione. Focus CNR}, EDITOR = {Cadeddu, M. E. and Marras, C.}, } @INCOLLECTION{RUSSO_2019_INCOLLECTION_RMCM_429036, AUTHOR = {Russo, I. and Marconi, L. and Cutugno, P. and Monachini, M.}, TITLE = {Le parole sono ponti: risorse digitali per l'integrazione in contesti multilingue}, YEAR = {2019}, ABSTRACT = {Nel presente lavoro esporremo due esperienze inerenti all'uso e alla produzione di risorse linguistiche multilingui, svolte da alcuni ricercatori dell'Istituto di Linguistica Computazionale "Antonio Zampolli" (ilc) del cnr. 
Più nello specifico verrà descritta la realizzazione di un glossario nell'ambito del progetto Ascolto Accoglienza Azioni Offresi (aaa Offresi) e l'uso sperimentale di ImagAct (Moneglia et alii 2012) - una risorsa lessicale multilingue sui verbi d'azione - in una scuola primaria caratterizzata da una forte presenza di alunni stranieri. Il fine della ricerca è quello di favorire l'emergere delle competenze metalinguistiche degli apprendenti, valorizzando la diversità linguistica e culturale.}, KEYWORDS = {Multilingual lexical resources, L2 teaching, Translanguaging, Public administration terminology}, PAGES = {127-136}, URL = {https://publications.cnr.it/doc/429036}, VOLUME = {I}, DOI = {10.36173/PLURIMI-2019-1/09}, PUBLISHER = {Consiglio Nazionale delle Ricerche (Roma, ITA)}, ISBN = {9788880803775}, BOOKTITLE = {Linguaggi, ricerca, comunicazione. Focus CNR}, EDITOR = {Cadeddu, M. E. and Marras, C.}, } @INPROCEEDINGS{ALZETTA_2019_INPROCEEDINGS_ADMV_423881, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Dissecting Treebanks to Uncover Typological Trends. A Multilingual Comparative Approach}, YEAR = {2019}, ABSTRACT = {Over the last years, linguistic typology started attracting the interest of the community working on cross- and multi-lingual NLP as a way to tackle the bottleneck deriving from the lack of annotated data for many languages. Typological information is mostly acquired from publicly accessible typological databases, manually constructed by linguists. As reported in Ponti et al. (2018), despite the abundant information contained in them for many languages, these resources suffer from two main shortcomings, i.e. their limited coverage and the discrete nature of features (only "the majority value rather than the full range of possible values and their corresponding frequencies" is reported). 
Corpus-based studies can help to automatically acquire quantitative typological evidence which might be exploited for polyglot NLP. Recently, the availability of corpora annotated following a cross-linguistically consistent annotation scheme such as the one developed in the Universal Dependencies project is prompting new comparative linguistic studies aimed to identify similarities as well as idiosyncrasies among typologically different languages (Nivre, 2015). The line of research described here is aimed at acquiring quantitative typological evidence from UD treebanks through a multilingual contrastive approach.}, KEYWORDS = {Natural Language Processing, Linguistic Typology}, PAGES = {1-3}, URL = {https://typology-and-nlp.github.io/2019/assets/2019/papers/5.pdf}, ISBN = {978-1-950737-29-1}, CONFERENCE_NAME = {1st TyP-NLP: The Workshop on Typology for Polyglot NLP, ACL workshop}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {01/08/2019}, } @INPROCEEDINGS{BOSCHETTI_2019_INPROCEEDINGS_BPV_409872, AUTHOR = {Boschetti, F. and Pardelli, G. and Venturi, G.}, TITLE = {Nove Anni di jTEI: What's New?}, YEAR = {2019}, ABSTRACT = {Questo contributo illustra metodi e strumenti per studiare il cambiamento diacronico degli interessi di ricerca della comunità TEI grazie all'uso di metodi di estrazione automatica della terminologia da corpora di dominio.}, KEYWORDS = {Natural Language Processing, Digital Humanities}, PAGES = {1-6}, URL = {http://ceur-ws.org/Vol-2481}, VOLUME = {2481}, URN = {urn:nbn:de:0074-2481-7}, PUBLISHER = {CEUR-WS. org (Aachen, DEU)}, CONFERENCE_NAME = {CLiC-it 2019-Sesta Conferenza Italiana di Linguistica Computazionale}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {13-15/11/2019}, BOOKTITLE = {CLiC-it 2019 Italian Conference on Computational Linguistics}, EDITOR = {Bernardi, R. and Navigli, R. and Semeraro, G.}, } @INPROCEEDINGS{CERNIGLIA_2019_INPROCEEDINGS_CCCMMDF_403916, AUTHOR = {Cerniglia, A. and Chiarella, D. and Cutugno, P. and Marconi, L. 
and Magrini, A. and Di Feo, G. and Ferretti, M.}, TITLE = {QUESTIONNAIRE ANALYSIS TO DEFINE THE MOST SUITABLE SURVEY FOR PORT-NOISE INVESTIGATION}, YEAR = {2019}, ABSTRACT = {The high level of noise pollution affecting the areas between ports and logistic platforms represents a problem that can be faced from different points of view. Acoustic monitoring, mapping, short-term measurements, port and road traffic flows analyses can give useful indications on the strategies to be proposed for a better management of the problem. A survey campaign through the preparation of questionnaires to be submitted to the population exposed to noise in the back-port areas will help to better understand the subjective point of view. The paper analyses a sample of questions suitable for the specific research, chosen as part of the wide database of questionnaires internationally proposed for subjective investigations. The preliminary results of a first data collection campaign are considered to verify the adequacy of the number, the type of questions, and the type of sample noise used for the survey. The questionnaire will be optimized to be distributed in the TRIPLO project (TRans-ports and Innovative sustainable connections between Ports and LOgistic platforms). The results of this survey will be the starting point for the linguistic investigation carried out in combination with the acoustic monitoring, to improve understanding the connections between personal feeling and technical aspects.}, KEYWORDS = {port noise, acoustic monitoring, subjective survey, psychoacoustics}, URL = {https://publications.cnr.it/doc/403916}, ISBN = {978-1-9991810-0-0}, CONFERENCE_NAME = {26th International Congress on Sound \& Vibration}, CONFERENCE_PLACE = {Montréal, Canada}, CONFERENCE_DATE = {7-11/07/2019}, } @INPROCEEDINGS{CHIRIATTI_2019_INPROCEEDINGS_CBDV_423885, AUTHOR = {Chiriatti, G. and Brunato, D. and Dell'Orletta, F. and Venturi, G.}, TITLE = {What makes a review helpful? 
Predicting the helpfulness of Italian TripAdvisor reviews}, YEAR = {2019}, ABSTRACT = {In this paper we introduce a classification system devoted to predict the helpfulness of Italian online reviews. It is based on a wide set of features reflecting the different factors involved and tested on different categories of TripAdvisor reviews. For this purpose, we collected the first Italian corpus of online reviews enriched with metadata related to their helpfulness and we carried out an in-depth analysis of the most predictive features.}, KEYWORDS = {Natural Language Processing, Document Classification, Linguistic Profiling}, PAGES = {1-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85074834351\&origin=inward}, VOLUME = {2481}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {6th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {13-15/11/2019}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{CUTUGNO_2019_INPROCEEDINGS_CMFC_400412, AUTHOR = {Cutugno, P. and Marconi, L. and Ferretti, M. and Chiarella, D.}, TITLE = {Estudios lingüísticos en antologías narrativas sobre la experiencia del viaje}, YEAR = {2019}, ABSTRACT = {Las antologías "Partire: Antologia narrativa di geografia emozionale" y "Partire: Antologia illustrata per eterni viaggiatori" fueron elaboradas por el CTS: Centro turístico Studentesco e Giovanile, en 2009, 2010, 2011 y 2013. Las cuatro obras relacionan historias, imágenes y pequeñas frases referidas con un "viaje de los sueños", que contiene el lugar y por qué ese viaje es el de los deseos. 
Cada obra contiene imágenes o historias relacionadas con temas específicos en los que se coloca cada narración; para cada una de las ediciones se lanzó un concurso para estimular la participación en la redacción de las narraciones de jóvenes italianos entre dieciocho y treinta y cinco años sobre el tema del viaje. En otro artículo previamente producido, fueron analizados los componentes gramaticales de las contribuciones escritas. Los objetivos fueron identificar las posibles diferencias de las partes del discurso en los textos que componen los distintos volúmenes y, al mismo tiempo, tratar de establecer para los diversos textos, dentro de qué gama de valores se coloca la relación entre sustantivos y verbos. El análisis cualitativo y cuantitativo de las palabras más frecuentes permitió monitorear cómo algunos de los relatos de viaje se convierten en reflexiones introspectivas cambiando el enfoque de los que escriben del viaje en el mundo a los que lo hacen sobre el viaje de la vida y viceversa.}, KEYWORDS = {viaggio, analisi linguistica, linguistica computazionale}, PAGES = {117-120}, URL = {https://publications.cnr.it/doc/400412}, ISBN = {9789597174363}, CONFERENCE_NAME = {XVI° Simposio Internacional de Comunicación Social}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {21-25/01/2019}, } @INPROCEEDINGS{FIEROMONTE_2019_INPROCEEDINGS_FBDV_423883, AUTHOR = {Fieromonte, M. and Brunato, D. and Dell'Orletta, F. and Venturi, G.}, TITLE = {Italian and English sentence simplification: How many differences?}, YEAR = {2019}, ABSTRACT = {The paper proposes a cross-linguistic analysis of two parallel monolingual corpora conceived for automatic text simplification in two languages, Italian and English. The aim is to find similarities and differences in the process of simplification in two typologically different languages. 
To carry out the comparison, 1,000 sentences were extracted from the two corpora and annotated with a scheme previously used to annotate simplification phenomena.}, KEYWORDS = {Natural Language Processing, Automatic Text Simplification}, PAGES = {1-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85074816689\&origin=inward}, VOLUME = {2481}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {6th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {13-15/11/2019}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{GOGGI_2019_INPROCEEDINGS_GPBMBC_400343, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Monachini, M. and Biagioni, S. and Carlesi, C.}, TITLE = {Semantic query analysis from the global science gateway}, YEAR = {2019}, ABSTRACT = {We focused on building a corpus constituted by the query logs registered by the GreyGuide: Repository and Portal to Good Practices and Resources in Grey Literature and received by the WorldWideScience.org (The Global Science Gateway) portal.}, KEYWORDS = {Information Extraction, Terminology}, PAGES = {105-113}, URL = {https://publications.cnr.it/doc/400343}, VOLUME = {20}, ISBN = {978-90-77484-33-3}, CONFERENCE_NAME = {GL20-Twentieth International Conference on Grey Literature: Research Data Fuels and Sustains Grey Literature}, CONFERENCE_PLACE = {New Orleans, USA}, CONFERENCE_DATE = {3-4 December 2018}, BOOKTITLE = {Research Data Fuels and Sustains Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{GRECO_2019_INPROCEEDINGS_GMLSV_415175, AUTHOR = {Greco, A. and Marzi, C. and Lanata, A. and Scilingo, E. P. 
and Vanello, N.}, TITLE = {Combining Electrodermal Activity and Speech Analysis towards a more Accurate Emotion Recognition System}, YEAR = {2019}, ABSTRACT = {Current research in the emotion recognition field is exploring the possibility of merging the information from physiological signals, behavioural data, and speech. Electrodermal activity (EDA) is amongst the main psychophysiological arousal indicators. Nonetheless, it is quite difficult to be analyzed in ecological scenarios, like, for instance, when the subject is speaking. On the other hand, speech carries relevant information of subject emotional state and its potential in the field of affective computing is still to be fully exploited. In this work, we aim at exploring the possibility of merging the information from electrodermal activity (EDA) and speech to improve the recognition of human arousal level during the pronunciation of single affective words. Unlike the majority of studies in the literature, we focus on speakers' arousal rather than the emotion conveyed by the spoken word. Specifically, a support vector machine with recursive feature elimination strategy (SVM-RFE) is trained and tested on three datasets, i.e. using the two channels (i.e., speech and EDA) separately and then jointly. The results show that the merging of EDA and speech information significantly improves the marginal classifier (+11.64%). 
The six selected features by the RFE procedure will be used for the development of a future multivariate model of emotions.}, KEYWORDS = {emotion recognition, feature selection, pattern classification, physiology, psychology, support vector machines, human arousal level, single affective words, EDA, electrodermal activity, speech analysis, emotion recognition system, speech processing}, PAGES = {229-232}, URL = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=\&arnumber=8857745\&isnumber=8856280}, VOLUME = {41st Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC)}, DOI = {10.1109/EMBC.2019.8857745}, PUBLISHER = {IEEE Service Center (Piscataway, NJ, Stati Uniti d'America)}, ISSN = {1557-170X}, ISBN = {978-1-5386-1311-5}, CONFERENCE_NAME = {41st Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC)}, CONFERENCE_PLACE = {Berlin, Germany}, CONFERENCE_DATE = {23-27 July 2019}, BOOKTITLE = {Conference proceedings (IEEE Eng. Med. Biol. Soc., Conf.)}, } @INPROCEEDINGS{MAGRINI_2019_INPROCEEDINGS_MDCMCCF_403914, AUTHOR = {Magrini, A. and Di Feo, G. and Cerniglia, A. and Marconi, L. and Cutugno, P. and Chiarella, D. and Ferretti, M.}, TITLE = {INDAGINE SOGGETTIVA FINALIZZATA ALLA VALUTAZIONE DEL DISTURBO DA RUMORE NELLE ZONE RETROPORTUALI}, YEAR = {2019}, ABSTRACT = {Vengono presentati alcuni risultati preliminari di una ricerca sulla percezione del rumore in zone retroportuali, che si inserisce nelle azioni previste nell'ambito del progetto TRIPLO (Programma Interreg Italia-Francia Marittimo 2014-2020). 
Le valutazioni riguardano le prime fasi di realizzazione e somministrazione di un questionario preliminare, realizzato su piattaforma web, e delle relative risposte: attraverso questo strumento si vogliono mettere in relazione rumori e percezione soggettiva, mediante l'uso di termini linguistici ritenuti più appropriati dai soggetti intervistati.}, KEYWORDS = {port noise, acoustic monitoring, subjective survey, psychoacoustics}, PAGES = {2}, URL = {https://publications.cnr.it/doc/403914}, ISBN = {978-88-88942-59-9}, CONFERENCE_NAME = {46° Convegno Nazionale Associazione Italiana di Acustica}, CONFERENCE_PLACE = {Pesaro}, CONFERENCE_DATE = {29-31 maggio 2019}, } @INPROCEEDINGS{MARZI_2019_INPROCEEDINGS_MGSV_430473, AUTHOR = {Marzi, C. and Greco, A. and Scilingo, E. P. and Vanello, N.}, TITLE = {Electrodermal activity and speech features as predictors for arousal level changes after affective word pronunciation}, YEAR = {2019}, ABSTRACT = {This work explores the possibility of estimating subject arousal through the analysis of speech and electrodermal activity (EDA). One critical issue to be clarified is the reliability of EDA signal during speech production. To accomplish this task, a relation among EDA, speech activity and subject arousal during isolated affective word pronunciation task, will be investigated. The results show that significant information on subject arousal can be still obtained by analyzing EDA during speech. In fact, a significant relationship between EDA features and self-reported arousal can be observed. In addition, a quantitative linear model relating EDA- and speech-related features could be identified. 
These preliminary results indicate how the analysis of concurrent acquisition of EDA and speech deserves further attention and could offer a valid approach for the prediction of subject arousal during speech production, as a method for validating self-assessment ratings.}, KEYWORDS = {electrodermal activity, regression model, word pronunciation, arousal, speech}, PAGES = {93-96}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85086605454\&origin=inward}, VOLUME = {122}, PUBLISHER = {Firenze University Press (Firenze, Italia)}, ISBN = {978-88-6453-961-4}, CONFERENCE_NAME = {11th international workshop on Models and Analysis of Vocal Emissions for Biomedical Applications}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {17-19/12/2019}, BOOKTITLE = {Models and Analysis of Vocal Emissions for Biomedical Applications}, EDITOR = {Manfredi, C.}, } @INPROCEEDINGS{RORBERI_2019_INPROCEEDINGS_RM_408259, AUTHOR = {Rorberi, S. and Marzi, C.}, TITLE = {Modelling the interaction of regularity and morphological structure: the case of Russian verb inflection}, YEAR = {2019}, ABSTRACT = {Modelling complex inflection systems, such as conjugation in Modern Greek, Italian or Russian, requires careful consideration of a number of factors, ranging from pervasive stem allomorphy to the identification of the appropriate inflection class and the inferential predictability of morpho-phonological processes. Descriptive approaches have taken different views on how to account for degrees of morphological (ir)regularity, while making different predictions about the way speakers process regular and irregular forms in highly-inflecting languages. In the present paper, we assess the psycholinguistic implications of two radically different approaches to the description of the Russian verb system: a more traditional approach dating back to Jakobson (1948), and a Words and Paradigm approach (Brown 1998). Based on recent fMRI evidence (Slioussar et al. 
2014) and original results of a neural network simulation with recurrent self-organising maps (Ferro et al. 2011; Marzi et al. 2014; Pirrelli et al. 2015; Marzi et al. 2016), we suggest that both approaches are prima facie compatible with Russian data, while being in contrast with Pinker's claim that the regular-irregular distinction is an epiphenomenon of the storage-processing dichotomy in the human language faculty (Pinker \& Ullman 2002). We argue that this evidence lends support to integrative models of the mental lexicon (Marzi \& Pirrelli 2015), accounting for a graded interaction between regularity and morphological structure.}, KEYWORDS = {Inflectional complexity, Russian verb system, perception of morphological structure, recurrent self-organising neural network}, PAGES = {107-110}, URL = {http://drehu.linguist.univ-paris-diderot.fr/ismo-2019/?fichier=programme}, VOLUME = {2019}, CONFERENCE_NAME = {International Symposium of Morphology (ISMo) 2019}, CONFERENCE_PLACE = {Université de Paris, France}, CONFERENCE_DATE = {25-27/09/2019}, EDITOR = {Crysmann, B. and Villoing, F.}, } @INPROCEEDINGS{ALBANESI_2019_INPROCEEDINGS_AD_414913, AUTHOR = {Albanesi, D. and Del Gratta, R.}, TITLE = {OpeNER and PANACEA: Web Services for the CLARIN Research Infrastructure}, YEAR = {2019}, ABSTRACT = {This paper describes the necessary steps for the integration of OpeNer and PANACEA Web Services within the CLARIN research infrastructure. 
The original Web Services are wrapped into a framework and re-implemented as REST APIs to be further exploited through both Language Resource Switchboard and WebLicht and made available for the CLARIN community.}, KEYWORDS = {CLARIN-IT, Interoperabilità, Research Infrastructure}, PAGES = {19-23}, URL = {https://office.clarin.eu/v/CE-2019-1512_CLARIN2019_ConferenceProceedings.pdf}, CONFERENCE_NAME = {CLARIN Annual Conference 2019}, CONFERENCE_PLACE = {Leipzig, Germany}, CONFERENCE_DATE = {30/09/2019, 02/10/2019}, } @INPROCEEDINGS{CATERINO_2019_INPROCEEDINGS_CBMDSD_430369, AUTHOR = {Caterino, A. F. and Battaglino, G. and Marini, A. and Di Meglio, A. and Silvi, D. and Del Grosso, A. M.}, TITLE = {Letteratura e filologia come scienze esatte: per un nuovo statuto scientifico dell'umanista informatizzato}, YEAR = {2019}, ABSTRACT = {La leggenda vuole che fosse inciso all'entrata dell'accademia platonica il motto «ἀγεωμέτρητος μηδεὶς εἰσίτω». La geometria è qui intesa come la scienza stessa, quell'esattezza di calcolo indispensabile a cogliere determinate strutture armoniche all'interno dell'arte; strutture che, in fin dei conti, costituiscono la vera e propria ossatura del concetto di bello. Per meglio concepire il bello nella sua struttura è quindi necessario avere una capacità d'analisi matematica, geometrica. Oggi l'informatica sa venire incontro proprio alla necessità dell'umanista di rendere il suo lavoro più specifico e preciso, al fine di ottenere dal proprio lavoro migliori esegesi, migliori ricostruzioni testuali, migliori approcci critici utili a una fruizione più consapevole dello stesso testo letterario. Il panel vorrebbe quindi raccogliere ricerche e testimonianze di chi si occupa e preoccupa di applicare l'informatica agli studi filologici e letterari, con la volontà ultima di superare le barriere che contrappongono in modalità integralista gli studia humanitatis alle cosiddette scienze esatte. 
Esso nasce come estensione delle ricerche informatico-umanistiche condotte presso l'Università degli Studi del Molise dal gruppo di studi Eterodossie e dissenso nella letteratura italiana, ma vuole ovviamente aprirsi a chiunque creda - in maniera critica e ragionata - alla causa delle digital humanities.}, KEYWORDS = {Digital Humanities}, URL = {https://www.italianisti.it/associazione/congressi-adi/letteratura-e-scienze}, CONFERENCE_NAME = {XXIII Congresso ADI-Associazione degli Italianisti}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {12/09/2019-14/09/2019}, } @INPROCEEDINGS{DELGROSSO_2019_INPROCEEDINGS_D_430160, AUTHOR = {Del Grosso, A. M.}, TITLE = {Multilingual Word-by-word alignment. Methodology and some preliminary outcomes towards the construction of multilingual Lexicon within the "Traduzione del Talmud Babilonese" project}, YEAR = {2019}, ABSTRACT = {Textual scholars have been exploiting for long time multilingual resources in their daily work to better understand the primary sources they inquire. Bitexts are parallel texts which turn out to be useful in a number of cross-linguistic and comparative processing tasks. This talk will show the workflow adopted within the research activities conducted on the Italian translation of the Babylonian Talmud. More specifically, I will illustrate the ongoing work towards the construction of a multilingual Hebrew/Aramaic/Italian terminological resource by means of stochastic generative approaches to word-by-word text alignment. The related literature discusses plenty of techniques concerning this topic. The alignment tool I developed is grounded on generative models (i.e., IBM and HMM models), which are a collection of non-supervised machine learning algorithms, to calculate the probability of linking two words in a multilingual term pair. 
From a technical standpoint, beside the adopted models, which are based on an alignment function and on an unsupervised training procedure devoted to estimating the unknown probability distributions, other machine learning approaches to word alignment exist that encompass discriminative techniques, which are based on a target function and on a supervised learning process exploiting labeled training data set. The implemented models were widely adopted in the literary domain, as they are able to profitably handle interpretative bitexts modeling also deletion, insertion, transposition phenomena without having an extant labeled data set. The workflow I will present encompasses four distinct phases: 1) The encoding of the parallel text, which has been carried out according to the last TEI recommendations. In particular, the linking-target approach described within the Module 16 of the guidelines was used. 2) The semi-automatic extraction of the Italian terms, which has been carried out by means of linguistic analysis technologies available at the Institute of Computational Linguistics (ILC-CNR). These tools include a stochastic component for terminology extraction. 3) The addition of Hebrew/Aramaic terms to the Italian extracted ones via word-by-word alignment to automatically process the three main ancient languages appearing in the Talmud, namely mishnaic Hebrew, biblical Hebrew and babylonian Aramaic. 4) Finally, the revision of the obtained results through an ad-hoc implemented web-based application. This final step is devoted to build a ground truth and/or a gold training set allowing us to perform a complete validation process of the alignment outcomes. 
For the time being, 219.000 tokens have been analyzed, extracted from four tractates of the Babylonian Talmud which were translated so far.}, KEYWORDS = {bilingual word alignment, translation}, URL = {http://www.ens-lyon.fr/evenement/recherche/machine-learning-donnees-textuelles-et-recherche-en-sciences-humaines-et}, CONFERENCE_NAME = {Machine learning, données textuelles et recherche en sciences humaines et sociales}, CONFERENCE_PLACE = {ENS de Lyon}, CONFERENCE_DATE = {25/11/2019-26/11/2019}, } @INPROCEEDINGS{DELGROSSO_2019_INPROCEEDINGS_D_430161, AUTHOR = {Del Grosso, A. M.}, TITLE = {Verso la definizione e l'implementazione di una piattaforma orientata allo studio critico del testo}, YEAR = {2019}, ABSTRACT = {Il contributo illustra le caratteristiche della piattaforma in sviluppo presso ILC per lo studio scientifico del testo e in parte impiegata per la realizzazione dell'edizione digitale delle lettere di Bellini.}, KEYWORDS = {BelliniInRete, Digital Scholarly Editing, Digital Philology, Computational Philology}, URL = {https://publications.cnr.it/doc/430161}, CONFERENCE_NAME = {Il progetto BellinInRete digital correspondence. Per un'edizione critica digitale delle lettere di Vincenzo Bellini}, CONFERENCE_PLACE = {Coro di Notte-ex Monastero dei Benedettini-piazza Dante 32, Catania}, CONFERENCE_DATE = {29/10/2019}, } @INPROCEEDINGS{DELGROSSO_2019_INPROCEEDINGS_DCCSS_429947, AUTHOR = {Del Grosso, A. M. and Capizzi, E. and Cristofaro, S. and Seminara, G. and Spampinato, D.}, TITLE = {Promoting Bellini's legacy and the Italian opera by scholarly digital editing his own correspondence}, YEAR = {2019}, ABSTRACT = {This contribution aims at illustrating the ongoing work towards the digital scholarly editing, long-term preservation, web publishing and computational exploiting of 41 letters, written by the renowned composer Vincenzo Bellini.
The correspondence is kept at the Belliniano Civic Museum of Catania and is being encoded in XML according to the last TEI guidelines. The edition will be made accessible both via web - exploiting the Edition Visualization Technology (EVT) - as well as integrated into an interactive and multimedia tour within the museum. The digital edition is based on the recently published transcriptions made by Seminara. The encoding scheme has been defined according to the edition requirements, the TEI best practices and the Music Encoding Initiative (MEI) guidelines - where the musical context must be specified. Our initiative has some elements of innovation that distinguish it from similar projects, such as the Van Gogh letter project or the DALF project. For instance, we encode the circumstance that the letters themselves have also the purpose of acting as envelopes. In fact, they are folded on themselves and postmarks and wax seals are sometimes affixed on them. The edition takes care of handling the correspondence metadata by means of the correspDesc TEI tagset, thus providing the opportunity to exploit the correspSearch API. This approach has allowed us to enrich the encoding of the document both in its logical and physical structure and in indexing letters by sender, recipient, date, and places. The museum context and the educational purposes have even led us to the definitions of some lists of named entities. Within these resources we have adopted the Semantic Web and LOD paradigm by encoding external references to authoritative repositories such as RISM and DBpedia. Finally, we implemented some useful EVT extensions to automatically handle hotspots and to show critical notes that accompany the text.}, KEYWORDS = {TEI, DSE, Vincenzo Bellini, Digital Correspondence}, URL = {https://gams.uni-graz.at/o:tei2019.118}, DOI = {10.5281/zenodo.3461673}, CONFERENCE_NAME = {What is text, really? 
TEI and beyond (TEI 2019)}, CONFERENCE_PLACE = {University of Graz, Austria}, CONFERENCE_DATE = {16/09/2019-20/09/2019}, BOOKTITLE = {What is text, really? TEI and beyond}, EDITOR = {Vogeler, G.}, } @INPROCEEDINGS{DELGROSSO_2019_INPROCEEDINGS_DP_430371, AUTHOR = {Del Grosso, A. M. and Piccini, S.}, TITLE = {Fixing the Movements of Thought in Text and Terminology: the Effectiveness of Scholarly Digital Tools on Saussure's Writings}, YEAR = {2019}, ABSTRACT = {The aim of this contribution is to present and discuss some recent activities, carried out by the Institute of Computational Linguistics (CNR - Pisa), devoted to representing in an explicit and formal way the diachronic evolution of concepts and terms occurring in Saussure's manuscripts. In the first part of the presentation, we will briefly outline some theoretical aspects concerning the diachronic standpoint in terminology and the "best practices" to be followed when modelling (diachronic) terminological resources. Then, some examples of terminological evolution will be provided, taken from the electronic lexicon Simple_FdS, built within the PRIN project "Per un'edizione digitale dei manoscritti di Ferdinand de Saussure" (2008-2011), based on the Generative Lexicon theory elaborated by Pustejovsky (1995). Terminological data will be shown in LexO, a Web-based and collaborative web editor, which allows for building lexical and terminoontological resources, compliant with the Semantic Web technologies (RDF and OWL). In the second part of the talk, starting from the outcomes of the aforementioned project, we will present the scholarly digital platform aimed at describing and analysing text resources. The encoding approach follows the current de facto standard in representing textual resources, namely the Text Encoding Initiative (TEI) guidelines. In particular, we adopted the tag sets defined in the Module for Transcription of Primary Resources. 
An example will be illustrated on how to describe and visualize the process of writing in Saussure's manuscripts. The example was implemented exploiting the Omega framework as well as the Edition Visualization Technology (EVT). In such a way, scholars have at their disposal a powerful tool for searching additions, deletions, substitutions, retracings, different hands, transpositions, marginal and interlinear notes, and all the other textual phenomena significant to understanding the "Movements of thought" which emerge in the handwritten pages.}, KEYWORDS = {Saussure, Digital Humanities, Digital Scholarly Editing, Digital Philology, Computational Philology}, URL = {https://publications.cnr.it/doc/430371}, CONFERENCE_NAME = {Philosophy of Language and Digital Humanities}, CONFERENCE_PLACE = {Rende (CS)}, CONFERENCE_DATE = {07/05/2019-09/05/2019}, } @INPROCEEDINGS{DELTURCO_2019_INPROCEEDINGS_DMDCDZ_430159, AUTHOR = {Del Turco, R. R. and Martignano, C. and Di Pietro, C. and Cacioli, G. and Del Grosso, A. M. and Zenzaro, S.}, TITLE = {DSE Visualisation with EVT: Simplicity is Complex}, YEAR = {2019}, ABSTRACT = {Edition Visualization Technology (EVT) is an open source tool to produce digital scholarly editions on the basis of TEI XML-encoded documents. Born to serve the goals of a single project, the Digital Vercelli Book, it has been developed in such a way as to become a general purpose tool. Several DSE projects are using it to publish digital editions, in fact many researchers have found in EVT the perfect tool for their needs: it is easy to configure and deploy, it is fully customizable, it includes several useful research tools out of the box.}, KEYWORDS = {Digital Philology, Digital Scholarly Editing}, URL = {https://doi.org/10.34894/B6T1YD}, DOI = {10.34894/B6T1YD}, CONFERENCE_NAME = {Complexities}, CONFERENCE_PLACE = {Utrecht}, CONFERENCE_DATE = {09/07/2019-12/07/2019}, BOOKTITLE = {Complexities}, EDITOR = {Pierazzo, E.
and Ciotti, F.}, } @INPROCEEDINGS{MONACHINI_2019_INPROCEEDINGS_M_429318, AUTHOR = {Monachini, M.}, TITLE = {Ricerche di alta qualità negli Studi umanistici: l'infrastruttura CLARIN-IT}, YEAR = {2019}, ABSTRACT = {Nella lezione a invito presso la Scuola di Dottorato di Ateneo al Corso di UMANESIMO E TECNOLOGIE vengono descritti, il ruolo, i vantaggi e le opportunità offerte dalla infrastruttura di ricerca CLARIN. Una platea di giovani in formazione (che costituiranno i ricercatori del futuro) potrà approfondire la conoscenza degli strumenti della infrastruttura che consentono di coniugare studi umanistici ed approcci tecnologici, al fine di compiere ricerche di alta qualità.}, KEYWORDS = {CLARIN, studi umanistici, tecnologie linguistiche}, URL = {https://publications.cnr.it/doc/429318}, CONFERENCE_NAME = {Scuola di Dottorato di Ateneo Corso di UMANESIMO E TECNOLOGIE}, CONFERENCE_DATE = {13/11/2019}, } @INPROCEEDINGS{MONACHINI_2019_INPROCEEDINGS_M_429336, AUTHOR = {Monachini, M.}, TITLE = {Success stories of collaboration in Social Sciences and Humanities (between Italy and Slovenia)}, YEAR = {2019}, ABSTRACT = {The collaboration Italy-Slovenia in the sector of SSH revolves, since many years, around Digital methods for language, in particular, language resources and standards for language data; with the development of e-technology and explosion of data, the support to language studies goes through RI; another hot topic linked to the digital era is "word meaning" which involves a new type of lexicography; the even greater challenge, the concept of Open Science, sees the two countries together in the new big cluster project SSHOC, which aims to implement the EOSC vision and build the Open Cloud for the SSH sector.}, KEYWORDS = {collaboration Italy Slovenija, Social Sciences and Humanities}, URL = {https://publications.cnr.it/doc/429336}, CONFERENCE_NAME = {RESEARCH DAY ITALY-SLOVENIA Bilateral meeting Italy Slovenia on the role of research in the society},
CONFERENCE_PLACE = {University of Nova Gorica, Vipava, Glavni trg 8}, CONFERENCE_DATE = {16/4/2019}, } @INPROCEEDINGS{MONACHINI_2019_INPROCEEDINGS_M_429355, AUTHOR = {Monachini, M.}, TITLE = {CLARIN-IT nella prospettiva delle Digital Humanities}, YEAR = {2019}, ABSTRACT = {Fornire una panoramica relativa alla infrastruttura europea CLARIN e la sua emanazione italiana CLARIN-IT rispondere ai quesiti relativi alla sua missione e ai suoi obiettivi e fare il punto sui vantaggi per la comunità a cui è diretto, ricercatori del settore delle scienze umane e sociali}, KEYWORDS = {digital public humanities, infrastrutture di ricerca}, URL = {https://publications.cnr.it/doc/429355}, CONFERENCE_NAME = {Seminars in Digital Public Humanities}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {23 ottobre 2019}, } @INPROCEEDINGS{MONACHINI_2019_INPROCEEDINGS_MSC_429370, AUTHOR = {Monachini, M. and Stamuli, M. F. and Calamai, S.}, TITLE = {Folk in Tuscany: the Caterina Bueno sound archive}, YEAR = {2019}, ABSTRACT = {Caterina Bueno's sound archive is composed of 476 carriers (audio reels and compact cassettes), corresponding to nearly 714 hours of recording and was digitised during the PAR-FAS project Gra.fo (Grammo-foni. Le soffitte della voce, UNISI \& SNS, http://sns.grafo.it). It was located at two different owners': part of it was stored at Caterina's heirs' house, while the rest was kept by the former culture counsellor of the Municipality of San Marcello Pistoiese, in the Montagna Pistoiese, where a multi-media library was supposed to be set up. Unfortunately, disagreements and misunderstandings between the two parties have so far made the archive fragmented and inaccessible to the community. Both owners, independently, have turned to Silvia Calamai for the reassembly of the whole archive in the digital domain, in respect of the artist's wishes. 
After digitising, the carriers were returned to their owners, who helped in finding an arrangement for the sound archive, which can be divided according to the following categories: field-research (investigations carried out in the Tuscan countryside from the late 50s to the end of the artist's life); live performances (recordings of concerts and events); performances' rehearsals (recordings of rehearsals with musicians). In 2019 Regione Toscana decided to support the project of cataloguing and disseminating Caterina Bueno Archive and the following partners were involved: Università degli Studi di Siena (Silvia Calamai), Soprintendenza Archivistica e Bibliografica della Toscana (Maria Francesca Stamuli), CLARIN-IT (Monica Monachini), and Unione dei comuni del Casentino (Pierangelo Bonazzoli). Archivio Vi.vo will thus constitute a pilot study within CLARIN-IT to experiment methods and offer services to disciplines interested in oral sources. The ILC4CLARIN Italian node offers archiving preservation access and tools for linguistic data of a written type; within Archivio Vi.vo. the repository will be improved through experimental approach to conservation, management and access to audio and audio-video data and metadata. Archivio Vi.Vo. will develop a model which can be replicated on other audio-visual archives, even outside the context of Tuscany. The experimental activity will aim to adopt the model and high-performance computing and archiving services of the new GARR network infrastructure, built along the Cloud paradigm. 
This model will be disseminated both to the scientific community interested in accessing these data, and to the general public who enjoy ethnomusical materials produced in the territory.}, KEYWORDS = {long-term preservation, oral archives, infrastructures, conservation, access, metadata}, URL = {https://www.clarin.eu/sites/default/files/clarin2019_bazaar_calamai-stmuli-monachini.pdf}, CONFERENCE_NAME = {CLARIN 2019 Annual Conference}, CONFERENCE_PLACE = {Leipzig}, CONFERENCE_DATE = {30/09/2019-2/10/2019}, BOOKTITLE = {CLARIN Annual Conference 2019 Abstracts}, } @INPROCEEDINGS{PARDELLI_2019_INPROCEEDINGS_PGB_398956, AUTHOR = {Pardelli, G. and Goggi, S. and Boschetti, F.}, TITLE = {Strolling around the dawn of Digital Humanities}, YEAR = {2019}, ABSTRACT = {Nelle ricerche umanistiche l'impiego dell'elaboratore elettronico prende il via nella seconda metà del ventesimo secolo favorendo l'uso di metodi statistici sia nello studio di opere letterarie che nello studio delle lingue, promuovendo un sodalizio interdisciplinare che è arrivato ai giorni nostri senza interruzione. In questo contributo tentiamo di fissare alcuni momenti salienti del processo che ha visto la nascita comune della Linguistica Computazionale e delle Digital Humanities nonché i loro alterni allontanamenti e ricongiungimenti.}, KEYWORDS = {Digital Humanities (DH), Computational Linguistics (CL), History}, PAGES = {261-264}, URL = {http://aiucd2019.uniud.it/book-of-abstracts/}, CONFERENCE_NAME = {8th Annual Conference AIUCD 2019. Teaching and research in Digital Humanities' era}, CONFERENCE_PLACE = {Udine, Dipartimento Di Studi Umanistici e Patrimonio Culturale, Università di Udine}, CONFERENCE_DATE = {23-25 gennaio 2019}, } @INPROCEEDINGS{PICCINI_2019_INPROCEEDINGS_PSAMBAGE_427271, AUTHOR = {Piccini, S. and Abrate, M. and Bellandi, A.
and Giovannetti, E.}, TITLE = {Rappresentazione e costruzione di risorse terminologiche diacroniche nell'era del web semantico}, YEAR = {2019}, ABSTRACT = {Con il presente contributo proponiamo un modello ed uno strumento volti a rappresentare formalmente, interrogare e visualizzare l'evoluzione diacronica di concetti e termini in un dato dominio, nel quadro del web semantico. Quest'ultimo sta attirando sempre più l'attenzione di lessicografi e terminologi computazionali, in quanto garantisce interoperabilità, facile accesso e riuso delle risorse lessicali/terminologiche all'interno di una comunità scientifica.}, KEYWORDS = {terminologia, terminologia diacronica, web semantico, lemon, lexO}, URL = {http://www.assiterm91.it/wp-content/uploads/2020/03/Piccini.pdf}, CONFERENCE_NAME = {XXIX Convegno Ass. I. Term}, CONFERENCE_PLACE = {Accademia della Crusca, Villa Medicea del Castello, Firenze}, CONFERENCE_DATE = {30-31/05/2019}, } @INPROCEEDINGS{PIRRELLI_2019_INPROCEEDINGS_P_424205, AUTHOR = {Pirrelli, V.}, TITLE = {Investigating inflection as a complex system}, YEAR = {2019}, ABSTRACT = {From a cross-linguistic perspective, different inflection systems appear to apportion word processing costs differently, depending on when and where, in the full form, morpho-lexical and morpho-syntactic information is encoded. The resulting balance is the outcome of an interaction between form frequency and morphological productivity, responding to basic communicative requirements. Big families of stem-sharing inflected forms constitute the productive core of an inflection system. This core is easy to learn, as it requires memorization of one stem only, with all inflected forms being redundantly built upon it. Unsurprisingly, generalizable paradigms are less sensitive to token frequency effects, and tend to be located in the long, low-frequency tail of the Zipfian distribution of word forms.
In contrast, the head of the Zipfian distribution mostly contains small families of alternating and possibly suppletive stems, which, however shorter, morpho-phonologically simpler and easier to process, require high token frequency to be learned and resist pressure towards regularization.}, KEYWORDS = {Morphological paradigms, Mental Lexicon, Inflectional morphology}, PAGES = {23-24}, URL = {https://publications.cnr.it/doc/424205}, CONFERENCE_NAME = {International Symposium of Morphology (ISMo) 2019}, CONFERENCE_PLACE = {Université de Paris, Paris}, CONFERENCE_DATE = {25/9/2019, 27/9/2019}, } @INPROCEEDINGS{SALVATORI_2019_INPROCEEDINGS_SBD_400259, AUTHOR = {Salvatori, E. and Boschetti, F. and Del Grosso, A. M.}, TITLE = {From collaborative transcription to interdisciplinary education: the postcards of the Great War case}, YEAR = {2019}, KEYWORDS = {Digital Public History, Collaborative Philology, Text Encoding, Digital Philology, Web Application, Educational, Digital Textual Scholarship}, PAGES = {211-215}, URL = {http://amsacta.unibo.it/6361/}, DOI = {10.6092/unibo/amsacta/6361}, ISBN = {978-88-942535-3-5}, CONFERENCE_NAME = {Didattica e ricerca al tempo delle Digital Humanities / Teaching and research in Digital Humanities' era}, CONFERENCE_PLACE = {Udine}, CONFERENCE_DATE = {23-25/01/2019}, BOOKTITLE = {Didattica e ricerca al tempo delle Digital Humanities / Teaching and research in Digital Humanities' era. Ottavo Convegno Annuale 8th Annual Conference AIUCD 2019 (Udine, 23-25 gennaio 2019) Book of Abstracts}, EDITOR = {Allegrezza, S.}, } @TECHREPORT{CARDILLO_2019_TECHREPORT_CS_403463, AUTHOR = {Cardillo, F. A. and Straccia, U.}, TITLE = {Towards Ontology-based Explainable Classification of Rare Events}, YEAR = {2019}, ABSTRACT = {Rare events (e.g. major floods, violent conflicts) are events that have potentially widespread and/or disastrous impact on society. The overall goal is to build a framework capable to classify, predict and explain such rare events.
To do so, we envisage the usage of a mixture of sub-symbolic Machine Learning (ML) and Ontology-based Statistical Relational Learning (OSRL) techniques to generate rare events classifiers and predictors, which additionally may be mapped into natural language to ease human interpretability of the decision process.}, KEYWORDS = {Ontologies Explainable Classification of Rare Events, Statistical Relational Machine Learning}, PAGES = {1-2}, URL = {https://hal.archives-ouvertes.fr/hal-02104520}, } @MISC{BOSCHETTI_2019_MISC_BD_430372, AUTHOR = {Boschetti, F. and Del Grosso, A. M.}, TITLE = {Digital Philology}, YEAR = {2019}, ABSTRACT = {Session outline: 1) Introduction 1a) Computational Linguistics and Digital Philology 1b) Collaboration vs Cooperation 1c) Defining Data Types and APIs for Scholarly Editing 1d) The Hermeneutical circle 2) Digital Ecdotics 2a) Representation of Textual Phenomena by TEI-XML 2b) Representation of Textual Phenomena by Domain-Specific Languages 2c) Visual Presentation of Encoded Data 2d) Stemma Codicum and Alignment of Variants 2e) Querying Encoded Data 3) Digital Hermeneutics 3a) Linguistic and Stylistic Analyses 3b) Thematic Analysis 3c) Interdisciplinary Approaches to Philological Issues 3d) Semantic Querying 4) Conclusion 4a) Putting All Together 4b) Further Perspectives Seminar readings - Boschetti, Federico, e Angelo Mario Del Grosso. 2015. «TeiCoPhiLib: A Library of Components for the Domain of Collaborative Philology». Journal of the Text Encoding Initiative, n. 8. https://doi.org/10.4000/jtei.1285 - Burnard, Lou. 2014. WHAT IS THE TEXT ENCODING INITIATIVE?. OpenEdition Press. http://books.openedition.org/oep/426 - Schmidt, Desmond. 2010. «The inadequacy of embedded markup for cultural heritage texts». Literary and Linguistic Computing 25 (3): 337-56. https://doi.org/10.1093/llc/fqq007 Further reading - Berti, Monica, Bridget Almas, David Dubin, Greta Franzini, Simona Stoyanova, e Gregory Ralph Crane. 2014.
«The Linked Fragment: TEI and the Encoding of Text Reuses of Lost Authors». JTEI 8. https://doi.org/10.4000/jtei.1218 - Bozzi, Andrea. 2014. «Computer-assisted Scholarly Editing of Manuscript Sources». In New publication cultures in the humanities: exploring the paradigm shift, P. Davidhazi (ed.), 99-115. Amsterdam: Amsterdam University Press. http://www.oapen.org/record/515678 - Driscoll, Matthew James, e Elena Pierazzo, (eds) 2016. Digital Scholarly Editing: Theories and Practices. Vol. 4. Digital Humanities Series. Open Book Publishers. Chapters 2-4. http://www.openbookpublishers.com/product/483/digital-scholarly-editing-theories-and-practices/eec262cdd3121ebd5eb2bf78581594f2}, KEYWORDS = {digital philology, digital humanities}, URL = {https://github.com/SunoikisisDC/SunoikisisDC-2018-2019/wiki/Summer2019-Session2}, } @MISC{CARDAMONE_2019_MISC_CD_430891, AUTHOR = {Cardamone, R. D. and Del Grosso, A. M.}, TITLE = {L'edizione digitale: una risorsa per tutti}, YEAR = {2019}, ABSTRACT = {Presentazione dell'edizione digitale degli statuti quattrocenteschi di Monterosso al Mare completa di immagini, trascrizione e traduzione.}, KEYWORDS = {digital humanities, digital scholarly edition}, URL = {https://www.cfs.unipi.it/2019/11/27/gli-statuti-quattrocenteschi-di-monterosso-restituiti-alla-comunita/}, } @MISC{DELGROSSO_2019_MISC_D_430821, AUTHOR = {Del Grosso, A. M.}, TITLE = {Introduzione sistema git per edizioni collaborative}, YEAR = {2019}, ABSTRACT = {Il sistema più diffuso per il controllo di versione per risorse elettroniche (VCS - Version Control System) e' oggi "git", un sistema open source ad architettura distribuita tra i più utilizzati per lo sviluppo di grandi progetti collaborativi, come ad esempio il kernel di Linux. La prima parte del seminario introdurrà il modello generale e i principi di progettazione che sottendono l'ambiente di versionamento, come ad esempio lo stato delle risorse, il workflow di lavoro, l'ambiente in locale e quello in remoto. 
Verranno anche mostrati i comandi più importanti per un efficace utilizzo dello strumento, come la creazione di un repository, fare commit degli aggiornamenti e salvare le modifiche su un host remoto. L'obiettivo è quello di fornire ai partecipanti una prima panoramica sul funzionamento del sistema git facendo ampio uso dell'interfaccia a riga di comando da terminale. La seconda parte del seminario introdurrà la piattaforma github, uno dei più comuni host per la gestione remota e collaborativa di repository git. Sarà mostrata quindi sia la procedura per creare un account sulla piattaforma sia le modalità di gestione per un semplice progetto collaborativo di codifica di testi.}, KEYWORDS = {git, github, summer school, digital humanities}, URL = {http://digitaltools.labcd.unipi.it/past-editions/program2019/}, } @MISC{DELGROSSO_2019_MISC_D_430828, AUTHOR = {Del Grosso, A. M.}, TITLE = {AliEval-Revisione allineamento di testi paralleli multilingua}, YEAR = {2019}, ABSTRACT = {Sistema per la revisione di testi paralleli allineati parola per parola}, KEYWORDS = {bitext alignment, digital humanities, talmud}, URL = {http://omega.ilc.cnr.it/dev/proofreader/}, } @MISC{DELGROSSO_2019_MISC_D_430903, AUTHOR = {Del Grosso, A. M.}, TITLE = {Visualizzatore immagini con tiling per software evt2js}, YEAR = {2019}, ABSTRACT = {Sviluppo componente web per la visualizzazione ottimizzata delle immagini ad alta risoluzione in seno al progetto di visualizzazione di edizioni digitali scientifiche EVT. Nello specifico il visualizzatore è stato implementato per l'edizione digitale della vita di San Teobaldo. La fonte originale è conservata presso la Diocesi di Alba.}, KEYWORDS = {digital scholarly edition, EVT, viewer}, URL = {https://www.visitmudi.it/EVT/}, } @MISC{DELGROSSO_2019_MISC_DP_430373, AUTHOR = {Del Grosso, A. M.
and Piccini, S.}, TITLE = {Approcci digitali e computazionali allo studio dei documenti manoscritti della tarda latinità: il caso Clavius}, YEAR = {2019}, ABSTRACT = {Introduzione alle digital humanities e alla lessicografia digitale con esempi e riferimenti tratti dal progetto Clavius on the Web.}, KEYWORDS = {digital humanities, digital philology, digital lexicography}, URL = {https://www.dipartimentidieccellenza-dilef.unifi.it/vp-122-calendario-attivita-didattica-marzo-2019.html}, } @MISC{DELGROSSO_2019_MISC_DS_445759, AUTHOR = {Del Grosso, A. M. and Spampinato, D.}, TITLE = {Edizione digitale delle Lettere di Bellini}, YEAR = {2019}, ABSTRACT = {La codifica dell'edizione scientifica digitale è condotta seguendo le ultime linee guida della Text Encoding Initiative e istanziata su un campione rappresentativo di missive. L'edizione è accessibile via web con il software open source: Edition Visualization Technology; ma è anche stata progettata per essere integrata nel percorso museale interattivo e multimediale in allestimento, senza rinunciare al rigore scientifico della trascrizione delle lettere presente nella recente edizione critica.}, KEYWORDS = {Digital Scholarly Edition, TEI, Vincenzo Bellini, Digital Correspondence}, URL = {http://licodemo.ilc.cnr.it/bellini-in-rete}, } @ARTICLE{CARDILLO_2018_ARTICLE_CFMP_396348, AUTHOR = {Cardillo, F. A. and Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {Deep Learning of Inflection and the Cell-Filling Problem}, YEAR = {2018}, ABSTRACT = {Machine learning offers two basic strategies for morphology induction: lexical segmentation and surface word relation. The first approach assumes that words can be segmented into morphemes. Inferring a novel inflected form requires identification of morphemic constituents and a strategy for their recombination. The second approach dispenses with segmentation: lexical representations form part of a network of associatively related inflected forms. 
Production of a novel form consists in filling in one empty node in the network. Here, we present the results of a task of word inflection by a recurrent LSTM network that learns to fill in paradigm cells of incomplete verb paradigms. Although the task does not require morpheme segmentation, we show that accuracy in carrying out the inflection task is a function of the model's sensitivity to paradigm distribution and morphological structure.}, KEYWORDS = {Deep Learning, LSTM, Cell-Filling Problem}, PAGES = {57-75}, URL = {https://publications.cnr.it/doc/396348}, VOLUME = {4}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{CHIARELLA_2018_ARTICLE_CBBCRZMC_393262, AUTHOR = {Chiarella, D. and Bibuli, M. and Bruzzone, G. and Caccia, M. and Ranieri, A. and Zereik, E. and Marconi, L. and Cutugno, P.}, TITLE = {A Novel Gesture-Based Language for Underwater Human-Robot Interaction}, YEAR = {2018}, ABSTRACT = {The underwater environment is characterized by hazardous conditions that make it difficult to manage and monitor even the simplest human operation. The introduction of a robot companion with the task of supporting and monitoring the divers during their activities and operations underwater can help to solve some of the problems that usually arise in this scenario. In this context, a proper communication between the diver and the robot is imperative for the success of the dive. However, the underwater environment poses a set of technical challenges which are not readily surmountable thus limiting the spectrum from which possibilities can be chosen. This paper presents the design and development of a gesture-based communication language which has been employed for the entire duration of the European project CADDY (Cognitive Autonomous Diving Buddy). 
This language, the Caddian, was built upon consolidated and standardized underwater gestures that are commonly used in recreational and professional diving. Its use and integration during field tests with a remotely operated underwater vehicle (ROV) is also shown.}, KEYWORDS = {marine robotics, underwater human-robot interaction, gesture-based language, field trials}, PAGES = {19}, URL = {https://www.mdpi.com/2077-1312/6/3/91}, VOLUME = {6}, DOI = {10.3390/jmse6030091}, PUBLISHER = {Molecular Diversity Preservation International (Basel)}, ISSN = {2077-1312}, JOURNAL = {Journal of marine science and engineering}, } @ARTICLE{DISEGNI_2018_ARTICLE_D_395302, AUTHOR = {Di Segni, D. G.}, TITLE = {Il Talmud nella nuova traduzione italiana}, YEAR = {2018}, ABSTRACT = {Origine, struttura e caratteristiche del Talmud. I roghi e la censura del Talmud. La nuova traduzione italiana del Talmud. La rilevanza del Talmud nel mondo contemporaneo.}, KEYWORDS = {Talmud, Istituto Linguistica Computazionale CNR, Traduco}, PAGES = {633-644}, URL = {https://publications.cnr.it/doc/395302}, VOLUME = {XV}, PUBLISHER = {Il Mulino (Bologna, Italia)}, ISSN = {1824-0771}, JOURNAL = {Nuova informazione bibliografica}, } @ARTICLE{FERRO_2018_ARTICLE_FMP_397012, AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {Discriminative word learning is sensitive to inflectional entropy}, YEAR = {2018}, ABSTRACT = {Psycholinguistic evidence based on inflectional and derivational word families has emphasised the combined role of Paradigm Entropy and Inflectional Entropy in human word processing. Although the way frequency distributions affect behavioural evidence is clear in broad outline, we still miss a clear algorithmic model of how such a complex interaction takes place and why. 
The main challenge is to understand how the local interaction of learning and processing principles in morphology can result in global effects that require knowledge of the overall distribution of stems and affixes in word families. We show that principles of discriminative learning can shed light on this issue. We simulate learning of verb inflection with a discriminative recurrent network of specialised processing units, whose level of temporal connectivity reflects the frequency distribution of input symbols in context. We analyse the temporal dynamic with which connection weights are adjusted during discriminative learning, to show that self-organised connections are optimally functional to word processing when the distribution of inflected forms in a paradigm (Paradigm Entropy) and the distribution of their inflectional affixes across paradigms (Inflectional Entropy) diverge minimally.}, KEYWORDS = {discriminative learning, word processing, recurrent neural networks, relative entropy}, PAGES = {307-327}, URL = {https://www.rivisteweb.it/doi/10.1418/91871}, VOLUME = {XVII}, DOI = {10.1418/91871}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{GOGGI_2018_ARTICLE_GPRBM_388612, AUTHOR = {Goggi, S. and Pardelli, G. and Russo, I. and Bartolini, R. and Monachini, M.}, TITLE = {Providing Access to Grey Literature: The CLARIN Infrastructure}, YEAR = {2018}, ABSTRACT = {"In the electronic age, the World Wide Web has played a major role in making scientific information accessible to a wide audience more rapidly and efficiently. 
This democratic approach to information dissemination in science is changing the way science is perceived and implemented in our daily lives" (Weintraub, 2000).}, KEYWORDS = {CLARIN-IT, CLARIN-European Research Infrastructure for Language Resources and Technology, Grey Literature}, PAGES = {87-93}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85048643343\&origin=inward}, VOLUME = {14}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{PECCHIOLI_2018_ARTICLE_PABGM_397525, AUTHOR = {Pecchioli, A. and Albanesi, D. and Bellandi, A. and Giovannetti, E. and Marchi, S.}, TITLE = {Annotazione Linguistica Automatica dell'Ebraico Mishnaico: Esperimenti sul Talmud Babilonese}, YEAR = {2018}, ABSTRACT = {The automatic linguistic analysis of ancient Hebrew represents a new research opportunity in the field of Jewish studies. In fact, very little has been produced, both in terms of linguistic resources and, above all, of tools for the analysis of ancient Hebrew. This article illustrates a work born within the Italian Translation of the Babylonian Talmud Project aimed at the construction of an automatic linguistic annotator of Mishnaic Hebrew.}, KEYWORDS = {Babylonian Talmud, Natural Language Processing, Mishnaic Hebrew}, PAGES = {281-291}, URL = {http://aisg.cise.unipi.it/Materia-giudaica-2018/018-Pecchioli%20pp%20281-292B.pdf}, VOLUME = {XXIII}, PUBLISHER = {Giuntina (Firenze, Italia)}, ISSN = {2282-4499}, JOURNAL = {Materia giudaica Print}, } @BOOK{GARCIAMACHO_2018_BOOK_GS_389832, AUTHOR = {Garcia Macho, M. L. and Sassi, M.}, TITLE = {Léxico del Tratado del esphera y del arte de marear con el regimiento de las alturas, con algunas reglas nuevamente escritas muy necessarias de Francisco de Falero}, YEAR = {2018}, ABSTRACT = {El léxico del Tratado del esphera y del Arte del marear de Francisco Faleiro, forma parte del conjunto lexicográfico del Diccionario de la navegación del Siglo de Oro. 
Para la realización de este diccionario, se ha contado con dos proyectos de investigación: HUM2006, financiado por el Ministerio de Educación y Ciencia de España, y FFI2012-36768, del Ministerio de Economía y Competitividad y cuatro ayudas de movilidad: dos concedidas por el Consiglio Nazionale della Ricerca italiano, CNR [Istituto di Linguistica Computazionale de Italia (2006 y 2007)] y dos por el Ministerio de Ciencia e Innovación de España [Programa de Estancias de Profesores de Universidad e Investigadores del CSIC en Centros de Investigación Extranjeros (2005 y 2010)]. Este volumen contiene la concordancia lematizada, los índices de frecuencia de lemas y formas, los índices de los nombres propios y el diccionario inverso del Tratado.}, KEYWORDS = {Indici vari, Dizionario della Navigazione, Siglo de Oro, Concordanze per lemma}, PAGES = {1-488}, URL = {http://portal.uned.es/portal/page?_pageid=93,62295002\&_dad=portal\&_schema=PORTAL}, ISBN = {978-84-362-7383-0}, } @INCOLLECTION{AGNOLONI_2018_INCOLLECTION_AV_423867, AUTHOR = {Agnoloni, T. and Venturi, G.}, TITLE = {Semantic processing of legal texts}, YEAR = {2018}, ABSTRACT = {The paper provides an overview of the field of semantic processing of legal texts, combining views and perspectives from the computational linguistic and Artificial Intelligence and Law (AI \& Law) communities. The last few years have seen a growing body of research and practice in the field of AI \& Law which addresses a range of topics: semantic and cross-language legal Information Retrieval, document classification, legal drafting, legal knowledge extraction, automated legal argumentation, as well as the construction of legal ontologies and their application. The increasing availability of legal corpora accessible as processable data is making viable their partially automated conversion into legal knowledge bases. 
In this context, it is of paramount importance the use of Natural Language Processing (NLP) techniques and tools that automate the process of knowledge extraction from legal texts. Accordingly, the paper aims at discussing how the two research communities can benefit from the interaction of the different perspectives: the legal artificial intelligence community can gain insight into state-of-the-art linguistic technologies, tools and resources, and the computational linguists can take advantage of the large and often multilingual legal resources (corpora as well as lexicons and ontologies) for training, domain adaptation and evaluation of current NLP technologies and tools. The authors will present an overview on semantic resources for legal texts annotation and processing. Different kind of resources (linguistic, lexical, conceptual, formal) will be introduced and their differences, methodological premises, intended use and possible integration will be highlighted. The peculiarities of the legal domain and legal language will be discussed in relation with the construction and use of legal semantic resources. The issue of multilingualism, multilingual and multi-legal system access to legal information will be also discussed showing how formalized lexical, linguistic and conceptual legal resources can support the task. How NLP tools and techniques can be fruitfully exploited to semantically process collections of legal texts will be introduced in the second part of the paper. 
In particular, the authors will show how they can be used to automatically extract the relevant knowledge contained in legal text corpora, to structure the extracted knowledge in semantic resources (such as domain-specific ontologies or thesauri), and to semantically annotate the texts with the extracted information to pave the way to content-based access and querying.}, KEYWORDS = {Semantic Processing, Natural Language Processing, Ontology Learning, Legal Texts}, PAGES = {109-137}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85061292435\&origin=inward}, DOI = {10.1515/9781614514664-006}, PUBLISHER = {Walter De Gruyter Inc (Boston/Berlin/Munich, USA)}, ISBN = {978-1-61451-669-9}, } @INCOLLECTION{MARZI_2018_INCOLLECTION_M_390949, AUTHOR = {Marzi, C.}, TITLE = {Morpho - phonotactic typicality and second language acquisition and processing}, YEAR = {2018}, ABSTRACT = {According to many accounts of word processing and access, an input word concurrently activates non-target lexical neighbours that become available for further processing stages. Psycholinguistic evidence shows how prediction and competition based on word similarity and lexical redundancy affect speakers' anticipation of incoming stimuli, so as to speed input recognition and improve lexical decision (Luce/Pisoni 1998; Bailey/Hahn 2001; Hahn/Bailey 2005, among others). As observed by Bailey and Hahn (2001), wordlikeness affects both language acquisition and processing. Wordlikeness can be defined in terms of phonotactic/ orthotactic likelihood and lexical density. Both neighbourhood size and frequency distribution of neighbours are known to play a role in word prediction and competition. In this perspective, monitoring this competing behaviour can shed some light on the relationship between phonotactic/orthotactic likelihood and lexical density, and their connection with issues of word recognition and production. 
My goal in this chapter is to provide a computational model of bilingual lexical self-organisation, with language-independent architectural and functional requirements of the lexical store, together with language-specific phonotactic constraints, appearing to control aspects of interaction of first and second language (hereafter L1-L2) and define the propensity to acquire novel words, showing how acquisitional strategies are affected by past knowledge of language and entrenched expectations on incoming stimuli. On the one hand, a strong expectation based on L1 affects the way L2 inputs are perceived. On the other hand, language-independent architectural and functional requirements of the lexical store, such as its highly integrated organisation and language-non-selective access (Dijkstra/van Heuven 2002), appear to control aspects of L1-L2 interaction. Simulations in the neuro-computational framework of Temporal Self-Organising Maps (TSOMs, Ferro et al. 2011; Marzi et al. 2012, 2014a, 2016; Pirrelli et al. 2014, 2015), where word processing and lexical acquisition are implemented as recoding and storage strategies for time-series of symbolic units, will highlight how partially overlapping phonological representations may cause competition in incremental learning, and how weaker connections and recycled memory resources make L2 representations underspecified due to the lack of strong lexical expectations and selective specialisation typical of the L1 representations.}, KEYWORDS = {L1-L2 acquisition, bilingual lexical self-organisation, phonotactic typicality, discriminative recurrent network}, PAGES = {219-232}, URL = {https://www.francoangeli.it/Ricerca/Scheda_Libro.aspx?ID=25216\&Tipo=Libro\&strRicercaTesto=25216\&lingua=it\&titolo=tipologia%2c+acquisizione%2c+grammaticalizzazione.+typology%2c++acquisition%2c+grammaticalization+studies}, VOLUME = {1095.79},
PUBLISHER = {Franco Angeli (Milano, ITA)}, ISBN = {978-88-917-7847-5}, BOOKTITLE = {Tipologia, Acquisizione, Grammaticalizzazione-Typology, Acquisition, Grammaticalization studies}, EDITOR = {Chini, M. and Cuzzolin, P.}, } @INCOLLECTION{MONACHINI_2018_INCOLLECTION_MNS_387374, AUTHOR = {Monachini, M. and Nicolosi, A. and Stefanini, A.}, TITLE = {Digital Classics and CLARIN-IT: What Italian Scholars of Ancient Greek Expect from Digital Resources and Technology}, YEAR = {2018}, ABSTRACT = {This paper presents and discusses the findings of a survey carried out to assess the use of digital resources and digital technologies with respect to work in ancient Greek scholarship, with the aim to identify the factors that are likely to constrain its use as well as to elicit needs and requirements of ancient Greek scholars in Italy. The survey is in line with the principles behind the user engagement strategy developed by CLARIN-ERIC and constitutes one of the national efforts undertaken by CLARIN-IT to contribute to the wider impact of CLARIN on Digital Classicists. The survey, as well as other surveys carried out in the sector in the last decade, points out that most of the available resources do not respond to users' requirements. This motivated us to develop a mock-up of a digital editor of Archilochus, which, mostly grounded on previous studies by Nicolosi, draws on the outcomes of the survey. The experiment includes a sample prototype to submit for evaluation by end-users.
The final aim is to identify good practices and new models to enable new approaches to the study of classical texts and profile a new workbench for scholarly digital edition.}, KEYWORDS = {Digital Classics, User Involvement, User requirements, CLARIN ERIC, CLARIN Infrastructure}, PAGES = {61-74}, URL = {https://ep.liu.se/ecp/147/006/ecp17147006.pdf}, VOLUME = {147}, ISBN = {978-91-7685-273-6}, BOOKTITLE = {Selected papers from the CLARIN Annual Conference 2017, Budapest, 18-20 September 2017}, } @INCOLLECTION{PIRRELLI_2018_INCOLLECTION_P_398877, AUTHOR = {Pirrelli, V.}, TITLE = {Morphological Theory And Computational Linguistics}, YEAR = {2018}, ABSTRACT = {For decades, processing issues have taken centre stage in the debate on the theoretical foundations of linguistic morphology. The present chapter provides a computer-based, algorithmic view on these issues, ranging from the encoding of input data to the structure of output representations, going through the basic operations of word splitting, storage, access, retrieval, and assembly of intermediate representations.}, KEYWORDS = {word processing, word storage, computational morphology, lexical modelling, machine language learning, finite state technology, artificial neural networks}, PAGES = {573-593}, URL = {http://www.oxfordhandbooks.com/view/10.1093/oxfordhb/9780199668984.001.0001/oxfordhb-9780199668984-e-32?rskey=qZuY8Z\&result=9}, DOI = {10.1093/oxfordhb/9780199668984.013.32}, PUBLISHER = {Oxford University Press (Oxford, GBR)}, ISBN = {978-0-19-966898-4}, BOOKTITLE = {The Oxford Handbook of Morphological Theory}, EDITOR = {Audring, J. and Masini, F.}, } @EDITORIAL{ASCOLI_2018_EDITORIAL_AD_395313, AUTHOR = {Ascoli, M. 
and Di Segni, G.}, TITLE = {Talmud Babilonese - Trattato Ta'anìt}, YEAR = {2018}, ABSTRACT = {Traduzione e commento del trattato Ta'anit (Digiuno) del Talmud Babilonese con testo originale a fronte}, KEYWORDS = {Talmud, Traduco, Linguistica computazionale}, PAGES = {332}, URL = {https://www.talmud.it/}, VOLUME = {9}, PUBLISHER = {Giuntina (Firenze, ITA)}, ISBN = {978-88-8057-748-5}, } @EDITORIAL{BRANCO_2018_EDITORIAL_BCC_401835, AUTHOR = {Branco, A. and Calzolari, N. and Choukri, K.}, TITLE = {4REAL 2018 Workshop on Replicability and Reproducibility of Research Results in Science and Technology of Language Proceedings}, YEAR = {2018}, ABSTRACT = {This workshop sought to contribute to the discussion and the advancement on a topic that has been given insufficient attention in the research area of language processing tools and resources and that has been an important topic emerging in other scientific areas, continuing the objectives of the first edition of the 4REAL workshop, at LREC 2016. We invited the submission of articles that present cases, either with positive or negative results, of actual replication or reproduction exercises of previous published results in our area.}, KEYWORDS = {Reproduction, Replication, Validation}, PAGES = {1-36}, URL = {http://4real2018.di.fc.ul.pt/wp-content/uploads/2018/05/lrec2018_workshop_proceedings_4REAL.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-21-4}, } @EDITORIAL{CALZOLARI_2018_EDITORIAL_CCCDGHIMMMMOPT_401744, AUTHOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. 
and Tokunaga, T.}, TITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC-2018)}, YEAR = {2018}, ABSTRACT = {It is the LREC 20th Anniversary and LREC has become one of the most successful conferences of the field. Data are pervasive in Natural Language Processing and Language Technology: we call our data Language Resources (LR). But when LREC was started by ELRA, in 1998 in Granada, from an idea of Antonio Zampolli and Joseph Mariani, it was really a new adventure and a challenge. There were well established big conferences but he thought that the new emerging field of Language Resources deserved its own dedicated forum. In the keynote talk I gave at LREC1998 I could say: "the infrastructural role of Language Resources as the necessary common platform on which new technologies and applications can be based is nowadays widely recognised." This could not have been said only few years before. I had the pleasure and the honour of being involved in LREC from the beginning, first as member of the Program Committee and since 2004 as Conference Chair.}, KEYWORDS = {Language Resources, Language Technology}, PAGES = {1-4628}, URL = {https://www.aclweb.org/anthology/L18-1}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, } @EDITORIAL{SORIA_2018_EDITORIAL_SBP_387365, AUTHOR = {Soria, C. and Besacier, L. and Pretorius, L.}, TITLE = {Proceedings of CCURL 2018-Sustaining knowledge diversity in the digital age}, YEAR = {2018}, ABSTRACT = {Proceedings of the CCURL 2018 workshop}, KEYWORDS = {knowledge diversity, digital age, language resources, language technologies}, PAGES = {i-75}, URL = {http://lrec-conf.org/workshops/lrec2018/W26/pdf/book_of_proceedings.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-22-1}, } @EDITORIAL{BERNHARD_2018_EDITORIAL_BS_443019, AUTHOR = {Bernhard, D. 
and Soria, C.}, TITLE = {Automatic processing of under-resourced languages|Traitement automatique des langues peu dotées}, YEAR = {2018}, KEYWORDS = {less-resourced languages, NLP}, PAGES = {7-14}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85063404297\&origin=inward}, VOLUME = {59}, PUBLISHER = {TAL (Saint-Cloud, Francia)}, ISSN = {1248-9433}, BOOKTITLE = {TAL. Traitement automatique des langues}, } @INPROCEEDINGS{ADORNI_2018_INPROCEEDINGS_ADKTV_385339, AUTHOR = {Adorni, G. and Dell'Orletta, F. and Koceva, F. and Torre, I. and Venturi, G.}, TITLE = {Extracting dependency relations from digital learning content}, YEAR = {2018}, ABSTRACT = {Digital Libraries present tremendous potential for developing e-learning applications, such as text comprehension and question-answering tools. A way to build this kind of tools is structuring the digital content into relevant concepts and dependency relations among them. While the literature offers several approaches for the former, the identification of dependencies, and specifically of prerequisite relations, is still an open issue. We present an approach to manage this task.}, KEYWORDS = {Prerequisite relationship, Concept extraction, Graph mining}, PAGES = {114-119}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85041860435\&origin=inward}, VOLUME = {806}, DOI = {10.1007/978-3-319-73165-0_11}, PUBLISHER = {Springer (Heidelberg, Germania)}, ISSN = {1865-0929}, CONFERENCE_NAME = {14th Italian Research Conference on Digital Libraries (IRCDL 2018)}, CONFERENCE_PLACE = {Udine}, CONFERENCE_DATE = {25-26 gennaio 2018}, BOOKTITLE = {Communications in computer and information science (Print)}, } @INPROCEEDINGS{ALZETTA_2018_INPROCEEDINGS_ADMSV_391617, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Simi, M. and Venturi, G.}, TITLE = {Assessing the Impact of Iterative Error Detection and Correction. 
A Case Study on the Italian Universal Dependency Treebank}, YEAR = {2018}, ABSTRACT = {Detection and correction of errors and inconsistencies in "gold treebanks" are becoming more and more central topics of corpus annotation. The paper illustrates a new incremental method for enhancing treebanks, with particular emphasis on the extension of error patterns across different textual genres and registers. Impact and role of corrections have been assessed in a dependency parsing experiment carried out with four different parsers, whose results are promising. For both evaluation datasets, the performance of parsers increases, in terms of the standard LAS and UAS measures and of a more focused measure taking into account only relations involved in error patterns, and at the level of individual dependencies.}, KEYWORDS = {Error Detection, Universal Dependency Treebanks, Syntactic parsing}, PAGES = {1-7}, URL = {http://universaldependencies.org/udw18/PDFs/39_Paper.pdf}, ISBN = {978-1-948087-84-1}, CONFERENCE_NAME = {Universal Dependencies Workshop 2018 (UDW 2018)}, CONFERENCE_PLACE = {Brussels}, CONFERENCE_DATE = {01/11/2018}, } @INPROCEEDINGS{ALZETTA_2018_INPROCEEDINGS_ADMV_382333, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Dangerous Relations in Dependency Treebanks}, YEAR = {2018}, ABSTRACT = {The paper illustrates an effective and innovative method for detecting erroneously annotated arcs in gold dependency treebanks based on an algorithm originally developed to measure the reliability of automatically produced dependency relations. The method permits to significantly restrict the error search space and, more importantly, to reliably identify patterns of systematic recurrent errors which represent dangerous evidence to a parser which tendentially will replicate them. 
Achieved results demonstrate effectiveness and reliability of the method.}, KEYWORDS = {Dependency treebanks, Error Detection, Linguistic Annotation}, PAGES = {201-210}, URL = {http://aclweb.org/anthology/W/W17/W17-7624.pdf}, ISBN = {978-80-88132-04-2}, CONFERENCE_NAME = {16th International Workshop on Treebanks and Linguistic Theories}, CONFERENCE_PLACE = {Praga}, CONFERENCE_DATE = {23-24 gennaio 2018}, } @INPROCEEDINGS{ALZETTA_2018_INPROCEEDINGS_ADMV_385342, AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Universal Dependencies and Quantitative Typological Trends. A Case Study on Word Order}, YEAR = {2018}, ABSTRACT = {The paper presents a new methodology aimed at acquiring typological evidence from "gold" treebanks for different languages. In particular, it investigates whether and to what extent algorithms developed for assessing the plausibility of automatically produced syntactic annotations could contribute to shed light on key issues of the linguistic typological literature. It reports the first and promising results of a case study focusing on word order patterns carried out on three different languages (English, Italian and Spanish).}, KEYWORDS = {Linguistic Knowledge Extraction, Dependency Treebanks, Linguistic Typology}, PAGES = {4540-4549}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/1109.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Proceedings of the 11th Edition of the Language Resources and Evaluation Conference (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki (Japan)}, CONFERENCE_DATE = {7-12 maggio 2018}, } @INPROCEEDINGS{BARTOLINI_2018_INPROCEEDINGS_BGMP_387159, AUTHOR = {Bartolini, R. and Goggi, S. and Monachini, M. 
and Pardelli, G.}, TITLE = {The LREC Workshops Map}, YEAR = {2018}, ABSTRACT = {The aim of this work is to present an overview of the research presented at the LREC workshops over the years 1998-2016 with the aim to shed light on the community represented by workshop participants in terms of country of origin, type of affiliation, gender. There has been also an effort towards the identification of the major topics dealt with as well as of the terminological variations noticed in this time span. Data has been retrieved from the portal of the European Language Resources Association (ELRA) which organizes the conference and the resulting corpus made up of workshops titles and of the related presentations has then been processed using a term extraction tool developed at ILC-CNR.}, KEYWORDS = {corpus creation, terminology, LREC}, PAGES = {557-562}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/summaries/639.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{BOSCO_2018_INPROCEEDINGS_BSDPT_398987, AUTHOR = {Bosco, C. and Sanguinetti, M. and Dell'Orletta, F. and Poletto, F. 
and Tesconi, M.}, TITLE = {Overview of the EVALITA 2018 hate speech detection task}, YEAR = {2018}, ABSTRACT = {The Hate Speech Detection (HaSpeeDe) task is a shared task on Italian social media (Facebook and Twitter) for the detection of hateful content, and it has been proposed for the first time at EVALITA 2018. Providing two datasets from two different online social platforms differently featured from the linguistic and communicative point of view, we organized the task in three tasks where systems must be trained and tested on the same resource or using one in training and the other in testing: HaSpeeDe-FB, HaSpeeDe-TW and Cross-HaSpeeDe (further subdivided into Cross-HaSpeeDe FB and Cross-HaSpeeDe TW sub-tasks). Overall, 9 teams participated in the task, and the best system achieved a macro F1-score of 0.8288 for HaSpeeDe-FB, 0.7993 for HaSpeeDe-TW, 0.6541 for Cross-HaSpeeDe FB and 0.6985 for Cross-HaSpeeDe TW. In this report, we describe the datasets released and the evaluation measures, and we discuss results.}, KEYWORDS = {Hate Speech Detection, Social Media Analysis}, PAGES = {9}, URL = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85058647605\&partnerID=q2rCbXpz}, VOLUME = {2263}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {EVALITA 2018-Sixth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian}, CONFERENCE_PLACE = {Torino, Italia}, CONFERENCE_DATE = {10-12/12/2018}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{BRUNATO_2018_INPROCEEDINGS_BDDIV_391619, AUTHOR = {Brunato, D. and De Mattei, L. and Dell'Orletta, F. and Iavarone, B. and Venturi, G.}, TITLE = {Is this sentence difficult? Do you agree?}, YEAR = {2018}, ABSTRACT = {In this paper, we present a crowdsourcing-based approach to model the human perception of sentence complexity. 
We collect a large corpus of sentences rated with judgments of complexity for two typologically-different languages, Italian and English. We test our approach in two experimental scenarios aimed to investigate the contribution of a wide set of lexical, morpho-syntactic and syntactic phenomena in predicting i) the degree of agreement among annotators independently from the assigned judgment and ii) the perception of sentence complexity.}, KEYWORDS = {Linguistic complexity, Crowdsourcing, Human perception}, PAGES = {1-10}, URL = {https://www.aclweb.org/anthology/D18-1289/}, DOI = {10.18653/v1/D18-1289}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-948087-84-1}, CONFERENCE_NAME = {Conference on Empirical Methods in Natural Language Processing (EMNLP)}, CONFERENCE_PLACE = {Brussels}, CONFERENCE_DATE = {31/10/2018-04/11/2018}, } @INPROCEEDINGS{CALZOLARI_2018_INPROCEEDINGS_C_401831, AUTHOR = {Calzolari, N.}, TITLE = {Introduction to LREC 2018 by Nicoletta Calzolari Chair of the 11th edition of LREC ELRA Honorary President}, YEAR = {2018}, ABSTRACT = {It is the LREC 20th Anniversary and LREC has become one of the most successful conferences of the field. Data are pervasive in Natural Language Processing and Language Technology: we call our data Language Resources (LR).}, KEYWORDS = {Language Resources, Language Technology}, PAGES = {1-6}, URL = {https://www.aclweb.org/anthology/L18-1}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A.
and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{CHIRIATTI_2018_INPROCEEDINGS_CDDMPSV_423871, AUTHOR = {Chiriatti, G. and Della Gala, V. and Dell'Orletta, F. and Montemagni, S. and Pettenati, M. C. and Sagri, M. T. and Venturi, G.}, TITLE = {A NLP-based analysis of reflective writings by Italian teachers}, YEAR = {2018}, ABSTRACT = {This paper reports first results of a wider study devoted to exploit the potentialities of a NLP-based approach to the analysis of a corpus of reflective writings on teaching activities. We investigate how a wide set of linguistic features allows reconstructing the linguistic profile of the texts written by the Italian teachers and predicting whether are reflective.}, KEYWORDS = {Natural Language Processing, Reflective Writings, Linguistic Profiling, Document Classification}, PAGES = {1-7}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85057733802\&origin=inward}, VOLUME = {2253}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {5th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {10-12/12/2018}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{CIMINO_2018_INPROCEEDINGS_CDBV_423870, AUTHOR = {Cimino, A. and Dell'Orletta, F. and Brunato, D. and Venturi, G.}, TITLE = {Sentences and documents in native language identification}, YEAR = {2018}, ABSTRACT = {Starting from a wide set of linguistic features, we present the first in depth feature analysis in two different Native Language Identification (NLI) scenarios. We compare the results obtained in a traditional NLI document classification task and in a newly introduced sentence classification task, investigating the different role played by the considered features. 
Finally, we study the impact of a set of selected features extracted from the sentence classifier in document classification.}, KEYWORDS = {Natural Language Processing, Native Language Identification}, PAGES = {1-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85057749754\&origin=inward}, VOLUME = {2253}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {5th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {10-12/12/2018}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{COCCIU_2018_INPROCEEDINGS_CBVD_423873, AUTHOR = {Cocciu, E. and Brunato, D. and Venturi, G. and Dell'Orletta, F.}, TITLE = {Gender and Genre Linguistic profiling: A case study on female and male journalistic and diary prose}, YEAR = {2018}, ABSTRACT = {This paper intends to investigate the linguistic profile of male- and female-authored texts belonging to two very different textual genres: newspaper articles and diary prose. By using a wide set of linguistic features automatically extracted from text and spanning across different levels of linguistic description, from lexicon to syntax, our analysis highlights the peculiarities of the two examined genres and how the genre dimension is influenced by variation depending on author's gender (and vice versa).}, KEYWORDS = {Natural Language Processing, Genre Classification, Linguistic Profiling}, PAGES = {1-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85057759773\&origin=inward}, VOLUME = {2253}, PUBLISHER = {M. 
Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {5th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {10-12/12/2018}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{DEFELICE_2018_INPROCEEDINGS_DDVLM_423872, AUTHOR = {De Felice, I. and Dell'Orletta, F. and Venturi, G. and Lenci, A. and Montemagni, S.}, TITLE = {Italian in the Trenches: Linguistic annotation and analysis of texts of the great war}, YEAR = {2018}, ABSTRACT = {The paper illustrates the design and development of a textual corpus representative of the historical variants of Italian during the Great War, which was enriched with linguistic (lemmatization and pos-tagging) and meta-linguistic annotation. The corpus, after a manual revision of the linguistic annotation, was used for specializing existing NLP tools to process historical texts with promising results.}, KEYWORDS = {Natural Language Processing, Automatic Linguistic Annotation}, PAGES = {1-5}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85057734451\&origin=inward}, VOLUME = {2253}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {5th Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {10-12/12/2018}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{DELGRATTA_2018_INPROCEEDINGS_DGPC_387155, AUTHOR = {Del Gratta, R. and Goggi, S. and Pardelli, G. and Calzolari, N.}, TITLE = {LREMap, a Song of Resources and Evaluation}, YEAR = {2018}, ABSTRACT = {After 8 years we revisit the LRE Map of Language Resources, introduced at LREC 2010, to try to get a picture of the field and its evolution as reflected by the creation and use of Language Resources.
The purpose of the Map was in fact "to shed light on the vast amount of resources that represent the background of the research presented at LREC". It also aimed at a "change of culture in the field, actively engaging each researcher in the documentation task about resources". The data analysed here have been provided by the authors of several conferences during the phase of submission of papers, and contain information about ca. 7500 resources. We analysed the LRE Map data from many different viewpoints and the paper reports on the global picture, on different trends emerging from the diachronic perspective and finally on some comparisons between the 2 major conferences present in the Map: LREC and COLING.}, KEYWORDS = {LR Infrastructure, Metadata, LR Documentation}, PAGES = {1275-1281}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/summaries/300.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{DELGROSSO_2018_INPROCEEDINGS_DBGMN_390296, AUTHOR = {Del Grosso, A. M. and Bellandi, A. and Giovannetti, E. and Marchi, S. and Nahli, O.}, TITLE = {Scanning is Just the Beginning: Exploiting Text and Language Technologies to Enhance the Value of Historical Manuscripts}, YEAR = {2018}, ABSTRACT = {In this paper we present a digital process for the explicitation of the textual, linguistic and semantic content of historical manuscripts.
The proposed workflow is composed of a sequence of incremental steps, each of which is described both on a methodological and practical perspective. The steps are: 1) visualization and structuring of metadata, 2) transcription, 3) structural encoding, 4) annotation, 5) lexical and conceptual structuring.}, KEYWORDS = {Computational Lexica, Digital Scholarly Editing, Digital Humanities, al-Qamus al-Muhit}, PAGES = {214-219}, URL = {https://publications.cnr.it/doc/390296}, DOI = {10.1109/CIST.2018.8596373}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-5386-4385-3}, CONFERENCE_NAME = {CIST 2018 WH-MNLP}, CONFERENCE_PLACE = {MARRAKECH, MOROCCO}, CONFERENCE_DATE = {21-27/10/2018}, BOOKTITLE = {Colloquium in Information Science and Technology, CIST}, EDITOR = {Al Achhab, M. and El Mohajir, M. and Jellouli, I. and El Mohajir, B. E.}, } @INPROCEEDINGS{FERRO_2018_INPROCEEDINGS_FCGMNCP_390504, AUTHOR = {Ferro, M. and Cappa, C. and Giulivi, S. and Marzi, C. and Nahli, O. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {ReadLet: Reading for Understanding}, YEAR = {2018}, ABSTRACT = {This paper focuses on motivation, objectives, design issues and preliminary results of ReadLet, an ICT platform for assessing reading efficiency in primary school children. Test data are discussed on a sample of 200 early graders, reading French, Italian and Standard Modern Arabic (SMA).}, KEYWORDS = {Reading, text comprehension, Specific Learning Disorders, multimodal signal processing, cloud computing, portable assistive technology}, PAGES = {404-409}, URL = {https://publications.cnr.it/doc/390504}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-5386-4385-3}, CONFERENCE_NAME = {IEEE-CIST2018 LED-ICT}, CONFERENCE_PLACE = {Marrakech, Morocco}, CONFERENCE_DATE = {21-27/10/2018}, } @INPROCEEDINGS{GOGGI_2018_INPROCEEDINGS_GPRBM_385571, AUTHOR = {Goggi, S. and Pardelli, G. and Russo, I. and Bartolini, R. 
and Monachini, M.}, TITLE = {Providing Access to Grey Literature: The CLARIN Infrastructure}, YEAR = {2018}, ABSTRACT = {This work will provide a map of the documentation archived in the CLARIN infrastructure, whose purpose is to share language resources produced and managed in the various European countries but finally merged into the CLARIN data centers for allowing access, interoperability, reuse and preservation of scientific documentation as well as Grey Literature.}, KEYWORDS = {CLARIN Infrastructure, Language Resources, Grey Literature}, PAGES = {93-99}, URL = {http://greyguide.isti.cnr.it/wp-content/uploads/2018/03/GL19_Conference_Proceedings.pdf}, VOLUME = {19}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-31-9}, CONFERENCE_NAME = {Nineteenth International Conference on Grey Literature, GL19}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {October 23-24, 2017}, BOOKTITLE = {Nineteenth International Conference on Grey Literature "Public Awareness and Access to Grey Literature"}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{KHAN_2018_INPROCEEDINGS_KBFM_387178, AUTHOR = {Khan, F. and Bellandi, A. and Frontini, F. and Monachini, M.}, TITLE = {One Language to rule them all: modelling Morphological Patterns in a Large Scale Italian Lexicon with SWRL}, YEAR = {2018}, ABSTRACT = {We present an application of Semantic Web Technologies to computational lexicography. More precisely we describe the publication of the morphological layer of the Italian Parole Simple Clips lexicon (PSC-M) as linked open data. The novelty of our work is in the use of the Semantic Web Rule Language (SWRL) to encode morphological patterns, thereby allowing the automatic derivation of the inflectional variants of the entries in the lexicon. 
By doing so we make these patterns available in a form that is human readable and that therefore gives a comprehensive morphological description of a large number of Italian words.}, KEYWORDS = {Morphology, Linked Open Data, Italian Lexicon, SWRL, SQWRL}, PAGES = {4385-4389}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/844.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N.}, } @INPROCEEDINGS{MARZI_2018_INPROCEEDINGS_MFNBBP_388016, AUTHOR = {Marzi, C. and Ferro, M. and Nahli, O. and Belik, P. and Bompolas, S. and Pirrelli, V.}, TITLE = {Evaluating Inflectional Complexity Crosslinguistically: a Processing Perspective}, YEAR = {2018}, ABSTRACT = {The paper provides a cognitively motivated method for evaluating the inflectional complexity of a language, based on a sample of "raw" inflected word forms processed and learned by a recurrent self-organising neural network with fixed parameter setting. Training items contain no information about either morphological content or structure. This makes the proposed method independent of both meta-linguistic issues (e.g. format and expressive power of descriptive rules, manual or automated segmentation of input forms, number of inflectional classes etc.) and language-specific typological aspects (e.g. word-based, stem-based or template-based morphology).
Results are illustrated by contrasting Arabic, English, German, Greek, Italian and Spanish.}, KEYWORDS = {paradigm-based morphology, inflectional complexity, prediction-based processing, recurrent self-organising networks, Statistical And Machine Learning Methods, Language Modelling}, PAGES = {3860-3866}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/summaries/745.html}, VOLUME = {2018}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{MONACHINI_2018_INPROCEEDINGS_MK_387203, AUTHOR = {Monachini, M. and Khan, A. F.}, TITLE = {Towards the Construction of a Lexical Data and Technology Ecosystem: The Experience of ILC-CNR}, YEAR = {2018}, ABSTRACT = {This paper describes the activities and projects being carried on at the "A. 
Zampolli" Institute for Computational Linguistics (ILC) at the crossroads between computational lexicography and e-lexicography and that are intended to assist in the creation of a queryable and interconnected ecosystem of standardised lexicographic datasets and technologies.}, KEYWORDS = {e-lexicography, computational lexicography, lexical resources, standards, LOD}, PAGES = {52-54}, URL = {https://globalex.link/globalex2018/wp-content/uploads/2018/03/Globalex-2018_proceedings.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-28-3}, CONFERENCE_NAME = {LREC 2018 Workshop "Globalex 2018-Lexicography \& WordNets"}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the LREC 2018 Workshop "Globalex 2018-Lexicography \& WordNets"}, EDITOR = {Kernerman, I. and Krek, S.}, } @INPROCEEDINGS{NAHLI_2018_INPROCEEDINGS_N_390405, AUTHOR = {Nahli, O.}, TITLE = {Arabic Language Alignment with English Ontologies-Some Ontological Reflections}, YEAR = {2018}, ABSTRACT = {There have been several attempts to build lexico-conceptual resources by extension of the English WordNet, i.e. by means of translation of English synsets. However, the extension approach is arguable because it assumes that the target resource is isomorphic to English WordNet. Yet, some languages, such as English and Arabic, can be very different. The problem would be to know, first, whether they conceptualize reality in the same way; and if not, to identify different concepts types. The mapping of a lexical resource of a different language onto Princeton WordNet of English (PWN) answers these questions.
The experiment, in this article, describes results obtained from mapping the Arabic dictionary, al=qāmūs al=muḥīṭ, onto English WordNet and SUMO (Standard Upper Merged Ontology), also developed for the English language.}, KEYWORDS = {Ontology, concept, Arabic, PWN, SUMO, al=qāmūs al=muḥīṭ}, PAGES = {7}, URL = {https://publications.cnr.it/doc/390405}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-5386-4385-3}, CONFERENCE_NAME = {CIST 2018 WH-MNLP}, CONFERENCE_PLACE = {MARRAKECH, MOROCCO}, CONFERENCE_DATE = {21-27/10/2018}, } @INPROCEEDINGS{NICOLAS_2018_INPROCEEDINGS_NKMDCAEBQS_387361, AUTHOR = {Nicolas, L. and König, A. and Monachini, M. and Del Gratta, R. and Calamai, S. and Abel, A. and Enea, A. and Biliotti, F. and Quochi, V. and Stella, F. V.}, TITLE = {CLARIN-IT: State of Affairs, Challenges and Opportunities}, YEAR = {2018}, ABSTRACT = {This paper gives an overview on the Italian national CLARIN consortium as it currently stands two years after its creation at the end of 2015. It thus discusses the current state of affairs of the consortium on several aspects, especially with regards to members. It also discusses the events and initiatives that have been undertaken, as well as the ones that are planned in the close future. It finally outlines the conclusions of a user survey performed to understand the expectations of a targeted user population and provides indications regarding the next steps planned.}, KEYWORDS = {CLARIN-IT Consortium Pisa Bolzano Siena}, PAGES = {1-14}, URL = {http://www.ep.liu.se/ecp/contents.asp?issue=147}, VOLUME = {147}, ISBN = {978-91-7685-273-6}, CONFERENCE_NAME = {CLARIN Annual Conference 2017}, CONFERENCE_PLACE = {Budapest, Hungary}, CONFERENCE_DATE = {18-20 September, 2017}, BOOKTITLE = {Selected papers from the CLARIN Annual Conference 2017, Budapest, 18-20 September 2017}, } @INPROCEEDINGS{SORIA_2018_INPROCEEDINGS_SQR_387362, AUTHOR = {Soria, C. and Quochi, V.
and Russo, I.}, TITLE = {The DLDP Survey on Digital Use and Usability of EU Regional and Minority Languages}, YEAR = {2018}, ABSTRACT = {This paper reports about the design, the results and the key findings of a survey launched by the Digital Language Diversity Project about the digital use and usability of regional and minority languages. The aim of the survey - the first of this kind - was to investigate the real needs and expectations of European minority language speakers regarding digital opportunities. The focus on four languages (Basque, Breton, Karelian and Sardinian) at different stages of digital development offers a starting point to develop strategies for assessing digital vitality of these languages and overcoming specific difficulties.}, KEYWORDS = {minority languages, digital survival, electronic communication}, PAGES = {4155-4160}, URL = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/684.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {979-10-95546-00-9}, CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, CONFERENCE_PLACE = {Miyazaki, Japan}, CONFERENCE_DATE = {7-12/05/2018}, BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.}, } @INPROCEEDINGS{BELLANDI_2018_INPROCEEDINGS_BGP_385403, AUTHOR = {Bellandi, A. and Giovannetti, E. and Piccini, S.}, TITLE = {Collaborative Editing of Lexical and Termino-ontological Resources: a Quick Introduction to LexO}, YEAR = {2018}, ABSTRACT = {We here present LexO, a web collaborative editor of lexical and termino-ontological resources. 
As the underlying lexical model we adopted lemon, which appeared to be perfect for our purposes, in particular regarding the separation between the conceptual and linguistic dimensions.}, KEYWORDS = {lemon model, lexo, collaborative editor, termino-ontological resource}, PAGES = {23-27}, URL = {http://euralex2018.cjvt.si/wp-content/uploads/sites/6/2018/12/Euralex2018_book_of_abstracts_FINAL.pdf}, CONFERENCE_NAME = {XVIII EURALEX International Congress}, CONFERENCE_PLACE = {Ljubljana, Slovenia}, CONFERENCE_DATE = {17-21/07/2018}, BOOKTITLE = {The XVIII EURALEX International Congress: Lexicography in Global Contexts-Book of Abstracts}, EDITOR = {Čibej, J. and Gorjanc, V. and Kosem, I. and Krek, S.}, } @INPROCEEDINGS{CAPPA_2018_INPROCEEDINGS_CFGMNCP_396593, AUTHOR = {Cappa, C. and Ferro, M. and Giulivi, S. and Marzi, C. and Nahli, O. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {ReadLet: piattaforma ICT per valutare l'efficienza di lettura}, YEAR = {2018}, ABSTRACT = {ReadLet è una piattaforma ICT pensata per valutare accuratamente l'efficienza di lettura nei bambini della scuola primaria. Combina tecnologia ICT portatile e cloud-computing con una serie di moduli software, specifici per modalità di somministrazione. Questi, implementati come servizi web, includono: i) valutazione dell'elaborazione del testo e della leggibilità; ii) valutazione della velocità di lettura (ad alta voce e silente) e delle sue fluttuazioni); iii) valutazione della correttezza della decodifica ad alta voce; iv) valutazione della comprensione del testo (in lettura silente e da ascolto). Un prototipo della tecnologia ReadLet è stato sperimentato su circa 200 alunni (8-11 anni), che variano per stato socio-economico, lingua (italiana, francese, araba) e area geografica (Italia, Svizzera, Marocco). L'utilizzo del tablet per la lettura è stato percepito dai bambini come un'esperienza coinvolgente e piacevole.
Gli insegnanti hanno trovato lo strumento facile da utilizzare e in grado di fornire maggiori informazioni rispetto agli strumenti tradizionali.}, KEYWORDS = {leggere per capire, disturbi del linguaggio, screening}, URL = {https://www.airipa.it/congresso/pluginfile.php/2781/mod_resource/content/1/Programma%20Congresso%20AIRIPA_Arezzo_dettagliato-3.pdf}, CONFERENCE_NAME = {XXVII Congresso Nazionale AIRIPA}, CONFERENCE_PLACE = {Arezzo (Italy)}, CONFERENCE_DATE = {28-29/09/2018}, } @INPROCEEDINGS{DELGROSSO_2018_INPROCEEDINGS_D_390305, AUTHOR = {Del Grosso, A. M.}, TITLE = {Verso la definizione e l'implementazione di un processo per la gestione dell'informazione in ambito bibliografico e archivistico}, YEAR = {2018}, ABSTRACT = {L'intervento ripercorre alcune iniziative svolte negli anni passati in collaborazione con il liceo classico Medi-Livatino. Si introduce un processo di digitalizzazione e di analisi di documenti testuali volto alla gestione e allo studio dell'informazione testuale in ambito filologico. In particolare si sottolineano gli aspetti che accomunano il lavoro ingegneristico-tecnologico con quelli maggiormente bibliografici e archivistici relativi alla conservazione e alla fruizione di risorse testuali. Si evidenziano gli sviluppi di attività dedicate alla cultura digitale e all'applicazione di strumenti computazionali per l'analisi e lo studio di testi storici nell'ambito della didattica. In perfetta sintonia quindi con le linee guida del Piano Nazionale Scuola Digitale.}, KEYWORDS = {PNSD, liceo classico, biblioteche innovative, archivi digitali}, URL = {https://publications.cnr.it/doc/390305}, CONFERENCE_NAME = {Cultura Digitale: a scuola di innovazione}, CONFERENCE_PLACE = {San Marco dei Cavoti (Benevento)}, CONFERENCE_DATE = {17-18/5/2018}, } @INPROCEEDINGS{DELGROSSO_2018_INPROCEEDINGS_DCCCDR_390989, AUTHOR = {Del Grosso, A. M. and Cacioli, G. and Cavallero, C. and Cioffi, R. and Di Pietro, C.
and Rosselli Del Turco, R.}, TITLE = {Encoding and publishing the Life of San Teobaldo using EVT: challenges and rewards}, YEAR = {2018}, ABSTRACT = {This contribution aims at illustrating both the scholarly work and the development outcomes that have been achieved while working towards a digital edition of the Life of San Teobaldo (an hagiography of the patron saint of the city of Alba, Italy). The text, physically embodied in an ancient palimpsest scroll, has been encoded using the TEI-XML standard and published by means of the Edition Visualization Technology tool. EVT has been appropriately customized and extended with new features concerning image visualization, diplomatic edition display and textual search. A working progress demo is available at < http://licodemo.ilc.cnr.it/evt-rotulo >.}, KEYWORDS = {digital philology, evt, computational philology, Rotulo vita San Teobaldo}, URL = {https://drive.google.com/file/d/19SQqvy4vwG_-irpelu7ro3Q1QdZjcsZJ/view?usp=sharing}, CONFERENCE_NAME = {EADH 2018: "Data in Digital Humanities"}, CONFERENCE_DATE = {7-9/12/2018}, } @INPROCEEDINGS{DELGROSSO_2018_INPROCEEDINGS_DCDGMSS_384781, AUTHOR = {Del Grosso, A. M. and Cristofaro, S. and De Luca, M. R. and Giovannetti, E. and Marchi, S. and Seminara, G. and Spampinato, D.}, TITLE = {Le lettere di Bellini: dalla Carta al Web}, YEAR = {2018}, ABSTRACT = {Nel contesto del progetto "Museo virtuale della Musica BellinInRete" sarà reso fruibile, attraverso un processo di acquisizione, codifica e pubblicazione digitale, un corpus di lettere di Vincenzo Bellini, compositore catanese del XIX secolo. 
L'edizione digitale delle lettere belliniane sarà consultabile in rete e, inoltre, sarà integrata in un percorso museale interattivo in allestimento presso il Museo Civico Belliniano di Catania.}, KEYWORDS = {Digital Edition, Digital Scholarly Platform}, PAGES = {60-64}, URL = {http://www.aiucd2018.uniba.it/content/AIUCD2018-BoA.pdf}, DOI = {10.6092/unibo/amsacta/5997}, ISBN = {9788894253528}, CONFERENCE_NAME = {AIUCD 2018 Conference}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {31/01/2018-02/02/2018}, BOOKTITLE = {AIUCD 2018-Book of abstracts}, EDITOR = {Spampinato, D.}, } @INPROCEEDINGS{FERRO_2018_INPROCEEDINGS_FCGMCP_396591, AUTHOR = {Ferro, M. and Cappa, C. and Giulivi, S. and Marzi, C. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {ReadLet: an ICT platform for the assessment of reading efficiency in early graders}, YEAR = {2018}, ABSTRACT = {Reading is not just word decoding, but the joint product of decoding and deep linguistic comprehension [ 1 , 2 ]. Effective linguistic comprehension relies on language skills such as semantic and syntactic awareness. Both decoding and linguistic comprehension are necessary for reading comprehension, and neither is by itself sufficient [ 2 ]. However, current protocols for reading assessment measure decoding (reading accuracy and speed) and reading comprehension separately [ 3 , 4 , 5 ]. This does not allow evaluation of reading efficiency [ 6 ], defined as the ability to fully understand connected texts by minimising reading time, a cognitive ability that lies at the roots of students' academic achievement [ 8 , 7 ]. ReadLet is an ICT platform specifically designed to provide accurate, evidence-based assessment of reading efficiency in early grade children, by offering an ecological, non-invasive protocol for extensive data elicitation, storage and analysis. With ReadLet, early graders at school can read a one or two page text displayed on a tablet touchscreen, either silently or aloud. 
Children are asked to slide their finger across the words as they read, to guide directional tracking. After reading, the child is prompted with a few multiple-answer questions on text content presented one at a time, while the text remains displayed on the screen for the child to be able to retrieve relevant information. In the process, the tablet keeps track of time-aligned multimodal data: voice recording, finger sliding time, time of reading, time of question answering, and number of correct answers. Data are recorded, stored locally, sent to the ReadLet server through an internet connection, and processed remotely by a battery of cloud-based services, analysing data automatically to produce a detailed quantitative signature of each reading session. A server-based database aggregates anonymised data to make them available for specialists. Also individual's longitudinal profiles are stored, for them to be queried and inspected upon authorised access. The platform combines portable ICT technology and cloud computing with a number of modality-specific software modules, implemented as web services including: i) a text processing and readability assessment service, consisting in a battery of tools for automated linguistic annotation of written texts and a machine-learning component assigning a readability score to annotated texts [ 9 ]; ii) a finger touch processing service aligning the child's finger sliding with the written text and measuring speed fluctuations; iii) a speech processing and decoding assessment service, aligning the acoustic record of child's reading with the written text and assessing correctness of recoding [ 10 ]. At the time of writing, the platform includes the first two modules only.
Preliminary testing of a prototype version of ReadLet technology with a population of about 200 pupils aged 8 to 11, both male and female, varying for socio-economic status, language (Italian, French and Arabic) and geographical area (Italy and Morocco), showed that children are extremely responsive to using a tablet for reading, and very easy to engage in what they perceive as an enjoyable experience. We expect online databases of automatically classified cross-sectional and longitudinal data, accurate statistical modelling and developmental trends of reading literacy to help education professionals and clinical specialists assess the level of reading skills reached by the child, and decide which intervention programmes and measures are most appropriate. While information technology cannot and should not supplant the role and professional judgement of teachers and therapists, the project intends to provide portable tools, models and data for timely screening and daily management of reading difficulties and disorders.}, KEYWORDS = {reading efficiency, decoding, comprehension, language specific disorders}, PAGES = {61-61}, URL = {https://mentallexicon2018.ca/}, CONFERENCE_NAME = {11th International Conference on the Mental Lexicon}, CONFERENCE_PLACE = {Edmonton, Alberta (Canada)}, CONFERENCE_DATE = {25-28/09/2018}, } @INPROCEEDINGS{GIOVANNETTI_2018_INPROCEEDINGS_GABDDPP_385407, AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Dattilo, D. and Dollinar, M. and Pecchioli, A. and Piperno, C.}, TITLE = {Il Progetto Traduzione del Talmud Babilonese: il Ruolo della Tecnologia e della Linguistica Computazionale}, YEAR = {2018}, ABSTRACT = {L'obiettivo principale del Progetto Traduzione del Talmud Babilonese è produrre la traduzione del Talmud in italiano. 
La traduzione, affidata ad un team di circa 80 studiosi, è condotta con l'aiuto di Traduco, un software preposto ad agevolare tutte le fasi di lavoro previste dal progetto, dall'attribuzione degli utenti alle sezioni da tradurre, fino al supporto all'impaginazione finale. La presenza di una piattaforma collaborativa digitale che già, di per sé, costituisce una innovazione nell'ambito dei grandi progetti di traduzione, è arricchita da algoritmi per il trattamento automatico del testo e della lingua, in costante evoluzione, attraverso i quali il traduttore, il revisore o lo studioso possono contare su funzionalità sempre più avanzate.}, KEYWORDS = {Linguistica Computazionale, Traduzione di Testi Religiosi, Traduzione Assistita dal Calcolatore, Traduzione Collaborativa}, PAGES = {144-146}, URL = {http://amsacta.unibo.it/5997/}, DOI = {10.6092/unibo/amsacta/5997}, ISBN = {9788894253528}, CONFERENCE_NAME = {AIUCD 2018 Conference}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {31/01/2018-02/02/2018}, BOOKTITLE = {AIUCD 2018-Book of abstracts}, EDITOR = {Spampinato, D.}, } @INPROCEEDINGS{GOGGI_2018_INPROCEEDINGS_GPBMBC_395584, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Monachini, M. and Biagioni, S. and Carlesi, C.}, TITLE = {Semantic query analysis from the global science gateway}, YEAR = {2018}, ABSTRACT = {We focused on building a corpus constituted by the query logs registered by the GreyGuide: Repository and Portal to Good Practices and Resources in Grey Literature and received by the WorldWideScience.org (The Global Science Gateway) portal: the aim is to retrieve information related to social media which as of today represent a considerable source of data more and more widely used for research ends. This project includes eight months of query logs registered between July 2017 and February 2018 for a total of 445,827 queries.
The analysis mainly concentrates on the semantics of the queries received from the portal clients: it is a process of information retrieval from a rich digital catalogue whose language is dynamic, is evolving and follows - as well as reflects - the cultural changes of our modern society.}, KEYWORDS = {Global Science Gateway, Semantic Query Analysis, Terminology}, PAGES = {93-95}, URL = {http://greyguide.isti.cnr.it/wp-content/uploads/2018/12/GL20_ProgramBook.pdf}, VOLUME = {20}, ISBN = {978-90-77484-34-0}, CONFERENCE_NAME = {Twentieth International Conference on Grey Literature "Research Data Fuels and Sustains Grey Literature"}, CONFERENCE_PLACE = {New Orleans, USA (Loyola University)}, CONFERENCE_DATE = {December 3-4, 2018}, BOOKTITLE = {Research Data Fuels and Sustains Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{LEONI_2018_INPROCEEDINGS_LMCFG_396592, AUTHOR = {Leoni, F. and Muzio, C. and Cappa, C. and Ferro, M. and Giulivi, S.}, TITLE = {Il progetto AEREST: primi risultati in Italia e in Canton Ticino}, YEAR = {2018}, ABSTRACT = {Il progetto AEREST, per una valutazione ecologica dell'efficienza di lettura, è attualmente in corso presso alcune classi di scuola primaria di istituti italiani e ticinesi. Si presentano qui i risultati ottenuti a seguito della prima sessione di raccolta dati, che si è svolta nell'A.A. 2017-18 su circa 160 bambini italofoni di età compresa tra 8 e 11 anni. Lo scopo di questa prima fase sperimentale è stato duplice: 1. ottenere indicazioni sull'efficacia, ai fini della valutazione dell'efficienza di lettura, dei testi utilizzati nelle prove di cui si compone il test AEREST; 2. ottenere indicazioni sulla fattibilità dell'implementazione dello screening su tablet, in termini di facilità di somministrazione e di gradimento da parte dei soggetti; 3. 
esplorare e confrontare le performance di lettura nel campione italiano e ticinese, al fine di individuare strategie didattiche volte a potenziare le eventuali abilità carenti.}, KEYWORDS = {efficienza di lettura, screening}, URL = {https://www.airipa.it/congresso/pluginfile.php/2781/mod_resource/content/1/Programma%20Congresso%20AIRIPA_Arezzo_dettagliato-3.pdf}, CONFERENCE_NAME = {XXVII Congresso Nazionale AIRIPA}, CONFERENCE_PLACE = {Arezzo (Italy)}, CONFERENCE_DATE = {28-29/09/2018}, } @INPROCEEDINGS{MARZI_2018_INPROCEEDINGS_MFP_396356, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Is inflectional irregularity dysfunctional to human processing?}, YEAR = {2018}, ABSTRACT = {Regularly inflected verb forms are classically associated with the formal transparency and predictability of their internal constituents [ 1 , 2 , 3 ]. Transparency ensures that full forms can be segmented uniquely into their internal constituents: as in walk-s/walk-ed. Predictability allows for a speaker to fill in an empty paradigm cell, using information from other known forms of the same lexical paradigm and its inflection macro-class. From this perspective, irregulars appear to be dysfunctional to the human processing system, as they make it hard to infer - say - bought from buy , or segment bought appropriately into its constituent parts. Likewise, an influential psycholinguistic tradition relegates irregulars to the lexical store, whereas regulars are segmented by rules into their simpler constituents [ 4 , 5 ]. Here, we offer a few reasons for questioning this view. First, transparency and predictability are not dichotomous notions. Secondly, their influence on processing is not unidirectional. Unpredictable stems in irregularly inflected forms of complex inflectional systems provide a lot of processing information, by dynamically constraining the number of possible alternative endings during serial processing. 
Thirdly, acquisition of word inflection does not consist in associating co-occurring cues and outcomes, but in discriminating between multiple cues that are constantly in competition for their predictive value for a given outcome. We present the results of a few computer simulations with Self-organising Recurrent Neural Networks (TSOMs, [ 8 , 9 ]) that learn how to inflect high-frequency verb paradigms in 6 languages: English, German, Italian, Modern Greek, Modern Standard Arabic and Spanish. After training, each TSOM was tested on a word recognition (serial recoding) and a word production (serial recall) task, and results were analysed with generalised regression models. Processing uncertainty is differently apportioned on regulars and irregulars, depending on the nature of the processing task. While irregulars are harder to produce when they are unknown because they typically have fewer neighbours than regulars have, they are readily accessed once they are acquired, for exactly the same reason. Our data are in line with psycholinguistic evidence [ 10 , 11 ] that lexical processing is paced by two types of uniqueness point: Marslen-Wilson's Uniqueness Point (UP), distinguishing unrelated onset-overlapping words [ 12 ], and the Complex Uniqueness Point (CUP), distinguishing paradigmatically-related words [ 11 ]. Late UPs are inhibitory and elicit prolonged reaction times in acoustic word recognition, explaining an early delay in word recognition of irregular stems. Similarly, late CUPs are inhibitory, and this accounts for a slowdown in the processing advantage of regulars, compared to irregulars, after UP. These structural factors interact in a variety of ways and concurrently affect human processing, to show that irregularly-inflected forms may in fact reflect communicative and processing constraints of the word processor. 
They provide strong evidence against a processing architecture that assumes compartmentalized, independent processing routes for some specific combinations of these factors (e.g. a rule-based route for a combination of transparency and predictability, and a memory-based route for all other combinations). In addition, they seem incompatible with Bayesian approaches to auditory word comprehension ignoring a word's internal structure [ 13 ]. We suggest that a different design of the human language processor, based on a computational architecture integrating memory and processing as two different dynamics of the same underlying mechanism, can shed light on the complexity of inflection, and vindicate the role of irregular inflection in the system.}, KEYWORDS = {inflectional processing, temporal self organizing maps, letter prediction, morpheme boundary}, PAGES = {60-60}, URL = {https://mentallexicon2018.ca/}, CONFERENCE_NAME = {11th International Conference on the Mental Lexicon}, CONFERENCE_PLACE = {Edmonton, Alberta (Canada)}, CONFERENCE_DATE = {25-28/09/2018}, } @INPROCEEDINGS{PICCINI_2018_INPROCEEDINGS_PBG_385401, AUTHOR = {Piccini, S. and Bellandi, A. and Giovannetti, E.}, TITLE = {A Semantic Web Approach to Modelling and Building a Bilingual Chinese-Italian Termino-ontological Resource}, YEAR = {2018}, ABSTRACT = {This paper introduces a bilingual Chinese-Italian onto-terminological resource, devoted to modelling the Chinese terminology of Matteo Ricci's World Map (1602), together with the Italian translation by Pasquale D'Elia (1835) [3]. The Map was created in collaboration with the Chinese mathematician and astronomer Li Zizhao, and is entitled 坤輿萬國全圖 Kunyu Wanguo Quantu (literally "Map of the Ten Thousand Countries of the Earth"). Its publication in China was significant as it was the first map to show the Americas, and to represent the world as a sphere. 
Its large number of cartouches provide information about the geography, history and customs of the world at that time as well as cosmological and cosmographic data. The map had a revolutionary impact from a linguistic standpoint as well: a large number of neologisms were introduced by Ricci, many of which have survived until today.}, KEYWORDS = {termino-ontological resource, classical chinese, lemon model}, PAGES = {87-90}, URL = {http://euralex2018.cjvt.si/wp-content/uploads/sites/6/2018/12/Euralex2018_book_of_abstracts_FINAL.pdf}, CONFERENCE_NAME = {XVIII EURALEX International Congress}, CONFERENCE_PLACE = {Ljubljana, Slovenia}, CONFERENCE_DATE = {17-21/07/2018}, BOOKTITLE = {The XVIII EURALEX International Congress: Lexicography in Global Contexts-Book of Abstracts}, EDITOR = {Čibej, J. and Gorjanc, V. and Kosem, I. and Krek, S.}, } @INPROCEEDINGS{PIRRELLI_2018_INPROCEEDINGS_P_399032, AUTHOR = {Pirrelli, V.}, TITLE = {NLP-based assessment of reading efficiency in early grade children}, YEAR = {2018}, ABSTRACT = {Assessing reading skills is a laborious and time-consuming task, which requires monitoring a variety of interlocked abilities, ranging from accurate word rendering, reading fluency and lexical access, to linguistic comprehension, and interpretation, management and inference of complex events in working memory. No existing software, to our knowledge, is able to cover and integrate reading performance monitoring, instant feedback, personalised potentiation and intelligent decision support to teachers and speech therapists, assessment of response to intervention. 
NLP and ICT technologies can make such an ambitious platform an achievable target.}, KEYWORDS = {NLP-based methods, reading efficiency, early graders}, PAGES = {5-6}, URL = {http://dcl.bas.bg/clib/wp-content/uploads/2018/07/CLIB_2018_Proceedings_v2_final.pdf}, CONFERENCE_NAME = {Computational Linguistics in Bulgaria}, CONFERENCE_PLACE = {Sofia, Bulgaria}, CONFERENCE_DATE = {27-29/05/2018}, } @INPROCEEDINGS{PIRRELLI_2018_INPROCEEDINGS_PFMGSM_396353, AUTHOR = {Pirrelli, V. and Ferro, M. and Marzi, C. and Gagné, C. and Spalding, T. and Marelli, M.}, TITLE = {Processing compounds: what frequency (alone) cannot explain}, YEAR = {2018}, ABSTRACT = {Observed elevation in typing latency for the initial letter of the second constituent of an English compound, compared with the typing time of the final letter of the first constituent (Gagné \& Spalding 2016), suggests that both compounds ( snowball ) and pseudo-compounds ( carpet ) are decomposed but also that full form representations are available in the lexical store. To gain further insight into the lexical representations underlying typing, we used computational modelling. In particular, we used superpositional models of word memory, based on Self-Organising Recurrent Maps (TSOMs) (Ferro et al. 2016; Marzi et al. 2016), where both simple and compound words are processed (and stored) using the same pool of processing (and memory) resources, to model the elevation in typing time at the constituent boundary and the rate of typing. In addition, we also considered models based in the Compositional Distributional Semantics framework (CAOSS, Marelli et al. 2017), to simulate independent effects of semantic transparency on compound typing (Gagné \& Spalding 2016). 
Due to co-activation and competition between compounds and their constituent words in TSOMs, levels of activation of processing nodes per letter positions appear to reflect degrees of context-sensitive predictability: the higher the level, the more expected the letter in that position. In English compounds, activation levels appeared to exhibit a characteristically U-shaped pattern, with min values centred on the constituent boundary. A similar pattern was found for pseudo-compounds, which nonetheless present a less pronounced U-shaped pattern and a higher activation value at the morpheme boundary than compounds do. The difference is in line with the higher speed-up rate in typing pseudo-compounds than compounds reported in Gagné and Spalding (2016). TSOMs were trained on letter-based representations, so computer experiments could simulate peripheral effects of serial processing of compound structure before lexical access. To investigate post-lexical issues, we also tested computational models of generation of the meanings of novel compounds based on CAOSS, which proved to be able to account for well-established relational effects in compound processing (Gagné 2001; Gagné \& Shoben 1997) with an unsupervised data-driven framework (Marelli et al. 2017). We ran a mixed-effects regression analysis of the data in Gagné and Spalding (2016) using vector-semantics estimates and TSOM activation levels to predict typing time for the initial letter of the second constituent. There was a negative effect of TSOM letter activation levels: i.e. the more active a letter node is, the faster a subject is at typing the letter ( t =-2.7 p =.007). Also, there was a positive effect of CAOSS-based compositionality estimates: i.e. the more easily a compound's lexicalized meaning can be obtained through compositional operations on single constituent vectors, the slower participants were at typing the first letter of the second constituent ( t =2.4, p =.017). 
These results have interesting implications for an integrative computational architecture accounting for the whole range of experimental evidence reported by Gagné and Spalding (2016). In particular we will focus on evidence of a stronger competition (and longer typing time) in Transparent-Transparent and Transparent-Opaque compounds, vs. Opaque-Transparent compounds, which gives an indication of a non-trivial interaction between semantic compositionality and serial processing effects.}, KEYWORDS = {compound processing, Temporal Self-organizing Map, letter production latency, constituent boundary}, PAGES = {60-60}, URL = {https://mentallexicon2018.ca/}, CONFERENCE_NAME = {11th International Conference on the Mental Lexicon}, CONFERENCE_PLACE = {Edmonton (Canada)}, CONFERENCE_DATE = {25-28/09/2018}, } @INPROCEEDINGS{STEFANINI_2018_INPROCEEDINGS_SNM_385585, AUTHOR = {Stefanini, A. and Nicolosi, A. and Monachini, M.}, TITLE = {An experiment on the development of a digital edition for ancient Greek fragmentary poetry: A case study on Archilochus of Paros}, YEAR = {2018}, ABSTRACT = {This paper overviews ongoing experiments on a digital edition of Archilochus which is based on the readings, translations and comments by Nicolosi [1] and also integrates feedback and requirements from the Digital Classics community. The experiment encompasses a few fragments of the poet of Paros, so as to provide a mock-up of the prototype for evaluation by its intended end-users, in view of developing a fully fledged digital edition. 
The mock-up provides the philologist with a set of resources and tools that ease a critical appraisal of the text.}, KEYWORDS = {Digital methods in the humanities, Interfaces and user-friendly data presentation}, PAGES = {86-89}, URL = {http://amsacta.unibo.it/5997/1/AIUCD-2018-BoA-rev.pdf}, DOI = {10.6092/unibo/amsacta/5997}, ISBN = {9788894253528}, CONFERENCE_NAME = {Settimo Convegno Annuale AIUCD 2018}, CONFERENCE_PLACE = {Bari}, CONFERENCE_DATE = {31/01/2018-2/02/2018}, BOOKTITLE = {Settimo Convegno Annuale AIUCD 2018. Patrimoni culturali nell'era digitale. Memorie, culture umanistiche e tecnologia. Book of Abstracts}, EDITOR = {Spampinato, D.}, } @TECHREPORT{BARONI_2018_TECHREPORT_BQRSCGHKSS_483257, AUTHOR = {Baroni, P. and Quochi, V. and Russo, I. and Soria, C. and Ceberio, B. K. and Gurrutxaga, H. A. and Hicks, D. and Kruse, E. and Salonen, T. and Sarhimaa, A.}, TITLE = {Kit per la sopravvivenza digitale della lingua sarda-Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua sarda}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua sarda (versione italiana)}, KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Sardinian}, PAGES = {12}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Sardinian_IT.pdf}, } @TECHREPORT{CARLINO_2018_TECHREPORT_C_483693, AUTHOR = {Carlino, M.}, TITLE = {Rapporto annuale 2017 del CNR-ILC}, YEAR = {2018}, ABSTRACT = {Rapporto Annuale 2017 del Cnr-Istituto di Linguistica Computazionale "Antonio Zampolli" (CNR-ILC)}, KEYWORDS = {CNR-ILC, ILC, Annual Report, Rapporto Annuale, Istituto di Linguistica Computazionale, Zampolli, Activity report}, PAGES = {1-64}, URL = {https://publications.cnr.it/doc/483693}, } @TECHREPORT{CEBERIO_2018_TECHREPORT_CGBHKQRSSS_443050, AUTHOR = {Ceberio, B. K. and Gurrutxaga, H. A. and Baroni, P. and Hicks, D. and Kruse, E. 
and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {Euskarak Mundu Digitalean Bizirauteko Kita-DLDPren gomendioak, euskararen bizitasun digitala hobetu dadin}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale della lingua basca (versione basca)}, KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Basque}, PAGES = {27}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Basque_EU.pdf}, } @TECHREPORT{CEBERIO_2018_TECHREPORT_CGBHKQRSSS_443051, AUTHOR = {Ceberio, B. K. and Gurrutxaga, H. A. and Baroni, P. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {Kit de Supervivencia Lingüística Digital del Euskera-Recomendaciones del DLDP para mejorar la Vitalidad Digital del euskera}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del basco (versione spagnola)}, KEYWORDS = {digital diversity, digital vitality, recommendations, Basque, digital survival}, PAGES = {28}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Basque_ES.pdf}, } @TECHREPORT{CEBERIO_2018_TECHREPORT_CGBHKQRSSS_443020, AUTHOR = {Ceberio, B. K. and Gurrutxaga, H. A. and Baroni, P. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. 
and Soria, C.}, TITLE = {The DLDP Digital Language Survival Kit}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale delle lingue (versione inglese integrale)}, KEYWORDS = {sopravvivenza digitale, lingue minoritarie, less-resourced languages}, PAGES = {38}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit.pdf}, } @TECHREPORT{CININI_2018_TECHREPORT_C_390082, AUTHOR = {Cinini, A.}, TITLE = {LigurArch900: Itinerari di architettura contemporanea in Liguria}, YEAR = {2018}, ABSTRACT = {Realizzazione di un prototipo di applicazione per sistema operativo Android per la divulgazione dei risultati di un Progetto di ricerca "Censimento e schedatura di complessi di architettura moderna e contemporanea in Liguria". L'attività finalizzata allo "Studio e realizzazione di moduli software per accedere, gestire ed estrarre informazioni sulle architetture del Novecento in Liguria", è stata svolta nell'ambito della collaborazione con il Dipartimento dell'Università di Architettura di Genova (DSA-UNIGE). L'applicazione affianca la consultazione delle architetture censite con visualizzazione su mappa, a quella dei contenuti descrittivi per le architetture di maggior rilievo.}, KEYWORDS = {Android, Mobile, Term extraction}, PAGES = {1-9}, URL = {http://dbtvm1.ilc.cnr.it/Download/app-release_20170318.zip}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_443047, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {The DLDP Roadmap}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione inglese integrale)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {19}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Roadmap.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483247, AUTHOR = {Hicks, D. 
and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {The DLDP Roadmap-Policy Recommendations \& Timeline}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione inglese sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_EN.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483251, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {DLDP etenemissuunnitelma-Toimenpidesuunnitelmat ja aikajana}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione finlandese sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_FI.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483254, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {La DLDP Hoja de Ruta-Políticas recomendadas \& Cronograma}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione spagnola sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_ES.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483255, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. 
and Soria, C.}, TITLE = {Diversità Linguistica Digitale: la Roadmap-Raccomandazioni strategiche \& Sequenza}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione italiana sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_IT.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483256, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {DLDP Bide Orria-Gomendatutako politikak \& Kronograma}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione basca sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_EU.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483262, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. and Soria, C.}, TITLE = {Die DLDP Roadmap-Strategieempfehlungen \& Zeitplan}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione tedesca sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_DE.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HBCGKQRSSS_483263, AUTHOR = {Hicks, D. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A. 
and Soria, C.}, TITLE = {La Roadmap DLDP-Recommandations de politique et calendrier}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP rivolte ai decisori politici (versione francese sintetica)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, PAGES = {6}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP-Roadmap_Short-Version_FR.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HSBCGKQRSS_443354, AUTHOR = {Hicks, D. and Soria, C. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.}, TITLE = {Pak treuzveviñ ar Brezhoneg niverel-Erbedoù an DLDP evit gwellaat buhezegezh niverel ar brezhoneg}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del bretone (versione bretone)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Breton_BR.pdf}, } @TECHREPORT{HICKS_2018_TECHREPORT_HSBCGKQRSS_443359, AUTHOR = {Hicks, D. and Soria, C. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Kruse, E. and Quochi, V. and Russo, I. and Salonen, T. and Sarhimaa, A.}, TITLE = {Kit de survie numerique pour la langue bretonne-Les recommandations du DLDP pour améliorer la vitalité numérique du Breton}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale del bretone (versione francese)}, KEYWORDS = {digital vitality, digital diversity, recommendations}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Breton_FR.pdf}, } @TECHREPORT{SALONEN_2018_TECHREPORT_SBCGHKQRSS_443365, AUTHOR = {Salonen, T. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Sarhimaa, A. 
and Soria, C.}, TITLE = {Karjalan digitaalinen kielenselviytymispakkaus-DLDP-suositukset karjalan kielen digitaalisen elinvoimaisuuden parantamiseksi}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per la sopravvivenza digitale della lingua careliana (versione finlandese)}, KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Karelian}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Karelian_FI.pdf}, } @TECHREPORT{SALONEN_2018_TECHREPORT_SBCGHKQRSS_483261, AUTHOR = {Salonen, T. and Baroni, P. and Ceberio, B. K. and Gurrutxaga, H. A. and Hicks, D. and Kruse, E. and Quochi, V. and Russo, I. and Sarhimaa, A. and Soria, C.}, TITLE = {Karjalan digitualine hengihjiämispakkavus-DLDP-rekomendatsiet karjalan kielen digitualizen elinvoimazuon kohendamizeh}, YEAR = {2018}, ABSTRACT = {Le raccomandazioni del progetto DLDP per migliorare la vitalità digitale della lingua careliana (versione careliana)}, KEYWORDS = {digital vitality, digital diversity, digital language survival, recommendations, Karelian}, PAGES = {12}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Survival-Kit-for-Karelian_KRL.pdf}, } @THESIS{NAHLI_2018_THESIS_N_390506, AUTHOR = {Nahli, O.}, TITLE = {Vers une ontologie de la culture arabo-musulmane}, YEAR = {2018}, ABSTRACT = {Le projet vise à décrire les méthodologies permettant de développer un réseau de connaissance pour la culture arabo-islamique sur la base d'un processus d'extractions automatiques de données à partir du lexique arabe al=qamus al=muHiT (qamus). Le choix de qamus est justifié par le fait qu'il a un statut d'autorité dans le monde arabe, au point que la parole qamus [océan] a supplanté la parole mungid 'dictionnaire'. Le projet prévoit divers étapes de travail et, avant tout, l'acquisition d'une version numérique de qamus. 
La mise au point d'algorithmes pour la codification partielle et automatique de la macrostructure lexicale et la conversion du lexique en format XML. D'autres algorithmes permettent l'identification de la microstructure lexicale et, l'annotation de chaque partie constituante de l'entrée lexicale, entre autres, le lemme, sa nature morphologique, ses définitions, etc. En utilisant deux dictionnaires bilingues arabe-anglais, un système de recherche permet de trouver, de manière automatique et quand c'est possible, la traduction de chaque lemme, ce qui permet de le lier au synset correspondant dans PWN et au concept de SUMO à qui il pourrait faire référence. Une autre étape serait l'analyse de divers échantillons de lemmes pour détecter la validité des résultats.}, KEYWORDS = {al qamus al muHiyT, ontologie, langue arabe, Wordnet, PWN, SUMO (The Suggested Upper Merged Ontology)}, PAGES = {317}, URL = {https://publications.cnr.it/doc/390506}, } @MISC{BOSCHETTI_2018_MISC_BD_390656, AUTHOR = {Boschetti, F. and Del Grosso, A. M.}, TITLE = {Euporia: Piattaforma digitale per l'annotazione tramite Domain Specific Languages di testi multilingui disposti in parallelo}, YEAR = {2018}, ABSTRACT = {Piattaforma digitale per l'annotazione tramite Domain Specific Languages di testi multilingui disposti in parallelo}, KEYWORDS = {digital humanities, computational philology, digital philology}, URL = {https://github.com/CoPhi/euporia}, } @MISC{CEBERIO_2018_MISC_CGSRQ_440548, AUTHOR = {Ceberio, K. and Gurrutxaga, A. and Soria, C. and Russo, I. and Quochi, V.}, TITLE = {How to Use the Digital Language Vitality Scale}, YEAR = {2018}, ABSTRACT = {The Digital Language Vitality Scale is an instrument developed within the framework of the Digital Language Diversity Project (www.dldp.eu) for estimating the degree of digital vitality of any given language. 
It aims to be an instrument for self-assessment of the digital vitality of any language, although it is aimed in particular at identifying current gaps, needs and requirements regarding the extent to which a language community is active/vital on digital media and devices so that adequate digital language planning can be done. This document instructs prospective adopters on how to best use it.}, KEYWORDS = {Diversità Linguistica, BLARK, Sopravvivenza linguistica digitale}, PAGES = {18}, URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Digital-Language-Vitality-Scale.pdf}, } @MISC{CRISTOFARO_2018_MISC_CDS_445805, AUTHOR = {Cristofaro, S. and Del Grosso, A. M. and Spampinato, D.}, TITLE = {Chiosco Voci di Pietra}, YEAR = {2018}, ABSTRACT = {Il chiosco multimediale Voci di Pietra è stato installato come postazione in locale all'interno del percorso museale in occasione della omonima mostra. Il totem touch screen permette la navigazione tra le informazioni raccolte nelle schede, nei video e nelle immagini digitalizzate, sia in italiano che in inglese, per favorire l'approfondimento della visita.}, KEYWORDS = {Museum, Epigraphy, EpiDoc, TEI}, URL = {https://publications.cnr.it/doc/445805}, } @MISC{DELGROSSO_2018_MISC_DM_484667, AUTHOR = {Del Grosso, A. M. and Marchi, S.}, TITLE = {Edizione digitale del Rotulo di San Teobaldo}, YEAR = {2018}, ABSTRACT = {Applicazione web per la consultazione dell'edizione digitale del Rotulo di San Teobaldo proprietà della Diocesi di Alba.}, KEYWORDS = {digital philology, web application, evt, enhanced visualization}, URL = {https://www.visitmudi.it/rotulo-di-san-teobaldo/}, } @MISC{DELGROSSO_2018_MISC_DM_484669, AUTHOR = {Del Grosso, A. M. 
and Marchi, S.}, TITLE = {Bellininrete Web Application}, YEAR = {2018}, ABSTRACT = {Applicazione web sviluppata in seno al progetto Bellininrete per lo studio e la consultazione della corrispondenza del maestro catanese Vincenzo Bellini.}, KEYWORDS = {Digital Edition, Digital Scholarly Platform, web application}, URL = {http://bellinicorrespondence.cnr.it/evt}, } @MISC{DELGROSSO_2018_MISC_DMA_390394, AUTHOR = {Del Grosso, A. M. and Marchi, S. and Albanesi, D.}, TITLE = {Omega Project: Omega: Piattaforma Multi-modulare per lo studio scientifico del testo}, YEAR = {2018}, ABSTRACT = {Piattaforma per lo studio del testo con prospettiva scientifico-filologica.}, KEYWORDS = {digital humanities, computational philology, software engineering}, URL = {https://github.com/literarycomputinglab/OmegaProject}, } @ARTICLE{BARTOLINI_2017_ARTICLE_BPGGB_369103, AUTHOR = {Bartolini, R. and Pardelli, G. and Goggi, S. and Giannini, S. and Biagioni, S.}, TITLE = {A terminological "journey" in the Grey Literature domain}, YEAR = {2017}, ABSTRACT = {"It is by means of terms that the expert usually transfer their knowledge and again through terms scientific communication reaches the highest effectiveness. Therefore we can assert that terminology - in the sense of a set of representative and domain-specific units - is necessary for representing and connecting specialized fields as well as any attempt to represent and/or transfer scientific knowledge requires, more or less extensively, the use of terminology." (Cabré, 2000). "When we read the articles or papers of a particular domain, we can recognize some lexical items in the texts as technical terms. In a domain where new knowledge is generated, new terms are constantly created to fulfill the needs of the domain, while others become obsolete. In addition, existing terms may undergo changes of meaning..." (Kageura K., 1998/1999). 
Specialized lexicons are made up of the terms which are specific to each field of knowledge, «a subset which is distinct but not separated from the common language» (Cassese, 1992): it is usually difficult to extract the relevant domain-specific terminology, meaning to discern terms which belong to a specialized glossary from those belonging to the common dictionary. The interest in the study of terminology and the "truth" contained in the above definitions has led us to make a "journey" in the Grey Literature (GL) domain in order to offer an overall vision on the terms used and the links between them. Within this scenario, the work analyzes a corpus constituted of the entire amount of full research papers published in the GL conference series over a time-span of more than one decade (2003-2014) with the aim of creating a terminological map of relevant words in the various GL research topics. "... corpora used to extract terminological units can be further investigated to find semantic and conceptual information on terms or to represent conceptual relationships between terms." (Bourigault D. et al., 2001). Another interesting inquiry is the terminology used in the GL conferences for describing the types of documents which can be detected (Pejšová P. et al., 2012).}, KEYWORDS = {Grey Literature, Information Extraction IE, Terminology}, PAGES = {41-53}, URL = {http://www.greynet.org/thegreyjournal/currentissue.html}, VOLUME = {13}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{BIZZONI_2017_ARTICLE_BRD_382152, AUTHOR = {Bizzoni, Y. and Reboul, M. 
and Del Grosso, A.}, TITLE = {Diachronic trends in Homeric translations}, YEAR = {2017}, ABSTRACT = {In this paper we intend to present a tool we developed for translation studies and diachronically compare various French translations of the Odyssey.}, KEYWORDS = {Digital Humanities, Diachronic translations, Software design and engineering, Natural Language Processing}, PAGES = {26}, URL = {http://www.digitalhumanities.org/dhq/vol/11/2/000297/000297.html}, VOLUME = {11}, PUBLISHER = {Alliance of Digital Humanities Organizations ([Providence, RI?], Stati Uniti d'America)}, ISSN = {1938-4122}, JOURNAL = {Digital humanities quarterly}, } @ARTICLE{BOMPOLAS_2017_ARTICLE_BFMCP_380237, AUTHOR = {Bompolas, S. and Ferro, M. and Marzi, C. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {For a performance-oriented notion of regularity in inflection: the case of Modern Greek conjugation}, YEAR = {2017}, ABSTRACT = {Paradigm-based approaches to word processing/learning assume that word forms are not acquired in isolation, but through associative relations linking members of the same word family (e.g. a paradigm, or a set of forms filling the same paradigm cell). Principles of correlative learning offer a set of equations that are key to modelling this complex dynamic at a considerable level of detail. We use these equations to simulate acquisition of Modern Greek conjugation, and we compare the results with evidence from German and Italian. Simulations show that different Greek verb classes are processed and acquired differentially, as a function of their degrees of formal transparency and predictability. 
We relate these results to psycholinguistic evidence of Modern Greek word processing, and interpret our findings as supporting a view of the mental lexicon as an emergent integrative system.}, KEYWORDS = {paradigm-based morphology, gradient (ir)regularity, recurrent self-organising networks}, PAGES = {77-92}, URL = {http://www.ai-lc.it/IJCoL/v3n1/IJCOL_3_1_5_bompolas_et_al.pdf?v=2a47ad90f2ae}, VOLUME = {3}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{CONNOR_2017_ARTICLE_CCVR_363052, AUTHOR = {Connor, R. and Cardillo, F. A. and Vadicamo, L. and Rabitti, F.}, TITLE = {Hilbert exclusion: improved metric search through finite isometric embeddings}, YEAR = {2017}, ABSTRACT = {Most research into similarity search in metric spaces relies on the triangle inequality property. This property allows the space to be arranged according to relative distances to avoid searching some subspaces. We show that many common metric spaces, notably including those using Euclidean and Jensen-Shannon distances, also have a stronger property, sometimes called the four-point property: In essence, these spaces allow an isometric embedding of any four points in three-dimensional Euclidean space, as well as any three points in two-dimensional Euclidean space. In fact, we show that any space that is isometrically embeddable in Hilbert space has the stronger property. This property gives stronger geometric guarantees, and one in particular, which we name the Hilbert Exclusion property, allows any indexing mechanism which uses hyperplane partitioning to perform better. One outcome of this observation is that a number of state-of-the-art indexing mechanisms over high-dimensional spaces can be easily refined to give a significant increase in performance; furthermore, the improvement given is greater in higher dimensions. 
This therefore leads to a significant improvement in the cost of metric search in these spaces.}, KEYWORDS = {Similarity search, Metric space, Metric indexing, Four-point property, Hilbert embedding, H. Information systems. Data structures, H. Information systems. Multidimensional range search, H. Information systems. Proximity search, H. Information systems. Database query processing, H. Information systems. Retrieval models and ranking, Information systems. Retrieval efficiency, H. Information systems. Multimedia information systems, F. Theory of computation. Random projections and metric embeddings}, PAGES = {17-27}, URL = {http://doi.acm.org/10.1145/3001583}, VOLUME = {35}, DOI = {10.1145/3001583}, PUBLISHER = {Association for Computing Machinery (New York, NY, Stati Uniti d'America)}, ISSN = {1046-8188}, JOURNAL = {ACM transactions on information systems}, } @ARTICLE{FERRARI_2017_ARTICLE_FDEGG_382166, AUTHOR = {Ferrari, A. and Dell'Orletta, F. and Esuli, A. and Gervasi, V. and Gnesi, S.}, TITLE = {Natural language requirements processing: a 4D vision}, YEAR = {2017}, ABSTRACT = {Natural language processing (NLP) and requirements engineering (RE) have had a long relationship, yet their combined use isn't well established in industrial practice. This situation should soon change. The future evolution of the application of NLP technologies in RE can be viewed from four dimensions: discipline, dynamism, domain knowledge, and datasets.}, KEYWORDS = {Natural Language Processing, Requirement Processing}, PAGES = {28-35}, URL = {http://ieeexplore.ieee.org/abstract/document/8106888/}, VOLUME = {34}, DOI = {10.1109/MS.2017.4121207}, PUBLISHER = {IEEE Computer Society ([Los Alamitos, CA], Stati Uniti d'America)}, ISSN = {0740-7459}, JOURNAL = {IEEE software}, } @ARTICLE{GIANNINI_2017_ARTICLE_GBGP_369104, AUTHOR = {Giannini, S. and Biagioni, S. and Goggi, S. 
and Pardelli, G.}, TITLE = {Grey Literature Citations in the age of Digital Repositories and Open Access}, YEAR = {2017}, ABSTRACT = {The work measures grey citations in the years 2012, 2013 and 2014 and then describes the features of GL documents cited in different areas of knowledge: Computational Linguistics, Computer Science and Engineering. With the aim of surveying a wide and varied range of resources, we selected a sample data based on the bibliographical references of articles contained in four journals - all indexed by Scopus Citation Database and ISI Web of Science, with an Impact Factor (IF) over the last three years - and two proceedings of international conferences held in 2012 and 2014.}, KEYWORDS = {Grey Literature, Citations}, PAGES = {23-31}, URL = {http://www.greynet.org/thegreyjournal/currentissue.html}, VOLUME = {13}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{GIOVANNETTI_2017_ARTICLE_GABB_364947, AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Benotto, G.}, TITLE = {Traduco: A collaborative web-based CAT environment for the interpretation and translation of texts}, YEAR = {2017}, ABSTRACT = {Traduco is a web-based collaborative tool aimed at supporting the translation of texts that pose particular challenging interpretative issues. Nowadays, Computer-Assisted Translation (CAT) tools are mainly applied to the translation of technical manuals or legislative texts and are aimed at speeding up the translation process. Traduco extends most of the standard components of a traditional CAT tool with specific features necessary to support the interpretation and translation of complex texts (like the Babylonian Talmud, that we here present as a case study), which pose particular comprehension issues. Traduco goes beyond the translation and its printing: it includes features for the addition of notes and annotations and the creation of glossaries. 
Translators, editors, supervisors, and end-users accessing Traduco are able to use components that can ease the translation process through the use of CAT technologies, the supervision and managing of the whole process of translation and publishing, the exporting of translations and notes in standard formats for desktop publishing software and TEI format, and, soon, the possibility to perform automatic linguistic analysis of the text. Moreover, Traduco allows the users to insert notes, comments, annotations, and bibliographical references. The design and development of Traduco required the adoption of a multidisciplinary approach, leveraging on advances in software engineering, computational linguistics, knowledge engineering, and publishing.}, KEYWORDS = {computer-assisted translation, talmud, progetto traduzione del talmud babilonese}, PAGES = {47-62}, URL = {http://dsh.oxfordjournals.org/content/early/2016/10/26/llc.fqw054}, VOLUME = {32}, DOI = {10.1093/llc/fqw054}, PUBLISHER = {Oxford University Press (Oxford, UK, Regno Unito)}, ISSN = {2055-7671}, JOURNAL = {Digital Scholarship in the Humanities}, } @ARTICLE{MARZI_2017_ARTICLE_MFN_363116, AUTHOR = {Marzi, C. and Ferro, M. and Nahli, O.}, TITLE = {Arabic word processing and morphology induction through adaptive memory self-organisation strategies}, YEAR = {2017}, ABSTRACT = {Aim of the present study is to model the human mental lexicon, by focussing on storage and processing dynamics, as lexical organisation relies on the process of input recoding and adaptive strategies for long-term memory organisation. A fundamental issue in word processing is represented by the emergence of the morphological organisation level in the lexicon, based on paradigmatic relations between fully-stored word forms. 
Morphology induction can be defined as the task of perceiving and identifying morphological formatives within morphologically complex word forms, as a function of the dynamic interaction between lexical representations and distribution and degrees of regularity in lexical data. In the computational framework we propose here (TSOMs), based on Self-Organising Maps with Hebbian connections defined over a temporal layer, the identification/perception of surface morphological relations involves the alignment of recoded representations of morphologically-related input words. Facing a non-concatenative morphology such as the Arabic inflectional system prompts a reappraisal of morphology induction through adaptive organisation strategies, which affect both lexical representations and long-term storage. We will show how a strongly adaptive self-organisation during training is conducive to emergent relations between word forms, which are concurrently, redundantly and competitively stored in human mental lexicon, and to generalising knowledge of stored words to unknown forms.}, KEYWORDS = {Non-concatenative morphological structure, Lexical storage and access, Topological alignment, Synchronisation, Self-Organising Maps}, PAGES = {179-188}, URL = {http://www.sciencedirect.com/science/article/pii/S1319157816301148}, VOLUME = {29}, DOI = {10.1016/j.jksuci.2016.11.006}, PUBLISHER = {Elsevier (Amsterdam, Paesi Bassi)}, ISSN = {2213-1248}, JOURNAL = {Journal of King Saud University. Computer and information sciences (Online)}, } @ARTICLE{ROBERTSON_2017_ARTICLE_RB_381705, AUTHOR = {Robertson, B. and Boschetti, F.}, TITLE = {Large-Scale Optical Character Recognition of Ancient Greek}, YEAR = {2017}, ABSTRACT = {This paper documents our campaign to undertake the large-scale optical character recognition of ancient, or polytonic, Greek. 
Building upon the Gamera OCR engine and developing a suite of post-processing tools, including automatic spellcheck, we processed 1,200 volumes comprising 329,002,271 Greek words. A sample of 10 pages is studied in detail; they demonstrate the degree to which each step of post-processing improved the results, and with which source documents. These pages attain an average character accuracy of about 96%. These results will provide a basis for further improvements, including the training of other open-source OCR engines.}, KEYWORDS = {OCR, Ancient Greek}, PAGES = {341-359}, URL = {https://doi.org/10.3138/mous.14.3-3}, VOLUME = {14 (III series)}, PUBLISHER = {University of Calgary Press, for the Classical Association of Canada (Calgary, Canada)}, ISSN = {1496-9343}, JOURNAL = {Mouseion (Calg.)}, } @ARTICLE{VENTURI_2017_ARTICLE_VDMFB_382249, AUTHOR = {Venturi, G. and Dell'Orletta, F. and Montemagni, S. and Flore, E. and Bellandi, T.}, TITLE = {La qualità dei consensi informati. Un'analisi linguistico-computazionale della leggibilità dei testi}, YEAR = {2017}, ABSTRACT = {La leggibilità dei testi delle informative di consenso per le procedure diagnostico-terapeutiche è un requisito fondamentale, per offrire alle persone assistite l'accesso alle informazioni necessarie a una scelta consapevole delle opzioni disponibili per curare i diversi problemi di salute. La disponibilità di un testo leggibile è inoltre un aiuto per i medici responsabili della comunicazione e della raccolta del consenso, che possono impiegarlo come un ausilio alle informazioni presentate in forma verbale durante il colloquio, in modo tale da poter condividere una base di conoscenze minime da condividere con il paziente e i suoi familiari. 
Seppure le evidenze siano limitate in merito alla relazione tra la qualità del consenso e l'attitudine al contenzioso da parte dei pazienti in caso di trattamenti che esitano in un danno attribuibile alle cure (Durand et al., 2015), si tratta di un ambito di ricerca di crescente interesse nella letteratura sulla sicurezza (Wu et al., 2005; Manta et al., 2017). Nella casistica regionale della Toscana sulle richieste di risarcimento, solo l'1% dei sinistri include problemi di consenso informato (dati Centro GRC), probabilmente anche a causa di una sottovalutazione del diritto all'informazione da parte dei cittadini che si sottopongono a interventi programmati, connessa con una limitata consapevolezza del potere di scegliere le proprie cure che ogni persona dovrebbe poter esercitare posta di fronte alle opzioni terapeutiche disponibili per i propri problemi di salute.}, KEYWORDS = {Consenso informato, valutazione automatica della leggibilità, Trattamento Automatico del Linguaggio}, PAGES = {35-39}, URL = {http://www.formas.toscana.it/rivistadellasalute/fileadmin/files/fascicoli/2017/212/SeT_fascicolo_212.pdf}, VOLUME = {212}, PUBLISHER = {ETS (Pisa, Italia)}, ISSN = {0392-4505}, JOURNAL = {Salute e territorio}, } @INCOLLECTION{BOSCHETTI_2017_INCOLLECTION_BDD_382022, AUTHOR = {Boschetti, F. and Del Gratta, R. and Del Grosso, A. M.}, TITLE = {The role of digital scholarly editors in the design of components for cooperative philology}, YEAR = {2017}, ABSTRACT = {This contribution is focused on the role of the digital scholarly editor in the continuous process of analysis, development and evaluation of libraries of components for cooperative philology.}, KEYWORDS = {digital editions, collaborative and cooperative philology, digital philology}, PAGES = {249-253}, URL = {https://www.sidestone.com/books/advances-in-digital-scholarly-editing}, ISBN = {978-90-8890-484-4}, BOOKTITLE = {Advances in Digital Scholarly Editing}, EDITOR = {Boot, P. and Cappellotto, A. and Dillen, W. 
and Fischer, F. and Kelly, A. and Mertgens, A. and Sichani, A. and Spadini, E. and Van Hulle, D.}, } @INCOLLECTION{COPPOLA_2017_INCOLLECTION_CMRT_371344, AUTHOR = {Coppola, D. and Moretti, R. and Russo, I. and Tranchida, F.}, TITLE = {In quante lingue mangi? Tecniche glottodidattiche e language testing in classi plurilingui e ad abilità differenziata}, YEAR = {2017}, ABSTRACT = {La diversità linguistica e culturale costituisce oggi una caratteristica strutturale delle nostre scuole. La necessità, caldeggiata dai principali documenti nazionali ed europei sull'educazione, di considerare le lingue "immigrate", assieme a tutte le altre presenti in classe, come un'importante risorsa linguistica e culturale per gli alunni, impone la ricerca di metodologie d'insegnamento idonee a valorizzare l'intero repertorio linguistico dei ragazzi e di prove di verifica atte a testare in modo più adeguato le loro competenze linguistico-comunicative, senza trascurare gli importanti aspetti culturali veicolati dalle diverse lingue. Il presente contributo riporta i primi dati di un'indagine che si colloca nell'ambito di una più ampia sperimentazione, avviata nel gennaio 2015, in alcune classi del quinto anno della scuola primaria e del primo anno della secondaria di primo grado della provincia di Firenze, con l'obiettivo di implementare tecniche glottodidattiche cooperative supportate dalla tecnologia in classi plurilingui e ad abilità differenziata.}, KEYWORDS = {language testing, glottodidattica, plurilinguismo}, PAGES = {199-231}, URL = {https://publications.cnr.it/doc/371344}, PUBLISHER = {Pisa University Press (Pisa, ITA)}, ISBN = {978-88-6741-789-6}, BOOKTITLE = {Strutture linguistiche e dati empirici in diacronia e sincronia}, EDITOR = {Marotta, G. and Lievers, F. 
S.}, } @INCOLLECTION{DELGRATTA_2017_INCOLLECTION_D_483914, AUTHOR = {Del Gratta, R.}, TITLE = {WordNets per lingue classiche}, YEAR = {2017}, ABSTRACT = {The Wordnet for Ancient Greek (AGWN) is presented and illustrated both as a stand-alone semantic net and as a participant in a more complex net of wordnets for historical and modern languages. Two applications of AGWN carried out within the Memorata Poetis project are described: in the first application, the AGWN is used to investigate multilingual synonyms; in the second one, AGWN is used to extract Greek synonyms in order to classify epigrams in terms of similar content.}, KEYWORDS = {WordNet, Perseus project, Ancient Greek Wordnet, oNLP, Classical Languages}, PAGES = {117-122}, URL = {https://publications.cnr.it/doc/483914}, VOLUME = {14/3}, DOI = {10.14277/6969-182-9/ANT-14-9}, PUBLISHER = {Edizioni Ca' Foscari (Venezia, ITA)}, ISBN = {978-88-6969-183-6}, BOOKTITLE = {Strumenti digitali e collaborativi per le Scienze dell'antichità}, EDITOR = {Mastandrea, P.}, } @INCOLLECTION{MANZELLA_2017_INCOLLECTION_MBBDDFMMMNS_368363, AUTHOR = {Manzella, G. M. R. and Bartolini, R. and Bustaffa, F. and D'Angelo, P. and De Mattei, M. and Frontini, F. and Maltese, M. and Medone, D. and Monachini, M. and Novellino, A. and Spada, A.}, TITLE = {Semantic Search Engine for Data Management and Sustainable Development: Marine Planning Service Platform}, YEAR = {2017}, ABSTRACT = {This chapter presents a computer platform supporting a Marine Information and Knowledge System based on a repository that gathers, classify and structures marine scientific literature and data, guaranteeing their accessibility by means of standard protocols. This requires the access to quality controlled data and to information that is provided in grey literature and/or in relevant scientific literature. There exist efforts to develop search engines to find author's contributions to scientific literature or publications. 
This implies the use of persistent identifiers. However very few efforts are dedicated to link publications to data that was used, or cited in them or that can be of importance for the published studies. Full-text technologies are often unsuccessful since they assume the presence of specific keywords in the text; to fix this problem, it is suggested to use different semantic technologies for retrieving the text and data and thus getting much more complying results.}, KEYWORDS = {Marine Information and Knowledge System}, PAGES = {127-154}, URL = {http://www.igi-global.com/chapter/semantic-search-engine-for-data-management-and-sustainable-development/166839#}, VOLUME = {Volume 7}, DOI = {10.4018/978-1-5225-0700-0.ch006}, PUBLISHER = {IGI Global (Hershey, USA)}, BOOKTITLE = {Oceanographic and Marine Cross-Domain Data Management for Sustainable Development}, EDITOR = {Diviacco, P. and Leadbetter, A. and Glaves, H.}, } @INCOLLECTION{MARCONI_2017_INCOLLECTION_MMC_406349, AUTHOR = {Marconi, L. and Miyares, L. R. and Cutugno, P.}, TITLE = {Características distributivas del español de Cuba a partir del análisis de dos estudios lingüísticos}, YEAR = {2017}, ABSTRACT = {El objetivo de este artículo es realizar un estudio sobre las características distributivas de un subconjunto del español de Cuba; se refiere a la información recogida en el Diccionario Ortográfico del Español -con la exclusión de las lexías complejas y de las locuciones-, o sea de los 7927 lemas que han sido generados a partir de las 93759 palabras flexionadas con sus categorías gramaticales; también se analizarán los datos del Léxico Activo-Funcional del Escolar Cubano para hacer un mapeo de las palabras utilizadas por los escolares. El artículo contiene también una serie de comparaciones de algunas propiedades distributivas derivadas del tratamiento de los datos del DOE con los resultados obtenidos mediante el análisis del Léxico Activo-Funcional del Escolar Cubano. 
Cuando se habla de propiedades distributivas de una lengua, estas pueden ser consideradas en relación con los lemas, con las diversas palabras flexionadas o con el número de apariciones en un corpus específico. En este artículo nos referiremos a una muestra de lemas y flexiones derivada del Diccionario Ortográfico del Español y solamente a las flexiones del Léxico Activo-Funcional del Escolar Cubano.}, KEYWORDS = {Léxico, escolares, Cuba, propiedades distributivas}, PAGES = {87-111}, URL = {https://publications.cnr.it/doc/406349}, PUBLISHER = {Centro de Lingüística Aplicada, Ministerio de Ciencia, Tecnología y Medio Ambiente (Santiago de Cuba, CUB)}, ISBN = {978-959-7174-34-9}, BOOKTITLE = {Estudios de Lexicología y Lexicografía Homenaje a Eloína Miyares Bermúdez}, EDITOR = {Miyares, L. R.}, } @INCOLLECTION{MONTEMAGNI_2017_INCOLLECTION_MW_367892, AUTHOR = {Montemagni, S. and Wieling, M.}, TITLE = {Exploring the role of extra-linguistic factors in defining dialectal variation patterns through cluster comparison}, YEAR = {2017}, ABSTRACT = {This paper contributes to two open issues in the dialectometric literature, i.e. i) whether and how patterns of linguistic variation are influenced by extra-linguistic features such as the geomorphology of the area, or cultural, administrative and political boundaries, and ii) whether and how the influence of extra-linguistic factors remains stable across linguistically-grounded partitions of data. To investigate these issues, a case study focusing on lexical variation has been carried out on a regional lexical atlas of Tuscan dialects. 
A variety of extra-linguistic features was taken into account, whose impact and role has been evaluated with respect to both the whole dialectal dataset and across different semantic fields.}, KEYWORDS = {dialectometric literature, dialectology, linguistic variation, dialect, Tuscan, lexical atlas}, PAGES = {241-251}, URL = {http://www.let.rug.nl/festschriftnerbonne/25.%20Montemagni%20\&%20Wieling.pdf}, VOLUME = {Tributes 32}, BOOKTITLE = {From Semantics to Dialectometry. Festschrift in honor of John Nerbonne}, EDITOR = {Wieling, M. and Kroon, M. and Van Noord, G. and Bouma, G.}, } @INCOLLECTION{SASSI_2017_INCOLLECTION_S_382198, AUTHOR = {Sassi, M.}, TITLE = {Cuestiones pertinentes e impertinentes de los Diccionarios Temáticos}, YEAR = {2017}, ABSTRACT = {The Dictionaries in general, and in particular the thematic ones, have represented the thread of my career in the Istituto di Linguistica Computazionale of Pisa from and before its origins (in the years 1965-1978 it was called Divisione Linguistica of the CNUCE and later ILC- CNR). In the '60 -'70 we worked on the DMI (Italian Machine Dictionary) under the auspices of the Italian Parliament. In the 1980s, the first studies of dictionaries organized in lexical fields began, starting from the intuitions of Julio Casares and the same principles were applied to Italian. From these distant experiences, over 50 years, different studies, applications, corpora and databases have been developed in relation to several disciplines: Literature, Philology, Law, Justice, Administration, Tourism, Medicine, etc. There have been compilations of several Authors in Spanish language: Bolivar, Carpentier, Cervantes, Encina, Machado, Marquez, Neruda, Salinas, Teresa de Avila, Unamuno, Vallejo, that will be described in this presentation, with examples of online search with the DBT-Web interface. 
It is also discussed to preserve this data for the future through its maintenance for consultation on the network.}, KEYWORDS = {Computational Linguistics, Thematic Dictionaries, Corpora, Preservation and Reuse of data, Historical overview of Text Processing}, PAGES = {37-49}, URL = {https://publications.cnr.it/doc/382198}, VOLUME = {VII}, ISBN = {978-84-617-4512-8}, BOOKTITLE = {El diccionario en la encrucijada: de la sintaxis y la cultura al desafío digital}, EDITOR = {López, S. and Cuadrado, I. G. and Escribano, J. G. and Cecilio}, } @INCOLLECTION{SORIA_2017_INCOLLECTION_S_382104, AUTHOR = {Soria, C.}, TITLE = {What is Digital Language Diversity and why should we care?}, YEAR = {2017}, ABSTRACT = {The relationship between language and the Internet is a growing area of policy interest and academic study, see for instance (MAAYA 2012), (Paolillo et al. 2005), (Pimienta 2001), (Kornai 2013), (Pimienta et al. 2009), (Rehm and Uszkoreit 2012). The emerging picture is one where language profoundly affects a person's experience of the Internet. It determines how much - if any - information you can access on Wikipedia. It orients a person's choices and decisions by shaping the results of a search engine, depending on the language used. It determines the range of services that can be available over the Internet, and therefore the amount of everyday tasks (such as buying a ticket, reviewing opinions about hotel and restaurants, purchasing books or other goods, etc.) that can be carried out virtually. Far from infinite, the Internet, it seems, is only as big as one's language. Should this hold true, it would be at odds with the original spirit of the Internet, which - according to the words of Tim Berners-Lee - would be a place "to cross barriers and connect cultures". But it is safe to argue that the extent to which a language can be used over the Internet not only affects a person's experience and choice of opportunities; it also affect the language itself. 
If a language is poorly or not supported to be used over digital devices, for instance if the keyboard of the PC is not equipped with the characters and diacritics necessary to write in the language, or if there is no spell checker for a language, then its usability becomes severely affected, and it might never be used online. The language could become "digitally endangered", and its value and profile could be lessened, especially in the eyes of the new generations. These considerations call for closer examination of a number of related issues. First, the "digital language diversity", i.e. the linguistic diversity of the Internet. Second, it is important to reflect on the conditions that make it possible for a language to be used over digital devices, and about what can be done in order to grant this possibility to languages other than so-called "major" ones.}, KEYWORDS = {digital language diversity}, PAGES = {13-28}, URL = {http://www.linguapax.org/wp-content/uploads/2015/03/LinguapaxReview2016web.pdf}, } @EDITORIAL{DISEGNI_2017_EDITORIAL_D_383158, AUTHOR = {Di Segni, D. G.}, TITLE = {Talmud Babilonese - Trattato Berakhòt}, YEAR = {2017}, ABSTRACT = {Curatela della traduzione italiana commentata del trattato del Talmud Babilonese "Berakhòt" in due volumi}, KEYWORDS = {Talmud, Traduco, Linguistica computazionale}, PAGES = {932}, URL = {https://publications.cnr.it/doc/383158}, VOLUME = {1*, 1**}, PUBLISHER = {Giuntina (Firenze, ITA)}, ISBN = {978-88-8057-668-6}, } @EDITORIAL{PRETORIUS_2017_EDITORIAL_PS_382299, AUTHOR = {Pretorius, L. and Soria, C.}, TITLE = {Language Resources and Evaluation. 
Special Issue: Collaboration and Computing for Under-resourced Languages}, YEAR = {2017}, ABSTRACT = {Special issue of the journal "Language Resources and Evaluation", dedicated to under-resourced languages}, KEYWORDS = {under-resourced languages, language resources, minority languages, endangered languages, small languages}, PAGES = {891-1084}, URL = {https://link.springer.com/journal/10579/51/4/page/1}, VOLUME = {51}, DOI = {10.1007/s10579-017-9405-8}, PUBLISHER = {Springer (Berlin, DEU)}, } @EDITORIAL{SORIA_2017_EDITORIAL_SRQ_382301, AUTHOR = {Soria, C. and Russo, I. and Quochi, V.}, TITLE = {Reports on Digital Language Diversity in Europe}, YEAR = {2017}, ABSTRACT = {In these reports we present the results of the first survey about the actual needs of European minority languages speakers in terms of digital opportunities}, KEYWORDS = {regional languages, minority languages, digital vitality, digital use}, URL = {http://www.dldp.eu/content/reports-digital-language-diversity-europe}, } @EDITORIAL{BRANCO_2017_EDITORIAL_BCVIC_383337, AUTHOR = {Branco, A. and Cohen, K. B. and Vossen, P. and Ide, N. and Calzolari, N.}, TITLE = {Replicability and reproducibility of research results for human language technology: introducing an LRE special section}, YEAR = {2017}, KEYWORDS = {Human Language Technology (HLT), Language Resources (LR)}, PAGES = {1-5}, URL = {https://link.springer.com/article/10.1007/s10579-017-9380-0}, VOLUME = {51}, DOI = {10.1007/s10579-017-9380-0}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, BOOKTITLE = {Language resources and evaluation (Print)}, } @EDITORIAL{PIRRELLI_2017_EDITORIAL_PZ_381161, AUTHOR = {Pirrelli, V. 
and Zarghili, A.}, TITLE = {Arabic Natural Language Processing: Models, systems and applications}, YEAR = {2017}, KEYWORDS = {Natural Language Processing, Standard Modern Arabic}, PAGES = {A1-A3}, URL = {https://www.sciencedirect.com/science/article/pii/S1319157817301155}, VOLUME = {29}, DOI = {10.1016/j.jksuci.2017.04.004}, PUBLISHER = {Elsevier (Amsterdam, Paesi Bassi)}, ISSN = {2213-1248}, BOOKTITLE = {Journal of King Saud University. Computer and information sciences (Online)}, } @EDITORIAL{PRETORIUS_2017_EDITORIAL_PS_382062, AUTHOR = {Pretorius, L. and Soria, C.}, TITLE = {Introduction to the Special Issue}, YEAR = {2017}, KEYWORDS = {language resources, under-resourced languages, minority languages}, PAGES = {891-895}, URL = {https://link.springer.com/article/10.1007%2Fs10579-017-9405-8}, VOLUME = {51}, DOI = {10.1007/s10579-017-9405-8}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, BOOKTITLE = {Language resources and evaluation (Print)}, } @INPROCEEDINGS{BARTOLINI_2017_INPROCEEDINGS_BPGGB_368487, AUTHOR = {Bartolini, R. and Pardelli, G. and Goggi, S. and Giannini, S. and Biagioni, S.}, TITLE = {A terminological "journey" in the Grey Literature domain}, YEAR = {2017}, ABSTRACT = {The work analyzes a corpus constituted of the entire amount of full research papers published in the GL conference series over a time-span of more than one decade (2003-2014) with the aim of creating a terminological map of relevant words in the various GL research topics. "... corpora used to extract terminological units can be further investigated to find semantic and conceptual information on terms or to represent conceptual relationships between terms." (Bourigault D. et al., 2001). Another interesting inquiry is the terminology used in the GL conferences for describing the types of documents which can be detected (Pejšová P. 
et al., 2012).}, KEYWORDS = {Grey Literature, Information Extraction IE, Terminology}, PAGES = {117-130}, URL = {https://publications.cnr.it/doc/368487}, VOLUME = {18}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-30-2}, CONFERENCE_NAME = {Eighteenth International Conference on Grey Literature (GL18): Leveraging Diversity in Grey Literature}, CONFERENCE_PLACE = {New York, US}, CONFERENCE_DATE = {November 28-29, 2016}, BOOKTITLE = {Proceedings of the Eighteenth International Conference on Grey Literature (GL18): Leveraging Diversity in Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{BELLANDI_2017_INPROCEEDINGS_BGPW_377421, AUTHOR = {Bellandi, A. and Giovannetti, E. and Piccini, S. and Weingart, A.}, TITLE = {Developing LexO: a Collaborative Editor of Multilingual Lexica and Termino-ontological Resources in the Humanities}, YEAR = {2017}, ABSTRACT = {In this paper we present a first version of LexO, a collaborative editor of multilingual lexica and termino-ontological resources. It is based on the lemon model, and aims at supporting lexicographers and terminologists in their work. Although the development of LexO is still ongoing, the editor is already being used within two research projects in the field of Computational Linguistics applied to Humanities: DiTMAO and Totus Mundus. 
This allowed to test the functionalities of LexO, and prove its high degree of flexibility according to the different extensions of the lemon model needed to fulfill the needs of the involved scholars.}, KEYWORDS = {lexo, multi-lingual termino-ontological resources, DiTMAO, Totus Mundus}, URL = {http://www.aclweb.org/anthology/W17-7000}, CONFERENCE_NAME = {Workshop on Language, Ontology, Terminology and Knowledge Structures-LOTKS 2017}, CONFERENCE_PLACE = {Montpellier}, CONFERENCE_DATE = {19/09/2017}, BOOKTITLE = {Proceedings of Language, Ontology, Terminology and Knowledge Structures Workshop (LOTKS 2017)}, } @INPROCEEDINGS{BRUNATO_2017_INPROCEEDINGS_BD_382461, AUTHOR = {Brunato, D. and Dell'Orletta, F.}, TITLE = {On the order of words in Italian: a study on genre vs complexity}, YEAR = {2017}, ABSTRACT = {In this paper we present a cross-genre study on word order variation in Italian based on automatically dependency-parsed corpora. A comparative analysis focused on dependency direction and dependency distance for major constituents in the sentence is carried out in order to assess the influence of both textual genre and linguistic complexity on the distribution of phenomena of syntactic markedness.}, KEYWORDS = {word order, syntactic analysis, linguistic complexity, natural language processing}, PAGES = {25-31}, URL = {https://publications.cnr.it/doc/382461}, CONFERENCE_NAME = {International Conference on Dependency Linguistics (Depling 2017)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {18-20/09/2017}, } @INPROCEEDINGS{CARDILLO_2017_INPROCEEDINGS_CFMP_381090, AUTHOR = {Cardillo, F. A. and Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {How "deep" is learning word inflection?}, YEAR = {2017}, ABSTRACT = {Machine learning offers two basic strategies for morphology induction: lexical segmentation and surface word relation. The first one assumes that words can be segmented into morphemes. 
Inducing a novel inflected form requires identification of morphemic constituents and a strategy for their recombination. The second approach dispenses with segmentation: lexical representations form part of a network of associatively related inflected forms. Production of a novel form consists in filling in one empty node in the network. Here, we present the results of a recurrent LSTM network that learns to fill in paradigm cells of incomplete verb paradigms. Although the process is not based on morpheme segmentation, the model shows sensitivity to stem selection and stem-ending boundaries.}, KEYWORDS = {LSTM, Morphology induction, Cognitive modelling}, PAGES = {77-82}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85037368972\&origin=inward}, VOLUME = {2006}, DOI = {10.4000/books.aaccademia.2314}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {978-88-99982-76-8}, CONFERENCE_NAME = {Fourth Italian Conference on Computational Linguistics}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {11-13/12/2017}, BOOKTITLE = {Proceedings of the Fourth Italian Conference on Computational Linguistics (CLiC-it 2017)}, EDITOR = {Basili, R. and Nissim, M. and Satta, G.}, } @INPROCEEDINGS{CIMINO_2017_INPROCEEDINGS_CWDMV_382252, AUTHOR = {Cimino, A. and Wieling, M. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Identifying predictive features for textual genre classification: The key role of syntax}, YEAR = {2017}, ABSTRACT = {The paper investigates impact and role of different feature types for the specific task of Automatic Genre Classification with the final aim of identifying the most predictive ones. The goal was pursued by carrying out incremental feature selection through Grafting using different sets of linguistic features. 
Achieved results for discriminating among four traditional textual genres show the key role played by syntactic features, whose impact turned out to vary across genres.}, KEYWORDS = {Textual Genre Classification, Feature Selection, Syntactic Features}, PAGES = {1-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85037370866\&origin=inward}, VOLUME = {2006}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {11-12 dicembre 2017}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{DELVIGNA_2017_INPROCEEDINGS_DCDPT_369760, AUTHOR = {Del Vigna, F. and Cimino, A. and Dell'Orletta, F. and Petrocchi, M. and Tesconi, M.}, TITLE = {Hate me, hate me not: Hate speech detection on Facebook}, YEAR = {2017}, ABSTRACT = {While favouring communications and easing information sharing, Social Network Sites are also used to launch harmful campaigns against specific groups and individuals. Cyberbullism, incitement to self-harm practices, sexual predation are just some of the severe effects of massive online offensives. Moreover, attacks can be carried out against groups of victims and can degenerate in physical violence. In this work, we aim at containing and preventing the alarming diffusion of such hate campaigns. Using Facebook as a benchmark, we consider the textual content of comments appeared on a set of public Italian pages. We first propose a variety of hate categories to distinguish the kind of hate. Crawled comments are then annotated by up to five distinct human annotators, according to the defined taxonomy. 
Leveraging morpho-syntactical features, sentiment polarity and word embedding lexicons, we design and implement two classifiers for the Italian language, based on different learning algorithms: the first based on Support Vector Machines (SVM) and the second on a particular Recurrent Neural Network named Long Short Term Memory (LSTM). We test these two learning algorithms in order to verify their classification performances on the task of hate speech recognition. The results show the effectiveness of the two classification approaches tested over the first manually annotated Italian Hate Speech Corpus of social media text.}, KEYWORDS = {Hate speech, NLP, Social Networks}, PAGES = {86-95}, URL = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85017337270\&partnerID=q2rCbXpz}, VOLUME = {1816}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {ITA-SEC 17}, CONFERENCE_PLACE = {Venezia, Italia}, CONFERENCE_DATE = {17-20/01/2017}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{GIOVANNETTI_2017_INPROCEEDINGS_GABDD_377423, AUTHOR = {Giovannetti, E. and Albanesi, D. and Bellandi, A. and Dattilo, D. and Dell'Orletta, F.}, TITLE = {Stylometry in Computer-Assisted Translation: Experiments on the Babylonian Talmud}, YEAR = {2017}, ABSTRACT = {The purpose of this research is to experiment the application of stylometric techniques in the area of Computer-Assisted Translation to reduce the revision effort in the context of a collaborative, large scale translation project. 
The obtained results show a correlation between the editing extent and the compliance to some specific linguistic features, proving that supporting translators in writing translations following a desired style can actually reduce the number of following necessary interventions (and, consequently, save time) by revisors, editors and curators.}, KEYWORDS = {traduco, babylonian talmud, computer-assisted translation, stylometry, readability}, PAGES = {177-182}, URL = {https://publications.cnr.it/doc/377423}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {9788899982942}, CONFERENCE_NAME = {Fourth Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {11-13/12/2017}, BOOKTITLE = {Proceedings of 4th Italian Conference on Computational Linguistics (CLiC-it)}, EDITOR = {Basili, R. and Nissim, M. and Satta, G.}, } @INPROCEEDINGS{MORGAVI_2017_INPROCEEDINGS_MMFMCLC_370916, AUTHOR = {Morgavi, G. and Morando, M. and Ferretti, M. and Marconi, L. and Cutugno, P. and Lucentini, R. and Chiarella, D.}, TITLE = {Active aging: a user centred approach for designing a virtual village network architecture}, YEAR = {2017}, ABSTRACT = {Currently many solutions for domotic housing have been created to provide a better autonomous life for older people and to reduce health and caregiving costs. Unfortunately, assistive technology is often not accepted by old users. This may be due to the technology-oriented approach adopted in designing such systems, which does not consider enough the users' needs. This paper proposes a procedure to design an AAL(Ambient Assisted Living) system to support active aging based on the User-Centred Design approach. A virtual network architecture integrating different solutions have been designed involving final users from the very beginning of the planning stage. 
The result is a highly usable and flexible platform that allows creating user-friendly products as well as services and realizing also high-level functions by integrating data from completely different contexts.}, KEYWORDS = {Virtual Village Network, Active aging, Ambient Assisted Living, User-Centred Design Approach}, PAGES = {5}, URL = {https://publications.cnr.it/doc/370916}, PUBLISHER = {Centro de Lingüística Aplicada, Ministerio de Ciencia, Tecnología y Medio Ambiente (Santiago de Cuba, CUB)}, ISBN = {9789597174325}, CONFERENCE_NAME = {XV° Simposio Internacional de Comunicación Social, Eloína Miyares in memoriam}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {23-27/01/2017}, } @INPROCEEDINGS{PARDELLI_2017_INPROCEEDINGS_PGBRM_367782, AUTHOR = {Pardelli, G. and Goggi, S. and Bartolini, R. and Russo, I. and Monachini, M.}, TITLE = {A Geographical Visualization of GL Communities: A Snapshot}, YEAR = {2017}, ABSTRACT = {This quotation stresses the important role of the several international organizations in producing and disseminating knowledge in the field of Grey Literature (GL): the paper aims to provide a first snapshot of the geographical distribution of GL organizations and their participation to the annual International Conference on Grey Literature over the time (in the period from 2003 to 2015. See List of Conferences on Table 2). Nowadays a visual representation of data is often associated with the traditional statistical graphs, in particular for representing complex phenomena by means of maps and diagrams, which allow a deeper and more focused analysis of the data.
In our case the geographical representation of stakeholders in government, academics, business and industry aims at visualizing the GL community across the globe: it concerns 674 organizations which over the years have contributed to the development of a common vision on the most pressing issues of the field by using new paradigms such as Open Access and the social networks.}, KEYWORDS = {Geographical Visualization, Grey Literature Communities}, PAGES = {109-113}, URL = {http://greyguide.isti.cnr.it/wp-content/uploads/2017/04/GL18_Conference_Proceedings.pdf}, VOLUME = {18}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-30-2}, CONFERENCE_NAME = {Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, CONFERENCE_PLACE = {Washington}, CONFERENCE_DATE = {November 28-29, 2016}, BOOKTITLE = {GL18 Conference Proceedings Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{PIRRELLI_2017_INPROCEEDINGS_P_398875, AUTHOR = {Pirrelli, V.}, TITLE = {Co-activation and competition effects in lexical storage and processing}, YEAR = {2017}, ABSTRACT = {According to traditional wisdom in Linguistics, morphologically simple words reside in the mental lexicon, a kind of brain dictionary that contains unpredictable mappings between lexical features. Here I illustrate some of the defining features of an alternative view of the language architecture, where computation and storage are just the short-term and long-term dynamics of the same underlying process. 
Empirical results of a computational model of this view are reported and general implications for a theory of the lexicon are discussed.}, KEYWORDS = {Mental Lexicon, Morphology, Human Language Processing, artificial neural networks, lexical self-organization}, PAGES = {1-21}, URL = {https://picgl4.files.wordpress.com/2015/11/4-paper_1_pirrelli.pdf}, CONFERENCE_NAME = {4th Patras International Conference of Graduate Students in Linguistics}, CONFERENCE_PLACE = {Patras, Greece}, CONFERENCE_DATE = {20-22/05/2016}, } @INPROCEEDINGS{SASSOLINI_2017_INPROCEEDINGS_SCC_382393, AUTHOR = {Sassolini, E. and Cucurullo, S. and Cinini, A.}, TITLE = {I corpora digitali: dall'obsolescenza tecnologica, alla salvaguardia e alla condivisione}, YEAR = {2017}, ABSTRACT = {Studio e implementazione di un protocollo di recupero, conservazione e valorizzazione di testi e corpora digitali interessati da problemi di obsolescenza tecnologica. Le strategie di salvaguardia adottate si spingono oltre il salvataggio dei testi e la conservazione in un formato di rappresentazione in linea con gli standard internazionali (XML TEI), si pongono come obiettivo la valorizzazione di questo patrimonio attraverso nuove modalità di fruizione dei contenuti. Lo scopo è affiancare le funzionalità classiche di analisi testuale, che da sempre caratterizzano le nostre attività di ricerca, a nuove modalità grafiche e visuali di fruizione dei dati e, in alcuni casi, migrare verso dispositivi mobili e tecnologie App. In questo articolo, oltre al protocollo di recupero, presentiamo due sperimentazioni di valorizzazione di contenuti testuali. Nel primo caso proponiamo tecniche di visual analytics applicate ad un corpus testuale semi strutturato riguardante corrispondenza redatta in lingua italiana del 1600.
Nel secondo caso abbiamo realizzato un'applicazione per sistema Android finalizzata all'interrogazione di dati testuali relativi ad un progetto di censimento di architetture moderne della regione Liguria.}, KEYWORDS = {Testi digitali, Analisi testuale, Preservazione dei dati, Diffusione dei risultati}, PAGES = {31-35}, URL = {https://www.garr.it/it/documenti/3529-conferenza-2016-selected-papers-sassolini-et-al/file}, DOI = {10.26314/GARR-Conf16-proceeedings-06}, PUBLISHER = {Consortium GARR (Roma, ITA)}, ISBN = {978-88-905077-6-2}, CONFERENCE_NAME = {Conferenza GARR 2016-The CreActive Network}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {30/11/2016-02/12/2016}, } @INPROCEEDINGS{VADICAMO_2017_INPROCEEDINGS_VCFCDCT_375139, AUTHOR = {Vadicamo, L. and Carrara, F. and Falchi, F. and Cimino, A. and Dell'Orletta, F. and Cresci, S. and Tesconi, M.}, TITLE = {Cross-media learning for image sentiment analysis in the wild}, YEAR = {2017}, ABSTRACT = {Much progress has been made in the field of sentiment analysis in the past years. Researchers relied on textual data for this task, while only recently they have started investigating approaches to predict sentiments from multimedia content. With the increasing amount of data shared on social media, there is also a rapidly growing interest in approaches that work "in the wild", i.e. that are able to deal with uncontrolled conditions. In this work, we faced the challenge of training a visual sentiment classifier starting from a large set of user-generated and unlabeled contents. In particular, we collected more than 3 million tweets containing both text and images, and we leveraged on the sentiment polarity of the textual contents to train a visual sentiment classifier. To the best of our knowledge, this is the first time that a cross-media learning approach is proposed and tested in this context. 
We assessed the validity of our model by conducting comparative studies and evaluations on a benchmark for visual sentiment analysis. Our empirical study shows that although the text associated to each image is often noisy and weakly correlated with the image content, it can be profitably exploited to train a deep Convolutional Neural Network that effectively predicts the sentiment polarity of previously unseen images.}, KEYWORDS = {Big data, Data Mining, Sentiment Analysis, Social Media Analysis}, PAGES = {10}, URL = {https://ieeexplore.ieee.org/document/8265255}, DOI = {10.1109/ICCVW.2017.45}, ISBN = {978-1-5386-1034-3}, CONFERENCE_NAME = {ICCV 2017 IEEE International Conference on Computer Vision Workshops}, CONFERENCE_PLACE = {Venezia, Italy}, CONFERENCE_DATE = {22-29 October 2017}, } @INPROCEEDINGS{BARTOLINI_2017_INPROCEEDINGS_BGPRFF_377073, AUTHOR = {Bartolini, R. and Goggi, S. and Pardelli, G. and Russo, I. and Farace, D. and Frantzen, J.}, TITLE = {Data Visualization of a Grey Literature Community: A Cooperative Project}, YEAR = {2017}, ABSTRACT = {The expected outcome of this project will not only produce a revised and updated publication of International Directory of Organizations in Grey Literature, IDGL, but will also provide a visual overview of GreyNet as an international organization serving diverse communities with shared interests in grey literature. 
It would be a demonstration of GreyNet's commitment to research, publication, open access, education, and public awareness in this field of library and information science.}, KEYWORDS = {International Directory of Organizations in Grey Literature, Data Visualization}, PAGES = {63-63}, URL = {https://publications.cnr.it/doc/377073}, VOLUME = {19}, ISBN = {978-90-77484-32-6}, CONFERENCE_NAME = {Nineteenth International Conference on Grey Literature, GL19}, CONFERENCE_PLACE = {Rome, National Research Council, CNR}, CONFERENCE_DATE = {October 23-24, 2017}, BOOKTITLE = {Nineteenth International Conference on Grey Literature Public Awareness and Access to Grey Literature. Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{BELLANDI_2017_INPROCEEDINGS_BBKDM_366612, AUTHOR = {Bellandi, A. and Boschetti, F. and Khan, A. F. and Del Grosso, A. M. and Monachini, M.}, TITLE = {Provando e riprovando modelli di dizionario storico digitale: collegare voci, citazioni, interpretazioni}, YEAR = {2017}, ABSTRACT = {Il dizionario storico è il luogo d'incontro privilegiato di linguistica e lessicografia e filologia e critica letteraria. Nella prima parte prendiamo in considerazione un caso di studio piuttosto noto, relativo all'espressione "provando e riprovando", per mostrare come perfino i luoghi citati nei dizionari, che sono introdotti con lo scopo di disambiguare i termini in contesto, non siano privi di controversie interpretative. 
Nella seconda parte, molto più dettagliata e più tecnica, tentiamo di aggiungere ai modelli lessicali e citazionali già esistenti ed aperti soluzioni minime che ci permettano di collegare voci, citazioni e interpretazioni all'interno dell'universo dei Linked Open Data.}, KEYWORDS = {Linked Open Data LOD}, PAGES = {119-125}, URL = {http://aiucd2017.aiucd.it/wp-content/uploads/2017/01/book-of-abstract-AIUCD-2017.pdf}, CONFERENCE_NAME = {AIUCD 2017 Conference \& 3rd EADH Day}, CONFERENCE_PLACE = {Roma, Università "Sapienza"}, CONFERENCE_DATE = {24-28 January 2017}, BOOKTITLE = {AIUCD 2017 Conference}, } @INPROCEEDINGS{BOMPOLAS_2017_INPROCEEDINGS_BMFCPR_381125, AUTHOR = {Bompolas, S. and Marzi, C. and Ferro, M. and Cardillo, F. A. and Pirrelli, V. and Ralli, A.}, TITLE = {Transparency and predictability in Modern Greek conjugation: Implications for models of word processing}, YEAR = {2017}, ABSTRACT = {We argue that the Greek evidence calls for a substantial revision of the clear-cut interaction between transparency/predictability and regularity, to make room for a more process-oriented notion of regularity. 
According to this view, regularity is no longer an epiphenomenon of the design of the human language faculty and the purported dualism between rule-based and memory-based routes, but the graded result of the varying interaction of several structural factors concurrently affecting the human word processor.}, KEYWORDS = {Inflectional regularity, Word Processing, Modern Greek Conjugation}, PAGES = {17-19}, URL = {http://www.lilec.it/mmm/wp/wp-content/uploads/2017/02/Book-of-abstracts_MMM11_Final.pdf}, CONFERENCE_NAME = {MMM 11: 11th Mediterranean Morphology Meeting}, CONFERENCE_PLACE = {Cyprus}, CONFERENCE_DATE = {22-25/06/2017}, } @INPROCEEDINGS{DELGRATTA_2017_INPROCEEDINGS_D_382031, AUTHOR = {Del Gratta, R.}, TITLE = {(Re)Using OpeNER and PANACEA Web Services in the CLARIN Research Infrastructure}, YEAR = {2017}, ABSTRACT = {We describe the implications of (re)using the OpeNer and PANACEA Web Services into the CLARIN Research Infrastructure. The analyzed tools are of great interest for specific communities such as academic and small business focused on sentiment/opinion analysis and on Machine Translation along with related technologies, but their outcomes may be of great importance for the CLARIN audience as well. In fact, the Virtual Language Observatory shows a lot of lexical resources for sentiment but a few tool, while a lot of lexical resources and tools are available for Machine Translation. This means that the latter community is already in CLARIN, while the former should be poked. If community-related challenges are on the political side, issues related to interoperability are definitely on the technical one. The initiative is carried out at the ILC4CLARIN center in Pisa, the leading one of the CLARIN-IT national Consortium. The least common multiple between those two projects is neither limited to tools and Web Services nor to the creation of annotated corpora and lexicons; neither to the focus they have on specific communities. 
They also are based on (and strongly pursue and suggest) the concept of interoperability. This is clear from the use of the Kyoto Annotation Format in OpeNer, of Graph Annotation Format in PANACEA and of the Lexical Markup Framework in both. Data and tools interoperability is also a key asset in both CLARIN (https://www.clarin.eu/event/2017/clarin-workshop-towards-interoperability-lexico-semantic-resources) and EUDAT (https://eudat.eu/communities/an-eudat-based-fair-data-approach-for-data-interoperability). Within CLARIN, initiatives such as the Language Resource Switchboard and WebLicht openly go towards methodologies and "systems" to address the interoperability issues. From a technical point of view the main issues are briefly reported below: 1. Many tools in OpeNer and PANACEA are command line ones; 2. OpeNer offers both POST and GET API; 3. PANACEA built its Web Services using Soaplab and offers SOAP Web Services; 4. KAF, LMF and GrAF guarantee the interoperability among data and services; 5. Simple pipelines are available in OpeNer, while a workflow engine has been used in PANACEA. Tools are already wrapped, but to fully meet the requirements of both LRS and WebLicht we have to build a new shell around the command line tools so that REST APIs can accept both POST and GET requests and accept/produce different formats. Indeed if Language Resource Switchboard accepts tools with their output format but requires to read data from URL in plain text, WebLicht accepts tools which read and write the TCF format. While OpeNer requires that the core (the command line) be wrapped into a REST shell, Web Services in PANACEA need REST APIs around a SOAP core.
In the final paper, we will finalize the technical aspects and describe how the User Involvement group can play an important role in poking the sentiment/opinion community in CLARIN.}, KEYWORDS = {Web Services, Clarin, Research Infrastructures}, URL = {https://indico.egi.eu/indico/event/3455/contribution/139}, CONFERENCE_NAME = {Digital Infrastructures for Research 2017}, CONFERENCE_PLACE = {Brussels, The Square Meeting Centre}, CONFERENCE_DATE = {30/11/2017, 1/12/2017}, } @INPROCEEDINGS{DELGROSSO_2017_INPROCEEDINGS_D_384783, AUTHOR = {Del Grosso, A. M.}, TITLE = {Domain Driven Design and Domain Specific Modelling for Digital Textual Scholarship}, YEAR = {2017}, ABSTRACT = {Over the last years, the digital turn and the world wide web have led historical studies towards an automatic processing of their own data and consequently towards new forms of scholarly editing and publications. In this framework, scholars have adopted digital models, electronic elements and computational features in their work, but these new instruments are generally derived from other disciplines. For example, they exploit optical character recognition from image processing, corpora annotation and natural language processing from computational linguistics, text alignment from bioinformatics, text meaning from knowledge engineering, text presentation from data visualization. However, these latter research areas do not cover entirely the specificity of the fundamental requirements of the scholarly domain (for instance, treebank data models do not provide the adequate abstractions to manage multiple variant readings and multiple text interpretations). To exceed these issues, it is essential to adopt correct design approaches devoted to analyze the problem space of the historical source editing field. This rigorous and formal analysis will shape suitable architectures, design patterns, data abstractions and procedural abstractions for the constitutive features of the digital scholarly editions. 
Moreover, this modelling process will produce generic, flexible, maintainable and reusable digital models and modular textual scholarly environments. This contribution aims at discussing software engineering approaches, within an object-oriented paradigm, towards the definition of domain specific abstractions (DS-ADTs). In this way, it will be possible to accommodate domain needs by formally defining core "unities of concerns" which actually adhere to both the traditional and the digital editorial domain.}, KEYWORDS = {Domain Driven Design, Digital Scholarly Editing, Computational Philology, Digital Philology}, URL = {http://atlasfontium.pl/edition2.0/Home-and-News.php}, CONFERENCE_NAME = {Historical Source Edition 2.0}, CONFERENCE_PLACE = {Warsaw, Poland}, CONFERENCE_DATE = {6/10/2017-7/10/2017}, } @INPROCEEDINGS{DELGROSSO_2017_INPROCEEDINGS_D_390293, AUTHOR = {Del Grosso, A. M.}, TITLE = {Digital Textual Scholarship Tools: From Digitizing Historical Archives To Digital Scholarly Editing Models}, YEAR = {2017}, ABSTRACT = {The contribution illustrates fundamental aspects concerning the digitization of a historical archive for scholarly studies. During the first part of the talk I briefly introduce the standard reference model for digital archives, thereafter, I will show some features and technologies about the production of digital facsimiles from original primary sources. Beside this topic the metadata issues will be pointed out. Afterwards, digital transcription tools and text recognition tasks will be highlighted. Highlights on digital textual scholarship will introduce the text encoding and the annotation topics.
This talk ends with an overview of tools for visualizing, indexing and searching textual content.}, KEYWORDS = {historical archive, digital textual scholarship, digital humanities, digital libraries, computational philology, software engineering}, URL = {https://publications.cnr.it/doc/390293}, CONFERENCE_NAME = {International Workshop on Machine Learning and Natural Language Processing}, CONFERENCE_PLACE = {Fez, Marocco}, CONFERENCE_DATE = {24-25/11/2017}, } @INPROCEEDINGS{DELGROSSO_2017_INPROCEEDINGS_DGM_377409, AUTHOR = {Del Grosso, A. M. and Giovannetti, E. and Marchi, S.}, TITLE = {Il modello a microkernel di Omega nello sviluppo di strumenti per lo studio dei testi: dagli ADT alle API}, YEAR = {2017}, KEYWORDS = {microkernel, studio del testo, Omega, ADT, API}, PAGES = {199-205}, URL = {https://publications.cnr.it/doc/377409}, ISBN = {978-88-942535-1-1}, CONFERENCE_NAME = {AIUCD 2017 Conference}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {24-28/01/2017}, } @INPROCEEDINGS{DELGROSSO_2017_INPROCEEDINGS_DGM_377413, AUTHOR = {Del Grosso, A. M. and Giovannetti, E. and Marchi, S.}, TITLE = {Thinking like the "Modern Operating Systems": The Omega architecture and the Clavius on the Web project}, YEAR = {2017}, ABSTRACT = {The current digital turn in studying and analyzing historical documents results in both having machine actionable cultural data and providing software able to process them. However, these data and services often lack in integration strategies among them in order to be reused in other contexts different from the original ones. As pointed out by Franz Fischer in a worthy of note article: "There is no out-of-the-box software available for creating truly critical and truly digital editions at the same time" [1]. Likewise, Monica Berti stated that is now important to "build a model for representing quotations and text reuses of lost works in a digital environment" [2]. 
In this vision Bridget Almas is in charge of developing an integrated platform for collaboratively transcribing, editing, and translating historical documents and texts. She claimed that through this platform, called Perseids, students and scholars are able to create open source digital scholarly editions [3]. A number of interesting projects are currently under development to realize general models, digital services, and online tools that can be adopted as part of a long-term infrastructure for managing digital editions. Among Perseids and others, we cite as reference systems (a) the Textual Community project led by P. Robinson and B. Bordalejo, (b) the AustESE project led by the Australian eResearch group, (c) the Tagore Online Variorum "Bichitra" project led by Sukanta Chaudhuri, (d) Homer Multitext led by Neel Smith and Christopher Blackwell, (e) Sharing Ancient Wisdoms founded by the HERA network.}, KEYWORDS = {Omega, object-oriented design, digital scholarly editing, clavius on the web}, URL = {https://publications.cnr.it/doc/377413}, CONFERENCE_NAME = {Global Philology Open Conference}, CONFERENCE_PLACE = {Leipzig}, CONFERENCE_DATE = {20-23/02/2017}, } @INPROCEEDINGS{GOGGI_2017_INPROCEEDINGS_GPRBM_377070, AUTHOR = {Goggi, S. and Pardelli, G. and Russo, I. and Bartolini, R. 
and Monachini, M.}, TITLE = {Providing Access to Grey Literature: The CLARIN Infrastructure}, YEAR = {2017}, ABSTRACT = {This work will provide a map of the documentation archived in the CLARIN infrastructure, whose purpose is to share language resources produced and managed in the various European countries but finally merged into the CLARIN data centers for allowing access, interoperability, reuse and preservation of scientific documentation as well as Grey Literature.}, KEYWORDS = {CLARIN ERIC, Terminological Resources, Grey Literature}, PAGES = {60-62}, URL = {https://publications.cnr.it/doc/377070}, VOLUME = {19}, ISBN = {978-90-77484-32-6}, CONFERENCE_NAME = {Nineteenth International Conference on Grey Literature, GL19}, CONFERENCE_PLACE = {Rome, National Research Council, CNR}, CONFERENCE_DATE = {October 23-24, 2017}, BOOKTITLE = {Nineteenth International Conference on Grey Literature Public Awareness and Access to Grey Literature. Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_M_382175, AUTHOR = {Monachini, M.}, TITLE = {Discipline umanistiche: vantaggi, opportunità e benefici dell'Infrastruttura di Ricerca CLARIN e del nodo nazionale CLARIN-IT per la comunità italiana}, YEAR = {2017}, ABSTRACT = {L'interesse da parte delle scienze umane e sociali per le tecnologie del linguaggio non è mai stato così attuale come in questo momento storico. Le principali conferenze di Digital Humanities vedono sempre più la partecipazione di linguisti computazionali, mentre nelle conferenze di Trattamento Automatico del Linguaggio (TAL), l'applicazione di soluzioni TAL alle scienze umane e sociali costituisce una tematica che si affianca a quella delle ricadute industriali. Il bisogno di rispondere alle esigenze di una platea di utenti diversa apre nuove prospettive e offre una sfida rilevante per il settore delle tecnologie del linguaggio. 
I testi da trattare in ambito umanistico possono essere spesso eterogenei per genere, per periodo storico, per tipologia e nuovi tipi di analisi testuale acquistano particolare rilevanza. I software di analisi devono permettere una elaborazione automatica affidabile di tipologie di dati diversi da quelli che comunemente vengono usati nel TAL. La qualità delle risorse, in particolare la qualità dei vari livelli di annotazione acquista maggiore importanza quando queste devono essere usate per fare ricerca. Diventa cruciale sviluppare strumenti facilmente usabili e adattabili a diverse tipologie di contenuto e fornire soluzioni volte facilitare il reperimento e la condivisione di risorse e di tecnologie. E' proprio per rispondere a queste esigenze e per far incontrare chi produce e sviluppa risorse e tecnologie linguistiche con chi le usa, che è stata creata CLARIN (Common Language Resources Infrastructure for Social Sciences and Humanities), l'infrastruttura di ricerca europea per le risorse linguistiche al servizio delle scienze umane e sociali. CLARIN favorisce lo sviluppo di soluzioni tecnologiche volte a rendere le risorse e le tecnologie linguistiche visibili e disponibili per studiosi, ricercatori, studenti e cittadini, attraverso una modalità unificata e standardizzata di accesso. Tale innovazione consente di adottare nuovi e diversi approcci alla disciplina tradizionale determinando, in prospettiva, nuove consuetudini di studio che, sulla base delle buone pratiche lasciate in eredità dalla tradizione precedente, permettono lo sviluppo di una diversa e più attuale metodologia di ricerca e di prassi didattica.}, KEYWORDS = {Digital Humanities, CLARIN-IT}, URL = {https://apps.unive.it/server/eventi/13818/master%202017-2018%2011-2017-1.pdf}, CONFERENCE_NAME = {Università Ca' Foscari. 
Cerimonia conclusiva Master Digital Humanities}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {3/11/2017}, } @INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_M_382188, AUTHOR = {Monachini, M.}, TITLE = {Infrastrutture di Ricerca e Studi Classici. CLARIN-IT: opportunità e prospettive}, YEAR = {2017}, ABSTRACT = {L'interesse da parte delle scienze umane e sociali per le tecnologie del linguaggio non è mai stato così attuale come in questo momento storico. Le principali conferenze di Digital Humanities vedono sempre più la partecipazione di linguisti computazionali, mentre nelle conferenze di Trattamento Automatico del Linguaggio (TAL), l'applicazione di soluzioni TAL alle scienze umane e sociali costituisce una tematica che si affianca a quella delle ricadute industriali. Il bisogno di rispondere alle esigenze di una platea di utenti diversa apre nuove prospettive e offre una sfida rilevante per il settore delle tecnologie del linguaggio. I testi da trattare in ambito umanistico possono essere spesso eterogenei per genere, per periodo storico, per tipologia e nuovi tipi di analisi testuale acquistano particolare rilevanza. I software di analisi devono permettere una elaborazione automatica affidabile di tipologie di dati diversi da quelli che comunemente vengono usati nel TAL. La qualità delle risorse, in particolare la qualità dei vari livelli di annotazione acquista maggiore importanza quando queste devono essere usate per fare ricerca. Diventa cruciale sviluppare strumenti facilmente usabili e adattabili a diverse tipologie di contenuto e fornire soluzioni volte facilitare il reperimento e la condivisione di risorse e di tecnologie. 
E' proprio per rispondere a queste esigenze e per far incontrare chi produce e sviluppa risorse e tecnologie linguistiche con chi le usa, che è stata creata CLARIN (Common Language Resources Infrastructure for Social Sciences and Humanities), l'infrastruttura di ricerca europea per le risorse linguistiche al servizio delle scienze umane e sociali. CLARIN favorisce lo sviluppo di soluzioni tecnologiche volte a rendere le risorse e le tecnologie linguistiche visibili e disponibili per studiosi, ricercatori, studenti e cittadini, attraverso una modalità unificata e standardizzata di accesso. Tale innovazione consente di adottare nuovi e diversi approcci alla disciplina tradizionale determinando, in prospettiva, nuove consuetudini di studio che, sulla base delle buone pratiche lasciate in eredità dalla tradizione precedente, permettono lo sviluppo di una diversa e più attuale metodologia di ricerca e di prassi didattica.}, KEYWORDS = {Digital Humanities, CLARIN-IT}, URL = {http://www.clarin-it.it/sites/default/files/documents/UniParma_Workshop_2017_Locandina.pdf}, CONFERENCE_NAME = {DIGITAL HUMANITIES E FILOLOGIA GRECA: risorse e infrastrutture di ricerca applicate allo studio del greco antico}, CONFERENCE_PLACE = {Parma}, CONFERENCE_DATE = {20/11/2017}, } @INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_M_382191, AUTHOR = {Monachini, M.}, TITLE = {Nuove tecnologie e nuovi sviluppi di indagine: CLARIN-IT e alcuni esempi di applicazione allo studio del greco antico}, YEAR = {2017}, ABSTRACT = {Il lavoro tradizionale del filologo necessita oggi di una disponibilità sempre più ampia di dati e di testi (letteratura secondaria, bibliografia specifica, fonti primarie), il lavoro del singolo studioso sembra oramai accompagnarsi alla necessità di un team di ricerca che collabori su progetti di ampia scala, quali le edizioni dei testi.
Molte delle informazioni indispensabili per il filologo sono oggi (o potrebbero essere) disponibili e maggiormente accessibili grazie all'utilizzo di strumenti informatici, ma spesso si tratta di materiali dispersi e poco connessi tra loro; talora la loro esistenza è persino ignota agli studiosi tradizionali. Il trend dei dati che si registra nella disciplina, grazie alla diffusione del web, con la circolazione di risorse utili per l'analisi e la ricostruzione del testo, fa ripensare al rapporto tra filologia - in ogni suo aspetto - e nuove tecnologie e lascia ampio spazio alle riflessioni metodologiche sui procedimenti d'indagine. Si tratta di far dialogare questi dati e implementarli. Il primo passo riguarda l'individuazione delle opportunità offerte dal settore delle DH in relazione a ogni singola disciplina nella sua specificità e, d'altro lato definire quali siano le esigenze di ciascun singolo settore. Nel fare questo è necessario mantenere alto lo standard sia dello strumento sia del tipo di dati inseriti. Come tratta il testo lo studioso affiancato dall'ausilio delle nuove tecnologie? Cosa trova online? Che bisogni emergono nelle pratiche di uso odierne? Si tenterà di dare una risposta a queste domande con esempi pratici di metodo applicato allo studio - ad esempio - di un autore specifico. Nel contesto verranno inoltre presentate le attività sino ad ora svolte dal gruppo di ricerca. 1) Tramite Survey si sono identificati la pratica d'uso oggi, e i punti di forza e mancanze degli strumenti esistenti. 2) Si sono così definite le esigenze di una comunità specifica e le relative richieste e aspettative. 3) Si è definito un prototipo di strumento che risponda alle esigenze individuate, ora in fase di valutazione.
4) realizzazione, in prospettiva, di uno strumento che possa offrire una piattaforma collaborativa che metta a disposizione i dati (testo, apparato, commento, analisi a diversi livelli, etc.), variamente fruibili, assieme alla possibilità di accedere facilmente a tutti i dati relativi disponibili in rete.},
  KEYWORDS = {Digital Humanities, Computational Philology},
  URL = {http://www.clarin-it.it/sites/default/files/documents/UniParma_Workshop_2017_Locandina.pdf},
  CONFERENCE_NAME = {2° Workshop di Studio Insegnamenti di Storia della Lingua Greca (LT) e Filologia Greca (LM)},
  CONFERENCE_PLACE = {Parma},
  CONFERENCE_DATE = {1/12/2017},
}

@INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_M_429407,
  AUTHOR = {Monachini, M.},
  TITLE = {Digital Humanities and Research Infrastructures: CLARIN and CLARIN-IT},
  YEAR = {2017},
  ABSTRACT = {La lezione al Corso "Digital Humanities: Web Resources, Tools and Infrastructures" Venice International University (a.a. 2017-2018) ha lo scopo di dimostrare i vantaggi, i benefici e le opportunità offerte da una infrastruttura di ricerca come CLARIN-ERIC per rispondere ai quesiti di ricerca e le sfide nel settore delle Digital Humanities.},
  KEYWORDS = {digital humanities, research infrastructures, data deluge},
  URL = {https://publications.cnr.it/doc/429407},
  CONFERENCE_NAME = {Course "Digital Humanities: Web Resources, Tools and Infrastructures" Venice International University},
  CONFERENCE_PLACE = {Venezia},
  CONFERENCE_DATE = {4/12/2017},
}

@INPROCEEDINGS{MONACHINI_2017_INPROCEEDINGS_MNS_375982,
  AUTHOR = {Monachini, M. and Nicolosi, A.
and Stefanini, A.},
  TITLE = {Digital Classics: A Survey of the Needs of Ancient Greek Scholars in Italy},
  YEAR = {2017},
  ABSTRACT = {This paper presents and discusses the findings of a survey carried out in order to assess the use of digital resources and digital technologies with respect to work in Ancient Greek scholarship, as well as to identify the factors that are likely to constrain its use and to elicit needs and requirements of Ancient Greek scholars in Italy. The survey is in line with the principles behind the recent user engagement strategy developed by CLARIN-ERIC and constitutes one of the national efforts undertaken by CLARIN-IT to contribute to the wider impact of CLARIN on Digital Classicists.},
  KEYWORDS = {CLARIN-ERIC, CLARIN-IT, CLARIN on Digital Classicists},
  PAGES = {5},
  URL = {https://www.clarin.eu/event/2017/clarin-annual-conference-2017-budapest-hungary},
  CONFERENCE_NAME = {CLARIN Annual Conference 2017},
  CONFERENCE_PLACE = {Budapest, Hungary},
  CONFERENCE_DATE = {18-20 September, 2017},
}

@INPROCEEDINGS{NICOLAS_2017_INPROCEEDINGS_NKMDCAEBQ_375984,
  AUTHOR = {Nicolas, L. and Konig, A. and Monachini, M. and Del Gratta, R. and Calamai, S. and Abel, A. and Enea, A. and Biliotti, F. and Quochi, V.},
  TITLE = {CLARIN-IT: State of Affairs, Challenges and Opportunities},
  YEAR = {2017},
  ABSTRACT = {This paper provides an overview on the Italian national CLARIN consortium and the status of CLARIN-IT in general.
It thus discusses the current state of affairs of the consortium and provides information on the members, especially with regards to what they offer to CLARIN in terms of resources, services and expertise, and what CLARIN offers them to further their own research.},
  KEYWORDS = {Italian CLARIN consortium, CLARIN-IT},
  PAGES = {4},
  URL = {https://www.clarin.eu/event/2017/clarin-annual-conference-2017-budapest-hungary},
  CONFERENCE_NAME = {CLARIN Annual Conference 2017},
  CONFERENCE_PLACE = {Budapest, Hungary},
  CONFERENCE_DATE = {18-20 September, 2017},
}

@INPROCEEDINGS{PARDELLI_2017_INPROCEEDINGS_PGBD_366597,
  AUTHOR = {Pardelli, G. and Giannini, S. and Boschetti, F. and Del Gratta, R.},
  TITLE = {AIUCD e CLiC-it: citazioni bibliografiche a confronto},
  YEAR = {2017},
  ABSTRACT = {Il lavoro propone l'analisi e il confronto dei riferimenti bibliografici delle cinque edizioni annuali della Conferenza dell'Associazione per l'Informatica Umanistica e la Cultura Digitale (AIUCD) e del primo biennio (2014-2015) della Conferenza Italiana di Linguistica Computazionale (CLiC-it) per misurare la direzione in cui si muove il trend citazionale. L'analisi muove dal principio di rilevanza della citazione nella trasmissione della conoscenza in un periodo di grandi cambiamenti socioculturali e di importanti evoluzioni nelle modalità di produzione e diffusione dei risultati della ricerca scientifica. Lo scopo dello studio è quello di misurare la gamma delle risorse citate in questa area del sapere mediante l'uso di indicatori volti a comprendere la loro ampiezza, l'estensione temporale, la varietà, le relazioni con il mondo editoriale e i modelli di riferimento. L'osservazione dei risultati consente di classificare i documenti citati, di descriverne le caratteristiche e di valutare eventuali trasformazioni rispetto alle modalità di citazione tradizionali.
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- The paper suggests the analysis of the bibliographic references - enclosed in the proceedings of the Italian Conference on Computational Linguistics - CLICit in 2014-2015 biennium and of five AIUCD Conference editions 2012-2016. The analysis moves from the principle of relevance of citation in the transmission of knowledge in a period of great socio-cultural changes and important developments in the production and dissemination of the results in the scientific research. The purpose of the study is to measure the range of resources cited in this area of knowledge by the use of indicators aimed in understanding their wideness, the time extension, the variety, the relations with the publishing world and the reference models. The observation of the results allows to classify the cited document, to describe its characteristics and to assess any changes compared to the traditional citation mode.},
  KEYWORDS = {Corpus bibliografico, Analisi di metadati},
  PAGES = {38-50},
  URL = {http://aiucd2017.aiucd.it/wp-content/uploads/2017/01/book-of-abstract-AIUCD-2017.pdf},
  CONFERENCE_NAME = {AIUCD 2017 Conference \& 3rd EADH Day},
  CONFERENCE_PLACE = {Roma, Università "Sapienza"},
  CONFERENCE_DATE = {24-28 January 2017},
  BOOKTITLE = {AIUCD 2017 Conference},
}

@INPROCEEDINGS{PICCINI_2017_INPROCEEDINGS_PMG_378393,
  AUTHOR = {Piccini, S. and Marchi, S. and Giovannetti, E.},
  TITLE = {Étudier le structuralisme par le structuralisme: expériences de sémantique distributionnelle dans la construction d'un lexique électronique de la terminologie saussurienne},
  YEAR = {2017},
  ABSTRACT = {En 2010-2011, le premier lexique électronique dédié à la terminologie linguistique saussurienne a été créé [1] dans le cadre d'un projet de recherche intitulé « Pour une édition numérique des manuscrits de Ferdinand de Saussure », projet coordonné par le Professeur Gambarara. La première étape de construction de la ressource lexicale a consisté en l'identification manuelle dans les textes des termes clés du vocabulaire saussurien et de leurs propriétés sémantiques. L'informatique n'est intervenue que dans la phase de formalisation des données extraites. C'est pourquoi nous nous proposons d'illustrer ici la possibilité de recourir à des techniques automatiques et, en particulier, à des algorithmes de sémantique distributionnelle [2] pour identifier les relations que les termes entretiennent entre eux dans le texte. La méthodologie sous-jacente est basée sur l'hypothèse distributionnelle selon laquelle plus deux mots sont sémantiquement proches, plus ils ont tendance à se produire dans des contextes similaires. Le lexique d'un texte est considéré comme un espace métrique où chaque mot peut être représenté comme un vecteur à n dimensions, chacune d'elles enregistrant le nombre de fois que ce mot apparaît dans un contexte donné. La proximité spatiale entre deux vecteurs indique la similarité sémantique entre deux mots. Elle est calculée par le cosinus de l'angle compris entre les deux vecteurs : plus la valeur du cosinus est grande, plus les termes sont, en principe, sémantiquement similaires.
Les techniques computationnelles ont été appliquées aux mêmes textes à partir desquels le lexique électronique a été construit : le Cours de linguistique générale [3], les Écrits de linguistique générale [4] et le Recueil des publications scientifiques [5]. Bien qu'au stade préliminaire, l'expérience a permis d'obtenir des résultats intéressants. À titre d'exemple, nous présentons ci-dessous (Tableau) les valeurs de similitude obtenues par l'algorithme entre le terme signe et d'autres mots dans les textes. Si l'on compare les résultats avec l'entrée du lexique signe, on peut remarquer que l'algorithme est en mesure de détecter un grand nombre de liens explicités dans la ressource et de suggérer, en outre, des relations possibles avec d'autres termes comme valeur, rapport, idée. L'application de ces techniques computationnelles au corpus saussurien peut donc constituer une aide précieuse non seulement pour les lexicographes mais également pour les experts du domaine en faisant émerger des connections qui n'apparaissent pas immédiatement de manière explicite et en suggérant ainsi des parcours alternatifs d'analyse de la pensée de l'auteur.},
  KEYWORDS = {structuralisme, sémantique distributionnelle, terminologie saussurienne, lexique électronique},
  URL = {https://publications.cnr.it/doc/378393},
  CONFERENCE_NAME = {Atelier "Les manuscrits de Saussure, parmi d'autres. Problèmes, stratégies et solutions d'édition pour les archives numériques"},
  CONFERENCE_PLACE = {Geneve},
  CONFERENCE_DATE = {09-14/01/2017},
}

@INPROCEEDINGS{PIRRELLI_2017_INPROCEEDINGS_P_381136,
  AUTHOR = {Pirrelli, V.},
  TITLE = {Storage vs. Processing in Models of Word Inflection. A Neuro-computational Hebbian Perspective},
  YEAR = {2017},
  ABSTRACT = {The advent of connectionism in the 80's popularised the idea that the lexical processor consists of a network of parallel processing units selectively firing in response to sensory stimuli.
In the light of these assumptions, the most important contribution of connectionism to the theoretical debate on lexical modelling at the time was the utter rejection of the widely accepted idea that word recognition and production require a dichotomous choice between storage and processing. However, in spite of the prima facie psycho-computational allure of this view of the lexicon, early connectionist models also embraced a number of unsatisfactory assumptions about word learning and processing. More recently, a growing number of approaches to inflection in both Psycholinguistics and Theoretical Linguistics developed the view that surface word relations represent a fundamental domain of morphological competence. Learning the morphology of a language amounts to acquiring relations between fully stored lexical forms, which are concurrently available in the speaker's mental lexicon and jointly facilitate processing of morphologically related forms through patterns of emergent self-organisation. This novel view presupposes an integrative language architecture, where storage and processing, far from being conceived of as insulated and poorly interacting modules, are the short-term and the long-term dynamics of the same underlying process of adaptive specialisation of synaptic connections. This view, upheld by recent evidence of the neuro-anatomical bases of short-term and long-term memory processes, crucially hinges on Hebbian principles of synaptic plasticity, which are, in turn, in keeping with mathematical models of discriminative learning. 
I contend that integrative computer models of Hebbian language learning represent an exciting way forward in current neuro-computational research on word processing, and a persistently fertile legacy of the connectionist revolution.},
  KEYWORDS = {Hebbian Learning, Recurrent Neural Networks, Word Inflection},
  PAGES = {19-19},
  URL = {https://indico.sissa.it/event/12/abstract-book.pdf},
  CONFERENCE_NAME = {International Morphological Processing Conference (MoProc)},
  CONFERENCE_PLACE = {Trieste},
  CONFERENCE_DATE = {22-24/06/2017},
}

@INPROCEEDINGS{PIRRELLI_2017_INPROCEEDINGS_PMFC_381117,
  AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M. and Cardillo, F. A.},
  TITLE = {Paradigm Relative Entropy and Discriminative Learning},
  YEAR = {2017},
  ABSTRACT = {In the present contribution, we show that principles of discriminative learning of symbolic time series go a long way in accounting for these effects, thus making an important contribution to our understanding of the human lexical processor and its sensitivity to word distributions both within and across paradigms.},
  KEYWORDS = {Paradigm Entropy, Discriminative Learning, Mental Lexicon, Verb Inflection},
  PAGES = {5},
  URL = {http://w3.erss.univ-tlse2.fr/ParadigMo2017/program.html},
  CONFERENCE_NAME = {ParadigMo 2017: First Workshop on Paradigmatic Word Formation Modeling},
  CONFERENCE_PLACE = {Toulouse},
  CONFERENCE_DATE = {19-20/06/2017},
}

@INPROCEEDINGS{RUSSO_2017_INPROCEEDINGS_RS_382094,
  AUTHOR = {Russo, I. and Soria, C.},
  TITLE = {Digital Language Diversity on New Media: the DLDP Survey about European Minority Languages Speakers},
  YEAR = {2017},
  ABSTRACT = {How does the linguistic diversity of Europe reflect in the New Media? Do regional and minority languages contribute to EU digital language diversity? In this paper we will present the results of the first survey about actual needs of European minority languages speakers regarding digital opportunities.
The survey is part of the work carried out by the Digital Language Diversity Project (DLDP), a three-year Erasmus+ project started in September 2015. The goal of DLDP is helping minority languages speakers in the acquisition of intellectual and practical skills to create, share, and reuse online digital content, at the same time defining general guidelines and best practices for the promotion of minority languages with poor digital representation, a fact that further prevents their usability on digital media and devices. The focus of the project is on four European minority languages at different stages of digital developments (Basque, Breton, Karelian and Sardinian), and this will enable a comparison about the role of the availability of digital content for promotion of digital usage of these languages and development of language-based digital applications. With the aim of understanding the specific needs and the peculiar behaviours of speakers of these languages, during Spring 2016 we conducted a survey focused on gathering information about their personal digital use of the language and about any known digital resource and services that make use of the language. We received feedback from almost 2000 speakers and we are now in the position of analysing results for future actions. In particular, taking into account media user typology elaborated by Brandtzæg (2010) (e.g. 
entertainment, instrumental and advanced users) we aim to profile speakers' answers according to these different classes, in order to better understand how to make speakers aware of the opportunities new media offer for preservation and revitalisation of minority languages.},
  KEYWORDS = {minority languages, regional languages, new media, digital language diversity, digital language development},
  URL = {https://minoritylanguagesnewmedia2017.files.wordpress.com/2017/03/final_abstracts-baal-cup-seminar-on-minority-languages-in-new-media.pdf},
  CONFERENCE_NAME = {BAAL-Cambridge University Press Seminar on Minority Languages in New Media},
  CONFERENCE_DATE = {27-28/4/2017},
}

@INPROCEEDINGS{SASSOLINI_2017_INPROCEEDINGS_SC_382418,
  AUTHOR = {Sassolini, E. and Cinini, A.},
  TITLE = {Approcci grafici all'analisi di corpora testuali},
  YEAR = {2017},
  ABSTRACT = {sperimentazioni finalizzate a combinare tecniche di "distant reading" e funzionalità classiche di Information Retrieval (IR) su dati testuali. Incrementare con sintesi grafiche e visuali l'offerta di strumenti di studio e di analisi dei dati testuali rappresenta una nuova frontiera del nostro ambito di ricerca consueto.},
  KEYWORDS = {analisi testuale, distant reading, visual analytics},
  PAGES = {83-86},
  URL = {http://aiucd2017.aiucd.it/wp-content/uploads/2017/01/book-of-abstract-AIUCD-2017.pdf},
  CONFERENCE_NAME = {AIUCD 2017 Conference},
  CONFERENCE_PLACE = {Roma},
  CONFERENCE_DATE = {24-28/01/2017},
}

@INPROCEEDINGS{SORIA_2017_INPROCEEDINGS_S_382071,
  AUTHOR = {Soria, C.},
  TITLE = {The digital language vitality scale: a model for assessing digital vitality of languages},
  YEAR = {2017},
  ABSTRACT = {In this paper, we present the Digital Language Vitality Scale, a tool for measuring the degree of digital vitality of languages. Digital vitality can be defined as the extent to which a language is present, used and usable over the Internet through digital devices.
The scale is inspired to ethnolinguistic vitality assessment (such as GIDS, Fishman 2001), updated by (Lewis and Simons 2010) as EGIDS, and the UNESCO "nine factors" (UNESCO 2003), and is based on previous work in this area such as (Kornai 2013) and (Gibson 2015). Seven levels of digital vitality are identified, from "pre-digital" to "digitally thriving", and a set of associated indicators. The indicators associated with the scale are proxies representing both digital representation (presence) of a language and digital use. They are clustered into three groups: a first group of indicators refers to digital usability of a language, for instance, the existence of Internet connection or the availability of standardised fonts for writing the language. A second group of indicators is related to the quality and amount of digital use of a language: if and how much a language is used for texting and emailing, on websites, blogs, if there are e-books, Wikipedias, if the language is used on social media. The last group of indicators correlates with the digital prestige of a language; they are a sign of a language that not only is used on digital media and devices, but it is so in a full-fledged way, enjoying the widest possible ranges of uses and applications (e.g. localised digital services, machine translation, edu-tainment products and services). 
The scale is currently being used in the context of the DLDP project (http://www.dldp.eu) as an assessing instrument for digital language planning, with particular reference to regional and minority languages.},
  KEYWORDS = {digital vitality, language vitality, digital language diversity},
  PAGES = {100-100},
  URL = {https://icriml.indiana.edu/conference-program/Abstractbook.pdf},
  CONFERENCE_NAME = {First International Conference on Revitalization of Indigenous and Minoritized Languages},
  CONFERENCE_PLACE = {Barcelona/Vic},
  CONFERENCE_DATE = {19-21/04/2017},
}

@INPROCEEDINGS{SORIA_2017_INPROCEEDINGS_S_382081,
  AUTHOR = {Soria, C.},
  TITLE = {Language policies and speakers' attitudes: evaluating the impact of official recognition on some of Italy's regional languages},
  YEAR = {2017},
  ABSTRACT = {The panel focuses on the minority (some of them highly endangered) languages of Italy, with a special attention to those which are not recognized (nor supported) by the Italian Government. Key points will be a. the official language policy of Italy, b. language discrimination, c. language ideology and d. the ambiguous role of academic institutions vis-à-vis languages and dialects, e. the effects (and results) of official support for recognized minority languages, as well as f.
grassroots approaches to the standardization and development of unrecognized languages and new developments on the net.},
  KEYWORDS = {minority languages, multilingualism, language policy},
  PAGES = {42-42},
  URL = {https://icriml.indiana.edu/conference-program/Abstractbook.pdf},
  CONFERENCE_NAME = {First International Conference on Revitalization of Indigenous and Minoritized Languages},
  CONFERENCE_PLACE = {Barcelona/Vic},
  CONFERENCE_DATE = {19-21/04/2017},
}

@INPROCEEDINGS{SORIA_2017_INPROCEEDINGS_S_382083,
  AUTHOR = {Soria, C.},
  TITLE = {Inquiring current digital use and usability of regional and minority languages: the DLDP survey},
  YEAR = {2017},
  ABSTRACT = {The Digital Language Diversity Project is a three-year project funded under the Erasmus+ programme that addresses the problem of low digital representation and use of EU regional and minority languages, a cause for their endangerment according to some scholars. One of the first actions of the project is to assess the current use and usability of four EU regional/minority languages, representing very different degrees of digital language representation and use: these languages are Sardinian, Karelian, Basque and Breton. From June to September 2016, the DLDP project has been spreading a survey, that was localized and translated into these languages. The survey is developed on the basis of previous work carried out in the area of ethnolinguistic vitality, such as the ELDIA Barometer, and other inquiries addressing specifically digital use of languages and availability and usability of digital resources and media. The DLDP survey consists of a general part collecting basic information on the informant (age, sex, proficiency level in the language, frequency of use, etc.). The second part is focused on gathering information about his/her personal digital use of the language and about any known digital resource and services that make use of the language. The survey is the first ever study of the digital needs of minority language speakers.
It will give stakeholders and academia a detailed view into what actual language speakers are thinking about in terms of how they want to develop provision for their languages in the digital sphere. Therefore, we strongly encourage wide adoption and dissemination of the survey to regional and minority languages beyond the four investigated. The workshop intends to illustrate and discuss the model survey, to share it with researchers interested in adopting it for other languages, and to discuss collaboration paths.},
  KEYWORDS = {digital language use, regional languages, minority languages, digital language vitality},
  PAGES = {44-44},
  URL = {https://icriml.indiana.edu/conference-program/Abstractbook.pdf},
  CONFERENCE_NAME = {First International Conference on Revitalization of Indigenous and Minoritized Languages},
  CONFERENCE_PLACE = {Barcelona/Vic},
  CONFERENCE_DATE = {19-21/04/2017},
}

@INPROCEEDINGS{SORIA_2017_INPROCEEDINGS_S_382086,
  AUTHOR = {Soria, C.},
  TITLE = {Alliances for digital linguistic diversity},
  YEAR = {2017},
  ABSTRACT = {Linguapax proposes a complementary pair of Roundtable discussions in the Conference's thematic line 1: The value of linguistic diversity, from an operational perspective. This "diptych" will present different types of alliances that create contexts for the preservation and continuation of linguistic diversity (RT 1), and will show how a plural perspective on linguistic diversity can emerge, drawing on Linguapax's experience as an international network (RT 2). In the first Roundtable discussion we will tackle the generation of different contexts of interaction (networks and alliances) to enhance the presence and vitality of linguistic diversity - those contexts emerging from linguistic, cultural and digital networks (although should be - in theory - mostly overlapping, an overview of their specific potentialities is useful and needed): 1. "Linguistic cooperation": International networks of projects/exchange of experiences in language revitalisation.
Andoni Barreña (Garabide Elkartea, Basque Country); 2. Alliances for digital linguistic diversity. Claudia Soria. Consiglio Nazionale delle Ricerche. Pisa. 3. International cultural cooperation. Although "cultural cooperation" should be the common umbrella, it is usually driven by a specific logic in which linguistic diversity rarely comes consistently into play. On this occasion for dialogue, the Round Table will expose linguists/activists to a potentially useful vocabulary and world of networks and platforms. Jordi Pascual, expert on international cultural relations. This Roundtable aims to harness the relational potential of the Conference: On one hand, putting into dialogue two contributions already planning individual participation (1, 2), and on the other, inviting an external perspective to crossfertilise the debate and increase the scope for networking. Given the operational approach of this Roundtable, active interaction with the audience will be a key element for achieving the desired multiplier effect. Moderator: Alícia Fuentes-Calle. Linguapax (Barcelona). Departament de Lingüística. Universitat de Barcelona.},
  KEYWORDS = {linguistic diversity, digital language diversity},
  URL = {https://publications.cnr.it/doc/382086},
  CONFERENCE_NAME = {ROUNDTABLE DISCUSSION-Linguapax-I. Generating contexts for linguistic diversity to thrive: networks of linguistic, cultural and digital cooperation. First International Conference on Revitalization of Indigenous and Minoritized Languages},
  CONFERENCE_PLACE = {Barcelona/Vic},
  CONFERENCE_DATE = {19-21/04/2017},
}

@INPROCEEDINGS{WEINGART_2017_INPROCEEDINGS_WG_377381,
  AUTHOR = {Weingart, A. and Giovannetti, E.},
  TITLE = {From canabo to Cannabis sativa L.: Modelling Diachronic Termino-ontological Resources in the Context of DiTMAO},
  YEAR = {2017},
  ABSTRACT = {The paper aims at contributing to the understanding of the Medieval Brain from a knowledge engineering perspective.
As the brain is conceived as locus of cognition and knowledge in medieval medicine, we want to focus on the representation of medieval medico-botanical knowledge by means of a three-level text-termino-ontological resource. The resource is based on lemon (a model for the representation of lexica as RDF) and a set of ontologies represented in OWL and consists of (i) a medieval termino-ontological resource, (ii) a modern termino-ontological resource and (iii) a documentation corpus. It is developed and implemented within the DFG-funded project "Dictionnaire de Termes Médico-botaniques de l'Ancien Occitan" (DiTMAO). In an introductory part we will briefly introduce the aims of the project and the particularities of the corpus. The corpus consists of medical monographs in Latin script but also of so-called synonym lists in Hebrew script. These lists can be described as ancient multilingual dictionaries, in our case of Old Occitan, (Judaeo-)Arabic, Hebrew, Latin or other Romance languages and sometimes Greek and Aramaic, and they are of particular importance because the equivalent terms in other ancient languages help to determine the meaning of otherwise opaque Old Occitan terms. After introducing the three components of the resource, given some examples from our corpus, we elaborate, in the main part of paper, a solution to the problem of representing the relation between medieval medical terms or concepts and their ancient and modern correspondences. The problem concerns in particular the process of determining the terms' meaning through the documentation of each term in corpus-external dictionaries and editions, and how this process can be represented in a comprehensive and transparent way. We propose to relate the terms of the medieval termino-ontological resource at a lexical level using relations such as synonymy or sublemma. The connections between modern and medieval terms, like a translation into modern English, will be mediated by the documentation corpus.
As for modern scientific terms, we opt for an ontological connection. These types of connections will be exemplified with (mainly) plant names from our corpus. For example, a medieval term has a referent in the medieval ontology, structuring the botanic world as conceived by a "medieval brain" by giving the medieval classifications e.g. the primary qualities. This ontological entity is related to a referent of a modern scientific name, given that the documentation of the medieval term provides such information. We will argue that a clear separation of datasets (medieval and modern) allows for the diachronic study of the evolution of terminology and, more importantly for this context, paves the way for the analysis of the changes in the cognitive representation of what those terms actually refer to.},
  KEYWORDS = {lexica, multi-language lexica, termino-ontological resource, ancient occitan},
  URL = {https://themedievalbrain.wordpress.com/},
  CONFERENCE_NAME = {The Medieval Brain Conference},
  CONFERENCE_PLACE = {University of York},
  CONFERENCE_DATE = {09-11/03/2017},
}

@TECHREPORT{CARLINO_2017_TECHREPORT_C_483691,
  AUTHOR = {Carlino, M.},
  TITLE = {Rapporto annuale 2016 del CNR-ILC},
  YEAR = {2017},
  ABSTRACT = {Rapporto Annuale 2016 del Cnr-Istituto di Linguistica Computazionale "Antonio Zampolli" (CNR-ILC)},
  KEYWORDS = {CNR-ILC, ILC, Annual Report, Rapporto Annuale, Istituto di Linguistica Computazionale, Zampolli, Activity report},
  PAGES = {1-54},
  URL = {https://publications.cnr.it/doc/483691},
}

@TECHREPORT{CININI_2017_TECHREPORT_CCS_382931,
  AUTHOR = {Cinini, A. and Cucurullo, S.
and Sassolini, E.},
  TITLE = {Rapporto Tecnico: Standardizzazione del corpus testuale del PRIN Crusca},
  YEAR = {2017},
  ABSTRACT = {Attività previste nella convenzione operativa tra ILC-CNR e Accademia della Crusca che riguardano la progettazione e lo sviluppo di una piattaforma Web modulare per l'archiviazione, la gestione e l'interrogazione di corpora testuali in lingua italiana, con funzionalità derivate dal DBT (Data Base Testuale) nelle sue diverse implementazioni. Il lavoro preliminare svolto riguarda anche la normalizzazione dei testi e la conversione nello standard di rappresentazione XML TEI.},
  KEYWORDS = {Codifica dei testi, Analisi testuale, formato XML TEI},
  PAGES = {1-21},
  URL = {https://publications.cnr.it/doc/382931},
}

@TECHREPORT{RUSSO_2017_TECHREPORT_RS_382302,
  AUTHOR = {Russo, I. and Soria, C.},
  TITLE = {Sardinian-a digital language?},
  YEAR = {2017},
  ABSTRACT = {In this report we present the results of the first survey about the actual needs of Sardinian speakers in terms of digital opportunities},
  KEYWORDS = {digital use, digital language diversity, Sardinian},
  URL = {http://www.dldp.eu/sites/default/files/documents/DLDP_Sardinian-Report.pdf},
}

@TECHREPORT{SASSOLINI_2017_TECHREPORT_SC_383394,
  AUTHOR = {Sassolini, E. and Cinini, A.},
  TITLE = {DIGESTO: NUOVE FUNZIONALITÀ E SITO WEB},
  YEAR = {2017},
  ABSTRACT = {Realizzazione di un nuovo sito web per la consultazione dei testi bilingui, con sviluppo di nuove funzionalità di ricerca, non più solo per parola ma anche per Titolo, Frammento o Paragrafo. Realizzazione di una versione PDF scaricabile di parti ragionate dell'intero corpus},
  KEYWORDS = {testi paralleli, sito web, analisi testuale, visual analytics},
  PAGES = {1-12},
  URL = {https://publications.cnr.it/doc/383394},
}

@MISC{ALBANESI_2017_MISC_ABBCDGCP_427294,
  AUTHOR = {Albanesi, D. and Bellandi, A. and Bulleri, F. and Carniani, E. and Dattilo, D. and Giovannetti, E. and Colombo, M.
and Papini, M.}, TITLE = {Traduco}, YEAR = {2017}, ABSTRACT = {il Progetto Traduzione del Talmud Babilonese ha permesso di allestire una "officina digitale" specializzata nella traduzione di testi di particolare complessità interpretativa, quali, appunto, il Talmud Babilonese. Per la realizzazione della complessa opera di traduzione è stata costituita una équipe multidisciplinare che ha coinvolto traduttori, revisori di contenuto, redattori, curatori e grafici che, attraverso la piattaforma digitale Traduco hanno potuto lavorare congiuntamente sul testo da diversi luoghi del mondo. Traduco è uno strumento web collaborativo per la traduzione assistita di testi, per lo sviluppo del quale sono state condotte ricerche specifiche, sia nell'ambito dell'Ingegneria del Software che in quello della Linguistica Computazionale. Attraverso il lavoro di ricerca, è stato possibile mettere a punto un ambiente "intelligente" di supporto alla traduzione con caratteristiche innovative. Traduco ha consentito, come side effect positivo del processo vero e proprio di traduzione, di produrre innovazione tecnologica e scientifica.}, KEYWORDS = {traduzione collaborativa, traduzione assistita dal calcolatore, linguistica computazionale, traduco, talmud babilonese}, URL = {https://publications.cnr.it/doc/427294}, } @MISC{CARRARA_2017_MISC_CCCDFVT_429823, AUTHOR = {Carrara, F. and Cimino, A. and Cresci, S. and Dell'Orletta, F. and Falchi, F. and Vadicamo, L. and Tesconi, M.}, TITLE = {T4SA: Twitter for Sentiment Analysis}, YEAR = {2017}, ABSTRACT = {T4SA is intended for training and testing image sentiment analysis approaches. It contains little less than a million tweets, corresponding to about 1.5M images. We initially collected about 3.4M tweets corresponding to about 4M images. 
We classified the sentiment polarity of the texts (as described in Section 4) and we selected the tweets having the most confident textual sentiment predictions to build our Twitter for Sentiment Analysis (T4SA) dataset. The dataset is publicly available at: http://www.t4sa.it/}, KEYWORDS = {social media, sentiment analysis, image analysis, image sentiment analysis, deep learning, multimedia sentiment analysis, dataset, tweets}, URL = {http://www.t4sa.it/}, } @MISC{DELGROSSO_2017_MISC_D_390356, AUTHOR = {Del Grosso, A. M.}, TITLE = {Annotazioni collaborative di testi storici}, YEAR = {2017}, ABSTRACT = {Con l'avvento delle tecnologie e degli strumenti digitali, l'annotazione di testi (storici) è divenuta ancora più importante ed essenziale nel lavoro di strutturazione, di analisi e di comprensione dei fenomeni testuali. Inoltre, considerando gli aspetti più filologici, la realizzazione di una edizione digitale semanticamente connotata determina il bisogno di arricchire il testo con informazioni di varia granularità, di varia natura e di vario tipo. Questo intervento presenterà un approccio all'annotazione che tragga vantaggio dal Web semantico e dalle sue tecnologie. Le risorse testuali verranno caratterizzate da una forte interconnessione con risorse esterne favorendo di fatto la costruzione e l'interrogazione di un'unica banca dati globale, condivisa e formalmente descritta. In concreto, verrà presentato come scrivere annotazioni in RDF, utilizzando strumenti di annotazione disponibili sul web: a) il Text Encoder and Annotator (TEA); b) Annotarium. 
Il primo orientato alla trascrizione e all'annotazione delle risorse testuali, il secondo orientato alla gestione e alla interrogazione full-text e concettuale delle risorse annotate.}, KEYWORDS = {Digital Humanities, tools, software engineering, semantic web, digital edition}, URL = {https://digitaltools.labcd.unipi.it/past-editions/resources2017/#DelGrosso}, } @MISC{DELGROSSO_2017_MISC_D_390782, AUTHOR = {Del Grosso, A. M.}, TITLE = {Modelli Concettuali e Architetture orientate agli oggetti per la Progettazione e lo Sviluppo di una Digital Scholarly Platform}, YEAR = {2017}, ABSTRACT = {L'intervento affronta temi legati alla progettazione concettuale e alla definizione di architetture software object-oriented per lo sviluppo di una Digital Scholarly Platform}, KEYWORDS = {digital philology, computational philology, software engineering}, URL = {https://publications.cnr.it/doc/390782}, } @MISC{DELGROSSO_2017_MISC_D_390783, AUTHOR = {Del Grosso, A. M.}, TITLE = {Tools for Digital Textual Scholarship}, YEAR = {2017}, ABSTRACT = {la presentazione illustra le architetture software per la realizzazione di biblioteche digitali e archivi digitali, con particolare riguardo agli aspetti di studio filologico del testo}, KEYWORDS = {computational philology, digital philology, digital humanities}, URL = {https://publications.cnr.it/doc/390783}, } @MISC{DELGROSSO_2017_MISC_D_390784, AUTHOR = {Del Grosso, A. M.}, TITLE = {Strumenti software per lo studio e l'analisi di risorse testuali}, YEAR = {2017}, ABSTRACT = {Il contributo introduce i principali strumenti digitali per l'analisi dei testi letterari. Inoltre saranno illustrati gli strumenti sviluppati dal gruppo di Literary Computing dell'Istituto}, KEYWORDS = {digital humanities, digital philology, software, software engineering}, URL = {https://publications.cnr.it/doc/390784}, } @MISC{DELGROSSO_2017_MISC_D_484913, AUTHOR = {Del Grosso, A. 
M.}, TITLE = {Introduzione alle tecnologie digitali per la redazione e la pubblicazione di contenuti Web}, YEAR = {2017}, ABSTRACT = {Introduzione alle tecnologie digitali per la redazione e la pubblicazione di contenuti Web}, KEYWORDS = {tecnologie web, html, css}, URL = {https://publications.cnr.it/doc/484913}, } @MISC{DELGROSSO_2017_MISC_DM_390360, AUTHOR = {Del Grosso, A. M. and Marchi, S.}, TITLE = {Clavius on The Web search framework}, YEAR = {2017}, ABSTRACT = {Search engine and restful API developed within the Clavius On the Web project.}, KEYWORDS = {digital humanities, computational philology, software engineering, search engine}, URL = {https://github.com/literarycomputinglab/ClaviusSearch}, } @MISC{GIOVANNETTI_2017_MISC_GD_390396, AUTHOR = {Giovannetti, E. and Del Grosso, A. M.}, TITLE = {LicoLab@LabexObvil}, YEAR = {2017}, ABSTRACT = {This talk illustrates some aspects of our research activities, specifically some outcomes carried out during designing and developing a digital environment for textual scholarship.}, KEYWORDS = {digital humanities, computational philology, software engineering}, URL = {https://publications.cnr.it/doc/390396}, } @MISC{ZAMORANI_2017_MISC_Z_383456, AUTHOR = {Zamorani, N.}, TITLE = {Featured Linguist: Nicoletta Calzolari}, YEAR = {2017}, ABSTRACT = {The LINGUIST List Official LINGUIST List Blog: Featured Linguist: Nicoletta Calzolari Posted on April 7, 2017 by Clare Harshey We are proud to share with our readers the next featured linguist of our 2017 Fund Drive: Nicoletta Calzolari. We hope that you enjoy reading Dr. Calzolari's thoughts on her long and varied career as a computational linguist.}, KEYWORDS = {Computational Linguistics, Nicoletta Calzolari}, PAGES = {11}, URL = {https://blog.linguistlist.org/fund-drive/featured-linguist-nicoletta-calzolari/}, } @ARTICLE{BELLANDI_2016_ARTICLE_BABG_364945, AUTHOR = {Bellandi, A. and Albanesi, D. and Benotto, G. 
and Giovannetti, E.}, TITLE = {Il Sistema Traduco nel Progetto Traduzione del Talmud Babilonese}, YEAR = {2016}, ABSTRACT = {Nell'ambito del Progetto Traduzione del Talmud Babilonese, l'Istituto di Linguistica Computazionale del CNR ha sviluppato Traduco, uno strumento web collaborativo con alcune caratteristiche che lo rendono particolarmente adatto alla traduzione di testi che pongono problemi interpretativi. Ad oggi, gli strumenti per la traduzione assistita (in inglese, Computer-Assisted Translation, o CAT) sono utilizzati tipicamente per la traduzione di manuali tecnici, testi legislativi o siti Web e hanno principalmente lo scopo di accelerare il processo di traduzione. Traduco riprende la maggior parte dei componenti standard di uno strumento di traduzione assistita tradizionale, ma li estende con caratteristiche specifiche necessarie per supportare l'interpretazione e la traduzione di testi complessi che pongono particolari problemi di comprensione. In questo articolo presenteremo un caso di studio specifico, relativo a un testo con queste caratteristiche: il Talmud Babilonese. Traduco include funzionalità per l'aggiunta di note, riferimenti bibliografici, annotazioni semantiche e creazione di glossari. Traduttori, revisori, redattori, supervisori e utenti finali che accedono al Sistema sono supportati nell'intero processo di traduzione, che va dall'interpretazione del testo originario alla fase editoriale per la stampa delle traduzioni, attraverso l'uso di tecnologie di traduzione assistita, l'annotazione semantica del testo, l'arricchimento delle traduzioni con informazioni esplicative, l'esportazione delle traduzioni in XML e in TEI e l'integrazione di tecniche per il trattamento automatico della lingua. 
La progettazione e lo sviluppo di Traduco ha richiesto l'adozione di un approccio multidisciplinare che combina aspetti di ingegneria del software, linguistica computazionale, ingegneria della conoscenza ed editoria digitale.}, KEYWORDS = {traduco, traduzione-assistita, talmud}, PAGES = {109-126}, URL = {https://journals.openedition.org/ijcol/404}, VOLUME = {2}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{BOSCHETTI_2016_ARTICLE_BB_382026, AUTHOR = {Boschetti, F. and Buzzoni, M.}, TITLE = {Cronache-Edizioni digitali: Rappresentazione, Interoperabilità, Analisi del testo e Infrastrutture (Venezia, 7-9 settembre 2016)}, YEAR = {2016}, ABSTRACT = {Cronaca del quinto convegno annuale dell'Associazione di Informatica Umanistica e Cultura Digitale (AIUCD), che si è svolto dal 7 al 9 settembre 2016 presso l'Aula Magna di Ca' Dolfin dell'Università Ca' Foscari di Venezia e ha avuto per tema: "Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture".}, KEYWORDS = {convegno AIUCD}, PAGES = {269-274}, URL = {http://bit.ly/2CW0BOV}, VOLUME = {13}, PUBLISHER = {Carocci (Roma, Italia)}, ISSN = {1825-5361}, JOURNAL = {Ecdotica (Roma)}, } @ARTICLE{BRUNATO_2016_ARTICLE_BD_366755, AUTHOR = {Brunato, D. and Dell'Orletta, F.}, TITLE = {ISACCO: a corpus for investigating spoken and written language development in Italian school-age children}, YEAR = {2016}, ABSTRACT = {In this paper we present ISACCO (Italian School-Age Children COrpus), a corpus of oral and written retellings of Italian-speaking children attending primary school. All texts were digitalized and automatically enriched with multi-level linguistic annotation. Preliminary explorations of both the form and the content of children's productions were carried out based on a set of features automatically extracted by NLP tools. 
Written retellings were manually annotated with a typology of errors belonging to three different linguistic levels. The resource, which has been made publicly available, is conceived to support research and computational modeling of "later language acquisition", with an emphasis on comparative assessment of the evolution of oral and written language competencies in early school grades.}, KEYWORDS = {Child language acquisition, Oral and Written language, multi-level linguistic analysis}, PAGES = {63-76}, URL = {http://www.italianlp.it/wp-content/uploads/2016/09/04_brunato_dell-orletta.pdf}, VOLUME = {2}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{BRUNATO_2016_ARTICLE_BDMV_385220, AUTHOR = {Brunato, D. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Monitoraggio linguistico di Scritture Brevi: aspetti metodologici e primi risultati}, YEAR = {2016}, ABSTRACT = {Se da un lato le tecnologie del linguaggio svolgono un ruolo ormai indiscusso per l'accesso al contenuto testuale, ciò non appare scontato quando si va a considerare il loro ruolo nella valutazione delle strutture linguistiche sottostanti al testo. Questo contributo si focalizza sulla definizione di una metodologia innovativa di monitoraggio linguistico della lingua italiana che a partire dall'output di strumenti di annotazione linguistica automatica permette di ricostruire un profilo linguistico di una collezione di testi rappresentativa di una specifica varietà d'uso della lingua. 
Tale metodologia è stata applicata a un corpus di tweet allo scopo di far luce su interrogativi aperti quali la possibilità di rintracciare tendenze lessicali, morfo-sintattiche e sintattiche peculiari all'interno di questa tipologia testuale; di studiare come queste tendenze si rapportino ai tratti caratterizzanti della lingua scritta e parlata; di individuare possibili differenze nella forma linguistica in cui si twittano contenuti di natura diversa.}, KEYWORDS = {Trattamento Automatico del Linguaggio, Monitoraggio Linguistico, Varietà d'Uso della Lingua, Lingua del Web}, PAGES = {149-176}, URL = {https://publications.cnr.it/doc/385220}, VOLUME = {N. S. 5}, PUBLISHER = {Università degli Studi di Napoli "L'Orientale" (Napoli, Italia)}, ISSN = {1825-2796}, JOURNAL = {Quaderni Aion}, } @ARTICLE{FRONTINI_2016_ARTICLE_FBRJJ_357604, AUTHOR = {Frontini, F. and Brando, C. and Riguet, M. and Jacquot, C. and Jolivet, V.}, TITLE = {Annotation of Toponyms in TEI Digital Literary Editions and Linking to the Web of Data}, YEAR = {2016}, ABSTRACT = {This paper aims to discuss the challenges and benefits of the annotation of place names in literary texts and literary criticism. We shall first highlight the problems of encoding spatial information in digital editions using the TEI format by means of two manual annotation experiments and the discussion of various cases. This will lead to the question of how to use existing semantic web resources to complement and en-rich toponym mark-up, in particular to provide mentions with precise geo-referencing. 
Finally the automatic annotation of a large corpus will show the potential of visualizing places from texts, by illustrating an analysis of the evolution of literary life from the spatial and geographical point of view.}, KEYWORDS = {digital literary studies, toponyms, semantic web, geographic databases, maps and visualizations}, PAGES = {49-75}, URL = {http://dx.doi.org/10.14195/2182-8830_4-2_3}, VOLUME = {4}, DOI = {10.14195/2182-8830_4-2_3}, ISSN = {2182-8830}, JOURNAL = {MATLIT: Materialidades da Literatura}, } @ARTICLE{FRONTINI_2016_ARTICLE_FCG_357602, AUTHOR = {Frontini, F. and Brando, C. and Ganascia, J. G.}, TITLE = {REDEN: Named Entity Linking in Digital Literary Editions Using Linked Data Sets}, YEAR = {2016}, ABSTRACT = {This paper proposes a graph-based Named Entity Linking (NEL) algorithm named REDEN for the disambiguation of authors' names in French literary criticism texts and scientific essays from the 19th and early 20th centuries. The algorithm is described and evaluated according to the two phases of NEL as reported in current state of the art, namely, candidate retrieval and candidate selection. REDEN leverages knowledge from different Linked Data sources in order to select candidates for each author mention, subsequently crawls data from other Linked Data sets using equivalence links (e.g., owl:sameAs), and, finally, fuses graphs of homologous individuals into a non-redundant graph well-suited for graph centrality calculation; the resulting graph is used for choosing the best referent. The REDEN algorithm is distributed in open-source and follows current standards in digital editions (TEI) and semantic Web (RDF). Its integration into an editorial workflow of digital editions in Digital humanities and cultural heritage projects is entirely plausible. 
Experiments are conducted along with the corresponding error analysis in order to test our approach and to help us to study the weaknesses and strengths of our algorithm, thereby to further improvements of REDEN.}, KEYWORDS = {Named Entity Linking, graph centrality, linked data, data fusion, digital humanities}, PAGES = {60-80}, URL = {https://csimq-journals.rtu.lv/article/view/csimq.2016-7.04}, VOLUME = {7}, DOI = {10.7250/csimq.2016-7.04}, ISSN = {2255-9922}, JOURNAL = {Complex Systems Informatics and Modeling Quarterly}, } @ARTICLE{GOGGI_2016_ARTICLE_GPBFMMDB_359144, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Frontini, F. and Monachini, M. and Manzella, G. and De Mattei, M. and Bustaffa, F.}, TITLE = {A semantic engine for grey literature retrieval in the oceanography domain}, YEAR = {2016}, ABSTRACT = {Here we present the final results of the MAPS (Marine Planning and Service Platform) project, an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. The system takes as input non-textual data (measurements) and text - both published papers and documentation - and it provides an advanced search facility thanks to the rich set of metadata and, above all, to the possibility of a refined and domain targeted key-word indexing of texts using Natural Language Processing (NLP) techniques. The paper describes the system in its details providing also evidence of evaluation.}, KEYWORDS = {Information Extraction, Search Engine, Operative Oceanography}, PAGES = {155-161}, URL = {http://www.greynet.org/thegreyjournal/currentissue.html}, VOLUME = {12}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{LENCI_2016_ARTICLE_LLMM_367820, AUTHOR = {Lenci, A. and Labanca, N. and Marazzini, C. 
and Montemagni, S.}, TITLE = {Voci della Grande Guerra: An Annotated Corpus of Italian Texts on World War I}, YEAR = {2016}, ABSTRACT = {Voci della Grande Guerra (Voices of the Great War) is a scientific and cultural initiative with the aim of preserving and promoting the memory of Italy in World War I through the creation of a corpus of digital texts selected by historians and linguists in order to be representative of the different ways to experience and describe the Italian war by its protagonists. With the help of advanced techniques of computational linguistics, semantic web and information visualization, the digitized historical materials will be explored with an online interface to enable easy but effective and innovative search modalities. The project will allow experts as well as non-experts to become acquainted with "linguistic polyphony" of Italy during World War I.}, KEYWORDS = {Great War, World War, digital texts, corpus, Italian, Voci della Grande Guerra, Voices of the Great War}, PAGES = {101-108}, URL = {http://www.ai-lc.it/IJCoL/v2n2/6-lenci_et_al.pdf}, VOLUME = {2}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{MARZI_2016_ARTICLE_MFCP_360723, AUTHOR = {Marzi, C. and Ferro, M. and Cardillo, F. A. and Pirrelli, V.}, TITLE = {Effects of frequency and regularity in an integrative model of word storage and processing}, YEAR = {2016}, ABSTRACT = {Considerable evidence has accrued on the role of paradigms as both theoretical and cognitive structures regimenting the way words are processed and acquired. 
The evidence supports a view of the lexicon as an emergent integrative system, where word forms are concurrently and competitively stored as repeatedly successful processing patterns, and on-line processing crucially depends on the internal organisation of stored patterns.}, KEYWORDS = {Lexical access, word recall, serial processing, parallel activation, inflectional paradigms, mental lexicon}, PAGES = {79-114}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84986550295\&origin=inward}, VOLUME = {28}, PUBLISHER = {Pacini (Ospedaletto, Italia)}, ISSN = {1120-2726}, JOURNAL = {Rivista di Linguistica}, } @ARTICLE{MONACHINI_2016_ARTICLE_MF_373630, AUTHOR = {Monachini, M. and Frontini, F.}, TITLE = {CLARIN, l'infrastruttura europea delle risorse linguistiche per le scienze umane e sociali e il suo network italiano CLARIN-IT}, YEAR = {2016}, ABSTRACT = {Il 1° ottobre 2015 il MIUR firma l'adesione dell'Italia a CLARIN-ERIC, l'infrastruttura di ricerca che offre risorse e tecnologie linguistiche dedicate al settore delle scienze del linguaggio e delle scienze umane e sociali. Questo articolo intende fornire alla comunità italiana una ampia panoramica di CLARIN, la sua missione, i suoi pilastri, i servizi, la sua organizzazione tecnica ed amministrativa e la struttura di governance, sia a livello europeo che locale. Viene introdotto il network italiano, con il primo centro nazionale ILC4CLARIN, ospitato ed in via di sviluppo presso l'ILC-CNR, le funzionalità, le risorse ed i servizi offerti; viene presentato infine il primo nucleo del consorzio nazionale CLARIN-IT, illustrando i criteri di costituzione, le attività previste e le prospettive future.}, KEYWORDS = {Infrastrutture di ricerca, Tecnologie linguistiche, Network italiano CLARIN-IT}, PAGES = {1-30}, URL = {http://www.ai-lc.it/IJCoL/v2n2/1-monachini_and_frontini.pdf}, VOLUME = {
2}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{MUGELLI_2016_ARTICLE_MBDDKT_364960, AUTHOR = {Mugelli, G. and Boschetti, F. and Del Gratta, R. and Del Grosso, A. M. and Khan, F. and Taddei, A.}, TITLE = {A user-centred design to annotate ritual facts in ancient greek tragedies}, YEAR = {2016}, ABSTRACT = {Euporia is an annotation system developed with a user-centred approach for the study of ritual and religion in ancient Greek tragedy. Euporia adopts a domain specific language (DSL) and a lightweight web user interface in order to offer digital support to an anthropological study of ancient Greek tragedy that compares ritual as it is performed or described in Greek tragedy with ancient ritual as it can be reconstructed from literary, archaeological, and epigraphic sources. The case study discussed in this paper (Aesch. Ag 67-71) shows one of the main features of Euporia: the ability to annotate different readings and different interpretations of the text and their consequences in the reconstruction of ancient Greek ritual.}, KEYWORDS = {Digital Philology, Digital Humanities, Digital Classicist, Computational philology, Computational Linguistics}, PAGES = {103-120}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85007489227\&origin=inward}, VOLUME = {59}, DOI = {10.1111/j.2041-5370.2016.12041.x}, PUBLISHER = {Institute of Classical Studies, University of London (London, Regno Unito)}, ISSN = {0076-0730}, JOURNAL = {Bulletin-University of London. Institute of Classical Studies}, } @ARTICLE{REHM_2016_ARTICLE_RUABBBBBCDGGGVHHJKKKLMMMMMMOOPPPRRPSDTTTVVVZ_355592, AUTHOR = {Rehm, G. and Uszkoreit, H. and Ananiadou, S. and Bel, N. and Bieleviciene, A. and Borin, L. and Branco, A. and Budin, G. and Calzolari, N. and Daelemans, W. and Garabik, R. and Grobelnik, M. and Garcia Mateo, C. and Van Genabith, J. and Hajic, J. and Hernaez, I. and Judge, J. 
and Koeva, S. and Krek, S. and Krstev, C. and Linden, K. and Magnini, B. and Mariani, J. and McNaught, J. and Melero, M. and Monachini, M. and Moreno, A. and Odijk, J. and Ogrodniczuk, M. and Pezik, P. and Piperidis, S. and Przepiorkowski, A. and Rognvaldsson, E. and Rosner, M. and Pedersen, B. S. and Skadina, I. and De Smedt, K. and Tadic, M. and Thompson, P. and Tufis, D. and Varadi, T. and Vasiljevs, A. and Vider, K. and Zabarskaite, J.}, TITLE = {The strategic impact of META-NET on the regional, national and international level}, YEAR = {2016}, ABSTRACT = {This article provides an overview of the dissemination work carried out in META-NET from 2010 until 2015; we describe its impact on the regional, national and international level, mainly with regard to politics and the funding situation for LT topics. The article documents the initiative's work throughout Europe in order to boost progress and innovation in our field.}, KEYWORDS = {Language technology, Multilingual technologies, Machine translation, Language resources, META-NET, META-SHARE}, PAGES = {351-374}, URL = {http://link.springer.com/article/10.1007/s10579-015-9333-4}, VOLUME = {50}, DOI = {10.1007/s10579-015-9333-4}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @ARTICLE{RHEM_2016_ARTICLE_RUCM_344298, AUTHOR = {Rehm, G. and Uszkoreit, H. and Calzolari, N. and Monachini, M.}, TITLE = {The strategic impact of META-NET on the regional, national and international level}, YEAR = {2016}, ABSTRACT = {This article provides an overview of the dissemination work carried out in META-NET from 2010 until 2015; we describe its impact on the regional, national and international level, mainly with regard to politics and the funding situation for LT topics. 
The article documents the initiative's work throughout Europe in order to boost progress and innovation in our field.}, KEYWORDS = {Language technology, Multilingual technologies, Machine translation, Language resources, META-NET, META-SHARE}, PAGES = {26}, URL = {http://www.springer.com/home?SGWID=0-0-1003-0-0\&aqId=2981193\&download=1\&checkval=6c0c2a6da36ef097f2a5e48a49f794e4}, DOI = {10.1007/s10579-015-9333-4}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @INCOLLECTION{BRUNATO_2016_INCOLLECTION_BV_366759, AUTHOR = {Brunato, D. and Venturi, G.}, TITLE = {Le tecnologie linguistico-computazionali per la leggibilità della comunicazione istituzionale}, YEAR = {2016}, ABSTRACT = {Il contributo illustra il ruolo delle tecnologie linguistico-computazionali per la valutazione automatica della leggibilità dei testi della comunicazione istituzionale e propone alcuni esempi di semplificazione semi-automatica di testi amministrativi e normativi.}, KEYWORDS = {tecnologie linguistico-computazionali, valutazione automatica della leggibilità, comunicazione istituzionale}, PAGES = {119-157}, URL = {https://publications.cnr.it/doc/366759}, PUBLISHER = {Pisa University Press (Pisa, ITA)}, ISBN = {978-88-6741-627-1}, } @INCOLLECTION{DELGRATTA_2016_INCOLLECTION_DBDKM_353799, AUTHOR = {Del Gratta, R. and Boschetti, F. and Del Grosso, A. and Khan, F. and Monachini, M.}, TITLE = {Cooperative philology on the way to web services: The case of the cophiwordnet platform}, YEAR = {2016}, ABSTRACT = {In this paper we present ongoing research carried out at the Institute for Computational Linguistics "A. Zampolli" (ILC) in Pisa. The institute has been active since many years in the field of Digital Humanities providing resources, tools and solutions to address issues of the to digital humanists. 
Starting from those previous initiatives, we show how to re-engineer them as Web Services in order to make connections between lexicons, semantic resources and a fine grained text management. Linked Open Data is chosen as the paradigm used to link the different resources as well as the modality of data presentation.}, KEYWORDS = {Canonical text services, Cooperative philology, Linked open data, Web services}, PAGES = {173-187}, URL = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84961744725\&partnerID=q2rCbXpz}, VOLUME = {9442}, DOI = {10.1007/978-3-319-31468-6_13}, PUBLISHER = {Springer International Publishing (Switzerland, CHE)}, ISBN = {978-3-319-31468-6}, BOOKTITLE = {Worldwide Language Service Infrastructure: Second International Workshop, WLSI 2015, Kyoto, Japan, January 22-23, 2015. Revised Selected Papers}, EDITOR = {Murakami, Y. and Li, D.}, } @INCOLLECTION{FRONTINI_2016_INCOLLECTION_FDM_357638, AUTHOR = {Frontini, F. and Del Gratta, R. and Monachini, M.}, TITLE = {GeoDomainWordNet: Linking the Geonames Ontology to WordNet}, YEAR = {2016}, ABSTRACT = {This paper illustrates the transformation of GeoNames' ontology concepts, with their English labels and glosses, into a GeoDomain WordNet-like resource in English, its translation into Italian, and its linking to the existing generic WordNets of both languages. The paper describes the criteria used for the linking of domain synsets to each other and to the generic ones and presents the published resource in RDF according to the w3c and lemon schema.}, KEYWORDS = {GeoNames, WordNet, Language resources, Lexicons, Linguistic linked data, lemon, RDF}, PAGES = {229-242}, URL = {http://link.springer.com/chapter/10.1007/978-3-319-43808-5_18}, VOLUME = {9561}, DOI = {10.1007/978-3-319-43808-5_18}, ISBN = {978-3-319-43808-5}, BOOKTITLE = {Human Language Technology. Challenges for Computer Science and Linguistics}, EDITOR = {Vetulani, Z. and Uszkoreit, H. 
and Kubis, M.}, } @INCOLLECTION{MONTEMAGNI_2016_INCOLLECTION_MW_367809, AUTHOR = {Montemagni, S. and Wieling, M.}, TITLE = {Tracking linguistic features underlying lexical variation patterns: A case study on Tuscan dialects}, YEAR = {2016}, ABSTRACT = {In this paper, we illustrate the application of hierarchical spectral partitioning of bipartite graphs in the study of lexical variation in Tuscany based on the data from a regional linguistic atlas. This method makes it possible not only to identify existing patterns of lexical variation in Tuscany, but also to uncover the underlying lexical features in terms of the most characteristic concept-lexicalization pairs. The results are promising, demonstrating the potential of the method for tracking the linguistic features underlying identified patterns of lexical variation and change across generations.}, KEYWORDS = {tuscan, dialects, lexical variation, linguistic atlas}, PAGES = {117-135}, URL = {http://langsci-press.org/catalog/view/81/146/376-1}, SERIES = {Language Variation}, VOLUME = {1}, DOI = {10.17169/langsci.b81.146}, PUBLISHER = {Language Science Press (Berlin, DEU)}, ISBN = {978-3-946234-18-0}, BOOKTITLE = {The Future of Dialects}, EDITOR = {Côté, M. and Knooihuizen, R. and Nerbonne, J.}, } @INCOLLECTION{QUOCHI_2016_INCOLLECTION_Q_358123, AUTHOR = {Quochi, V.}, TITLE = {Development and representation of Italian light-fare constructions}, YEAR = {2016}, ABSTRACT = {The essay describes the study of the development and use of light fare 'do' constructions in Child-directed Speech and in Child Language with the twofold goal of showing that a Construction Grammar approach is viable, and of providing support to usage-based, functional predictions on language acquisition. 
The analysis of naturalistic data derived from the CHILDES database lead to two main findings: first, a representation of fare Light Verb Constructions as a family of constructions organized like a radial category is not only possible but more explicative, second, there exists a 'fare' pivot schema that children generalize at an early stage because it serves the purpose of naming new events, activities or situations.}, KEYWORDS = {Corpus linguistics, Language Acquisition, Construction Grammar, phraseology}, PAGES = {39-64}, URL = {https://benjamins.com/#catalog/books/cal.19.03quo/details}, VOLUME = {19}, DOI = {10.1075/cal.19.03quo}, PUBLISHER = {John Benjamins Publishing Company (Amsterdam/Philadelphia, USA)}, ISBN = {9789027204417}, BOOKTITLE = {Corpus-based Approaches to Construction Grammar}, EDITOR = {Yoon, J. and Gries, S. Th.}, } @INCOLLECTION{WEINGART_2016_INCOLLECTION_WG_364950, AUTHOR = {Weingart, A. and Giovannetti, E.}, TITLE = {Extending the Lemon Model for a Dictionary of Old Occitan Medico-Botanical Terminology}, YEAR = {2016}, ABSTRACT = {The article presents the adaptation of the lemon model (a model for lexica as RDF data) for a multilingual and multi- alphabetical lexicon of Old Occitan medico-botanical terminology. The lexicon is the core component of an ontology-based information system that will be constructed and implemented within the DFG-funded project "Dictionnaire des Termes Medico-botaniques de l'Ancien Occitan" (DiTMAO). 
The difficulties for the lemmatization raised by the particularities of the corpus (terms in Latin, Hebrew and Arabic script and corresponding terms in other ancient languages, mostly Hebrew and Arabic) can be perfectly solved by extending the basic properties of lemon and introducing domain specific vocabulary.}, KEYWORDS = {Lemon model, RDF, Multilingual, Multi-alphabetical, Historical lexicon, Medico-Botanical terminology, Old occitan, Hebrew, Arabic}, PAGES = {408-421}, URL = {http://link.springer.com/chapter/10.1007/978-3-319-47602-5_53}, VOLUME = {9989}, DOI = {10.1007/978-3-319-47602-5_53}, ISBN = {978-3-319-47601-8}, BOOKTITLE = {The Semantic Web}, EDITOR = {Sack, H. and Rizzo, G. and Steinmetz, N. and Mladenić, D. and Auer, S. and Lange, C.}, } @INCOLLECTION{WIELING_2016_INCOLLECTION_WM_367813, AUTHOR = {Wieling, M. and Montemagni, S.}, TITLE = {Infrequent forms: Noise or not?}, YEAR = {2016}, ABSTRACT = {In this study we ask the question whether simplifying the data in dialectometrical studies by removing infrequent forms is advantageous to uncovering the geographical structure in dialect data. By investigating lexical variation in a large corpus of Tuscan dialect data via hierarchical bipartite spectral graph partitioning, we are able to identify the main geographical areas together with their linguistic basis. In order to assess the influence of infrequent forms, we conduct two analyses: one which includes only lexical variants used by at least 0.5% of the informants, and another which includes all lexical variants in the data. 
Using this approach we show that using all data enables us to find a geographical characterization with a more adequate linguistic basis than by using the trimmed data.}, KEYWORDS = {dialectometrical studies, dialectology, dialect data, lexical variation, Tuscan}, PAGES = {215-224}, URL = {http://langsci-press.org/catalog/view/81/78/367-1}, VOLUME = {Language Variation 1}, DOI = {10.17169/langsci.b81.78}, PUBLISHER = {Language Science Press (Berlin, DEU)}, ISBN = {978-3-946234-18-0}, BOOKTITLE = {The Future of Dialects}, EDITOR = {Côté, M. and Knooihuizen, R. and Nerbonne, J.}, } @EDITORIAL{BRANCO_2016_EDITORIAL_BCC_367184, AUTHOR = {Branco, A. and Calzolari, N. and Choukri, K.}, TITLE = {4REAL Workshop: Workshop on Research Results Reproducibility and Resources Citation in Science and Technology of Language}, YEAR = {2016}, ABSTRACT = {This workshop seeks to foster the discussion and the advancement on a topic that has been so far given insufficient attention in the research area of language processing tools and resources (Branco, 2013, Fokkens et al., 2013) and that has been an important topic emerging in other scientific areas. That is the topic of the reproducibility of research results and the citation of resources, and its impact on research integrity.}, KEYWORDS = {Research Results Reproducibility, Resources Citation}, PAGES = {1-38}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, } @EDITORIAL{BRUNATO_2016_EDITORIAL_BDVFB_367760, AUTHOR = {Brunato, D. and Dell'Orletta, F. and Venturi, G. and François, T. 
and Blache, P.},
  TITLE = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC 2016)},
  YEAR = {2016},
  ABSTRACT = {Introduzione agli atti della prima edizione del workshop "Computational Linguistics for Linguistic Complexity" che raccoglie lavori che studiano da prospettive diverse il tema della complessità linguistica workshop allo scopo di promuovere una riflessione comune su approcci diversi all'indagine, al trattamento e alla valutazione di aspetti che rendono complessa la lingua.},
  KEYWORDS = {Linguistic Complexity, Computational Linguistics},
  PAGES = {1--245},
  URL = {https://aclweb.org/anthology/W/W16/W16-41.pdf},
  ISBN = {978-4-87974-709-9},
}

@COMMENT{Edited proceedings volumes and a translated volume, 2016. PAGES on whole-volume records gives the extent of the volume as exported (a range, or a bare page count).}

@EDITORIAL{CALZOLARI_2016_EDITORIAL_CCDGGMMMMOP_355640,
  AUTHOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.},
  TITLE = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)},
  YEAR = {2016},
  ABSTRACT = {Curatela dei 744 articoli presentati alla conferenza LREC2016.},
  KEYWORDS = {Language Resources Evaluation},
  PAGES = {1--4693},
  URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-9-1},
}

@EDITORIAL{DISEGNI_2016_EDITORIAL_D_383159,
  AUTHOR = {Di Segni, D. G.},
  TITLE = {Talmud Babilonese - Trattato Rosh haShanà},
  YEAR = {2016},
  ABSTRACT = {Traduzione in italiano del trattato Rosh haShanà del Talmud Babilonese},
  KEYWORDS = {Talmud, Traduco, Linguistica computazionale},
  PAGES = {365},
  URL = {https://www.talmud.it/},
  VOLUME = {5},
  PUBLISHER = {Giuntina (Firenze, ITA)},
  ISBN = {978-88-8057-628-0},
}

@EDITORIAL{KHAN_2016_EDITORIAL_KVLFFPGU_355434,
  AUTHOR = {Khan, F. and Vintar, Š. and León Araúz, P. and Faber, P. and Frontini, F. and Parvizi, A. and Grčić Simeunović, L. and Unger, C.},
  TITLE = {Language and Ontology (LangOnto2) \& Terminology and Knowledge Structures (TermiKS)},
  YEAR = {2016},
  ABSTRACT = {This joint workshop brings together two different but closely related strands of research. On the one hand it looks at the overlap between ontologies and computational linguistics and on the other it explores the relationship between knowledge modelling and terminologies. In particular the workshop aims to create a forum for discussion in which the different relationships and commonalities between these two areas can be explored in detail, as well as presenting cutting edge research in each of the two individual areas. A significant amount of human knowledge can be found in texts. It is not surprising that languages such as OWL, which allow us to formally represent this knowledge, have become more and more popular both in linguistics and in automated language processing. For instance ontologies are now of core interest to many NLP fields including Machine Translation, Question Answering, Text Summarization, Information Retrieval, and Word Sense Disambiguation. At a more abstract level, however, ontologies can also help us to model and reason about phenomena in natural language semantics. In addition, ontologies and taxonomies can also be used in the organisation and formalisation of linguistically relevant categories such as those used in tagsets for corpus annotation. Notably also, the fact that formal ontologies are being increasingly accessed by users with limited to no background in formal logic has led to a growing interest in developing accessible front ends that allow for easy querying and summarisation of ontologies. It has also led to work in developing natural language interfaces for authoring ontologies and evaluating their design. Additionally in recent years there has been a renewed interest in the linguistic aspects of accessing, extracting, representing, modelling and transferring knowledge. Numerous tools for the automatic extraction of terms, term variants, knowledge-rich contexts, definitions, semantic relations and taxonomies from specialized corpora have been developed for a number of languages, and new theoretical approaches have emerged as potential frameworks for the study of specialized communication. However, the building of adequate knowledge models for practitioners (e.g. experts, researchers, translators, teachers etc.), on the one hand, and NLP applications (including cross-language, cross-domain, cross-device, multi-modal, multi-platform applications), on the other hand, still remains a challenge. The papers included in the workshop range across a wide variety of different areas and reflect the strong inter-disciplinary approach, which characterises both areas of research. In addition we are very happy to include two invited talks in the program presented by authorities in their respective fields: Pamela Faber from the field of terminology, and John McCrae, an expert on linguistic linked data and the interface between NLP and ontologies.},
  KEYWORDS = {lexicons, ontologies},
  URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html},
}

@EDITORIAL{MARZI_2016_EDITORIAL_MP_360724,
  AUTHOR = {Marzi, C. and Pirrelli, V.},
  TITLE = {Word knowledge and word usage: A Foreword},
  YEAR = {2016},
  ABSTRACT = {This special issue, together with its companion issue to appear in Lingue e Linguaggio, stems from the NetWordS Final Conference Word knowledge and word usage: representations and processes in the mental lexicon.* The conference, held on the 30th and 31st of March, and the 1st of April 2015 in Pisa, concluded the 4-year NetWordS project, the European Network of Word Structure funded by the European Science Foundation within the Research Networking Programme.
In line with the highly multidisciplinary profile of NetWordS agenda, the conference offered a comprehensive and inclusive forum focussing on two main lines of lexical inquiry: (i) usage-based approaches to bootstrapping word form and structure (morpho-phonological and morpho-syntactic issues), including: acquisition of lexical categories, emergence of morphological structure, lexical memories, anticipatory prediction-based mechanisms of word recognition, word production, frequency-based models of lexical productivity, word encoding, models of lexical architecture, family-based effects in word processing, word reading and writing; (ii) usage-based approaches to word meanings (lexical semantics and pragmatics in morphologically simple and complex words), including: distributional semantics, compound interpretation, concept composition and coercion, conceptualization of perception and action, time and space in the lexicon, metonymy and metaphor, lexico-semantic relations, perceptual grounding and embodied cognition, context-based and encyclopedic knowledge, semantic association and categorization. The multidisciplinary focus on word knowledge and word usage promoted by the Conference led participants to openly discuss an impressive range of approaches and empirical data: priming and lexical decision in a number of contexts, distributional semantics and models of semantic composition, neural networks, machine learning and mathematical modelling of empirical evidence, as well as their neuro-biological and neuro-functional correlates.},
  KEYWORDS = {word knowledge, word usage, mental lexicon, interdisciplinary approach, NetWordS},
  PAGES = {3--6},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84986558643\&origin=inward},
  VOLUME = {28},
  NUMBER = {1},
  PUBLISHER = {Pacini (Pisa, ITA)},
}

@COMMENT{The two Marzi and Pirrelli forewords are distinct records for twin special issues (different publisher and volume), not duplicates. VOLUME holds the journal volume and NUMBER the issue within it.}

@EDITORIAL{MARZI_2016_EDITORIAL_MP_360725,
  AUTHOR = {Marzi, C. and Pirrelli, V.},
  TITLE = {Word knowledge and word usage: A foreword},
  YEAR = {2016},
  ABSTRACT = {This special issue, together with its companion issue to appear in Italian Journal of Linguistics, stems from the NetWordS Final Conference "Word knowledge and word usage: representations and processes in the mental lexicon". The conference, held on the 30th and 31st of March, and the 1st of April 2015 in Pisa, concluded the 4-year NetWordS project, the European Network of Word Structure funded by the European Science Foundation within the Research Networking Programme. In line with the highly multidisciplinary profile of NetWordS agenda, the conference offered a comprehensive and inclusive forum focussing on two main lines of lexical inquiry: (i) usage-based approaches to bootstrapping word form and structure (morpho-phonological and morpho-syntactic issues), including: acquisition of lexical categories, emergence of morphological structure, lexical memories, anticipatory prediction-based mechanisms of word recognition, word production, frequency-based models of lexical productivity, word encoding, models of lexical architecture, family-based effects in word processing, word reading and writing; (ii) usage-based approaches to word meanings (lexical semantics and pragmatics in morphologically simple and complex words), including: distributional semantics, compound interpretation, concept composition and coercion, conceptualization of perception and action, time and space in the lexicon, metonymy and metaphor, lexico-semantic relations, perceptual grounding and embodied cognition, context-based and encyclopedic knowledge, semantic association and categorization. The multidisciplinary focus on word knowledge and word usage promoted by the Conference led participants to openly discuss an impressive range of approaches and empirical data: priming and lexical decision in a number of contexts, distributional semantics and models of semantic composition, neural networks, machine learning and mathematical modelling of empirical evidence, as well as their neuro-biological and neuro-functional correlates. It is widely acknowledged that looking at the same problem from different angles has an additive effect on the impact of current language research. Certainly more can be achieved, however, if, rather than simply adding more perspectives on the same subject, with individual research efforts staying within the boundaries of single knowledge domains, scholars manage to integrate them into a boundary-shifting methodological perspective. When psycholinguistic evidence from humans is successfully replicated algorithmically through a computational model implementing a few well-understood principles of time-series processing, we are in a position to empirically assess what input conditions favour memorisation and acquisition of symbolic strings by the model, and test these algorithmic predictions back on human subjects, thus going full circle. This may have a multiplicative effect on current research, providing not only mathematical modelling of present behavioural evidence, but amounting to fully explanatory mechanisms. Our current understanding of WHERE and WHEN some cognitive processes are implemented in the brain will be complemented by knowledge of WHAT information they rely on and HOW they integrate it. Other compelling examples of the full potential of cross-disciplinary integration can be found in the present volume and in the twin issue of Italian Journal of Linguistics. As a general point, we contend that only by putting single-domain acquisitions into the wider context of human communication, and developing an interdisciplinary framework whereby each specialist will take advantage of insights from other disciplines, we can make substantial progress in our understanding of the lexical roots of human verbal communication in real contexts. The edited selection of papers presented here provides a representative sample of the range of approaches debated at the NetWordS Pisa Conference, by way of illustration of how aspects of knowledge integration and methodological innovation can be put at the service of a better understanding of broad lexical issues.},
  KEYWORDS = {word knowledge, word usage, interdisciplinary approach, mental lexicon, NetWordS},
  PAGES = {3--6},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84978285090\&origin=inward},
  VOLUME = {XV},
  NUMBER = {1},
  DOI = {10.1418/83651},
  PUBLISHER = {Il Mulino (Bologna, ITA)},
  ISBN = {978-88-15-26226-4},
}

@EDITORIAL{MONTEMAGNI_2016_EDITORIAL_M_372004,
  AUTHOR = {Montemagni, S.},
  TITLE = {Proceedings CLiC-it 2016 and EVALITA 2016},
  YEAR = {2016},
  ABSTRACT = {Proceedings of Third Italian Conference on Computational Linguistics (CLiC-it 2016) \& Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop (EVALITA 2016)},
  KEYWORDS = {Computational Linguistics, Natural Language Processing, Speech Tools for Italian, CLiC-it, EVALITA},
  URL = {http://ceur-ws.org/Vol-1749/},
  VOLUME = {1749},
  PUBLISHER = {CEUR-WS.org (Aachen, DEU)},
}

@EDITORIAL{SORIA_2016_EDITORIAL_SPDMSW_355531,
  AUTHOR = {Soria, C. and Pretorius, L. and Declerck, T. and Mariani, J. and Scannell, K.
and Wandl Vogt, E.},
  TITLE = {CCURL 2016 Collaboration and Computing for Under-Resourced Languages: Towards an Alliance for Digital Language Diversity},
  YEAR = {2016},
  ABSTRACT = {Atti del Workshop "CCURL 2016 Collaboration and Computing for Under-Resourced Languages: Towards an Alliance for Digital Language Diversity"},
  KEYWORDS = {Less-resourced languages, Language Technology, digital language vitality, digital language diversity},
  PAGES = {1--103},
  URL = {http://www.lrec-conf.org/proceedings/lrec2016/workshops/LREC2016Workshop-CCURL2016_Proceedings.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-9-1},
}

@COMMENT{NOTE(review): the next two EDITORIAL records carry a journal name and ISSN in BOOKTITLE; this looks like the export's container-title convention - confirm before mapping them to an article type with a JOURNAL field.}

@EDITORIAL{BASILI_2016_EDITORIAL_BM_372022,
  AUTHOR = {Basili, R. and Montemagni, S.},
  TITLE = {Nota Editoriale},
  YEAR = {2016},
  KEYWORDS = {Computational Linguistics, CLiC-it, natural language processing},
  PAGES = {7--10},
  URL = {http://www.ai-lc.it/IJCoL/v2n1/00_nota_editoriale.pdf},
  VOLUME = {2},
  PUBLISHER = {aAccademia University Press, Torino (Italia)},
  ISSN = {2499-4553},
  BOOKTITLE = {Italian Journal of Computational Linguistics},
}

@EDITORIAL{IDE_2016_EDITORIAL_IC_367165,
  AUTHOR = {Ide, N. and Calzolari, N.},
  TITLE = {Editors' introduction to the special issue: papers from LREC 2014},
  YEAR = {2016},
  KEYWORDS = {Language Resources},
  PAGES = {163--164},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84975686215\&origin=inward},
  VOLUME = {50},
  DOI = {10.1007/s10579-016-9358-3},
  PUBLISHER = {Springer (Dordrecht, Paesi Bassi)},
  ISSN = {1574-020X},
  BOOKTITLE = {Language resources and evaluation (Print)},
}

@COMMENT{Conference papers (INPROCEEDINGS), 2016. CONFERENCE_NAME, CONFERENCE_PLACE and CONFERENCE_DATE are export-specific fields kept exactly as exported.}

@INPROCEEDINGS{BARBAGLI_2016_INPROCEEDINGS_BLDMV_366749,
  AUTHOR = {Barbagli, A. and Lucisano, P. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.},
  TITLE = {CItA: an L1 Italian Learners Corpus to Study the Development of Writing Competence},
  YEAR = {2016},
  ABSTRACT = {In this paper, we present the CItA corpus (Corpus Italiano di Apprendenti L1), a collection of essays written by Italian L1 learners collected during the first and second year of lower secondary school. The corpus was built in the framework of an interdisciplinary study jointly carried out by computational linguistics and experimental pedagogists and aimed at tracking the development of written language competence over the years and students' background information.},
  KEYWORDS = {Italian Learner Corpus, Diachronic Evolution of Written Language Competence, Error Annotation},
  PAGES = {88--95},
  URL = {http://www.lrec-conf.org/proceedings/lrec2016/pdf/536_Paper.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-9-1},
  CONFERENCE_NAME = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)},
  CONFERENCE_PLACE = {Portoroz (Slovenia)},
  CONFERENCE_DATE = {23-28 maggio 2016},
}

@INPROCEEDINGS{BELLANDI_2016_INPROCEEDINGS_BBDG_364953,
  AUTHOR = {Bellandi, A. and Benotto, G. and Di Segni, G. and Giovannetti, E.},
  TITLE = {Investigating the Application and Evaluation of Distributional Semantics in the Translation of Humanistic Texts: a Case Study},
  YEAR = {2016},
  ABSTRACT = {Digital Humanities are persisting ascending and the need for translating humanistic texts using Computer Assisted Translation (CAT) tools demands for a specific investigation both of the available technologies and of the evaluation techniques. Indeed, humanistic texts can present deep differences from texts that are usually translated with CAT tools, due to complex interpretative issues, the request of heavy rephrasing, and the addition of explicative parts in order to make the translation fully comprehensible to readers and, also, stylistically pleasant to read. In addition, these texts are often written in peculiar languages for which no linguistic analysis tool can be available. We faced this situation in the context of the project for the translation of the Babylonian Talmud from Ancient Hebrew and Aramaic into Italian. In this paper we describe a work in progress on the application of distributional semantics to the informing of the Translation Memory, and on the evaluation issues arising from its assessment.},
  KEYWORDS = {computer-assisted translation, distributional semantics, talmud},
  PAGES = {6--11},
  URL = {http://www.lrec-conf.org/proceedings/lrec2016/workshops/LREC2016Workshop-NLP4TM_Proceedings.pdf},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  CONFERENCE_NAME = {2nd Workshop on Natural Language Processing for Translation Memories (NLP4TM 2016)},
  CONFERENCE_PLACE = {Portorož, Slovenia},
  CONFERENCE_DATE = {28/05/2016},
  BOOKTITLE = {Proceedings of the 2nd Workshop on Natural Language Processing for Translation Memories (NLP4TM 2016)},
}

@INPROCEEDINGS{BENOTTO_2016_INPROCEEDINGS_BGM_364952,
  AUTHOR = {Benotto, G. and Giovannetti, E. and Marchi, S.},
  TITLE = {Investigating the Application of Distributional Semantics to Stylometry},
  YEAR = {2016},
  ABSTRACT = {The inclusion of semantic features in the stylometric analysis of literary texts appears to be poorly investigated. In this work, we experiment with the application of Distributional Semantics to a corpus of Italian literature to test if words distribution can convey stylistic cues. To verify our hypothesis, we have set up an Authorship Attribution experiment. Indeed, the results we have obtained suggest that the style of an author can reveal itself through words distribution too.},
  KEYWORDS = {NLP for Digital Humanities, Stylometry, Distributional Semantics},
  PAGES = {61--65},
  URL = {https://drive.google.com/open?id=0B0sEp2O7Oo7feVJLdHI3YXBxdTg},
  PUBLISHER = {Accademia University Press (Torino, ITA)},
  ISBN = {9788899982089},
  CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics},
  CONFERENCE_PLACE = {Napoli},
  CONFERENCE_DATE = {5-6/12/2016},
  BOOKTITLE = {Proceedings of Third Italian Conference on Computational Linguistics (CLiC-it 2016)},
}

@INPROCEEDINGS{BENOTTO_2016_INPROCEEDINGS_BGN_364353,
  AUTHOR = {Benotto, G. and Giovannetti, E. and Nahli, O.},
  TITLE = {An application of distributional semantics for the analysis of the Holy Quran},
  YEAR = {2016},
  ABSTRACT = {In this contribution we illustrate the methodology and the results of an experiment we conducted by applying Distributional Semantics Models to the analysis of the Holy Quran. Our aim was to gather information on the potential differences in meanings that the same words might take on when used in Modern Standard Arabic w.r.t. their usage in the Quran. To do so we used the Penn Arabic Treebank as a contrastive corpus.},
  KEYWORDS = {Distributional Semantics, the Holy Quran, Classical Arabic, Modern Standard Arabic, Contrastive Linguistics},
  PAGES = {374--379},
  URL = {http://ieeexplore.ieee.org/document/7805074/},
  DOI = {10.1109/CIST.2016.7805074},
  ISBN = {978-1-5090-0751-6},
  CONFERENCE_NAME = {4th (IEEE) International Colloquium on Information Science and Technology, CiSt 2016},
  CONFERENCE_PLACE = {Tangier, Morocco},
  CONFERENCE_DATE = {24-26/10/2016},
  BOOKTITLE = {2016 4th IEEE International Colloquium on Information Science and Technology (CiSt)},
  EDITOR = {El Mohajir, M. and Chahhou, M. and Al Achhab, M. and El Mohajir, B. E.},
}

@INPROCEEDINGS{BERTI_2016_INPROCEEDINGS_BCYBBD_363704,
  AUTHOR = {Berti, M. and Crane, G. and Yousef, T.
and Bizzoni, Y. and Boschetti, F. and Del Gratta, R.},
  TITLE = {Ancient Greek WordNet meets the Dynamic Lexicon: the example of the fragments of the Greek Historians},
  YEAR = {2016},
  ABSTRACT = {The Ancient Greek WordNet (AGWN) and the Dynamic Lexicon (DL) are multilingual resources to study the lexicon of Ancient Greek texts and their translations. Both AGWN and DL are works in progress that need accuracy improvement and manual validation. After a detailed description of the current state of each work, this paper illustrates a methodology to cross AGWN and DL data, in order to mutually score the items of each resource according to the evidence provided by the other resource. The training data is based on the corpus of the Digital Fragmenta Historicorum Graecorum (DFHG), which includes ancient Greek texts with Latin translations.},
  KEYWORDS = {wordnet},
  PAGES = {34--38},
  URL = {http://gwc2016.racai.ro/procedings.pdf},
  ISBN = {978-606-714-239-6},
  CONFERENCE_NAME = {Global WordNet Conference},
  CONFERENCE_PLACE = {Bucarest},
  CONFERENCE_DATE = {27-30/01/2016},
}

@COMMENT{Conference papers (INPROCEEDINGS), 2016, continued. URLs are kept exactly as exported, including any unusual spelling in the path.}

@INPROCEEDINGS{BOMPOLAS_2016_INPROCEEDINGS_BMFCP_362297,
  AUTHOR = {Bompolas, S. and Marzi, C. and Ferro, M. and Cardillo, F. A. and Pirrelli, V.},
  TITLE = {Reassessing inflectional regularity in Modern Greek conjugation},
  YEAR = {2016},
  ABSTRACT = {Paradigm-based approaches to word processing/learning assume that word forms are not acquired in isolation, but through associative relations linking members of the same word family (e.g. a paradigm, or a set of forms filling the same paradigm cell). Principles of correlative learning offer a set of dynamic equations that are key to modelling this complex dynamic at a considerable level of detail. We use these dynamic equations to simulate acquisition of Modern Greek conjugation, and we compare the results with evidence from German and Italian. Simulations show that different Greek verb classes are processed and acquired differentially, depending on their degrees of formal transparency and predictability. We relate these results to psycholinguistic evidence on Modern Greek word processing, and interpret our findings as supporting a view of the mental lexicon as an emergent integrative system.},
  KEYWORDS = {word processing, paradigm-based learning, morphological processing, Greek stem allomorphy, Temporal Self-Organising Map},
  PAGES = {72--77},
  URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85009242702\&origin=inward},
  VOLUME = {1749},
  DOI = {10.4000/books.aaccademia.1721},
  PUBLISHER = {Accademia University Press (Torino, ITA)},
  ISSN = {1613-0073},
  ISBN = {978-88-99982-08-9},
  CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics (CLiC-it 2016) \& Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop (EVALITA 2016)},
  CONFERENCE_PLACE = {Napoli, Italy},
  CONFERENCE_DATE = {05-07/12/2016},
  BOOKTITLE = {CLiC-it \& EVALITA 2016-Proceedings of Third Italian Conference on Computational Linguistics (CLiC-it 2016) \& Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop (EVALITA 2016)},
  EDITOR = {Basile, P. and Corazza, A. and Montemagni, S. and Nissim, M. and Patti, V. and Semeraro, G. and Sprugnoli, R.},
}

@INPROCEEDINGS{BRANDO_2016_INPROCEEDINGS_BAF_348461,
  AUTHOR = {Brando, C. and Abadie, N. and Frontini, F.},
  TITLE = {Linked Data Quality for Domain-Specific Named-Entity Linking},
  YEAR = {2016},
  ABSTRACT = {We present outgoing research whose goal is to assess quality of Linked Data for its usage in domain-specific Named-entity Linking (NEL). NEL is the task of assigning appropriate referents, typically an Uniform Resource Identifier (URI), to mentions of entities (e.g. persons or places) identified in textual documents. Nowadays, many of these approaches strongly rely on Linked Data as knowledge base. However, the scope of the chosen data sets can have an important influence on the performances of NEL as texts often concern specific domains of knowledge. In this paper, we describe LD quality aspects which should be considered for improving NEL in domain-specific contexts, then propose quality metrics and compute them for both French DBpedia and the French National Library (BnF) data sets thereby to discuss the opportunity of using these data sets for the linking of authors in old French Literary digital editions. Our ultimate goal is to improve a Natural Language Processing (NLP) pipeline for the automatic annotation of these texts.},
  KEYWORDS = {Linked Data, Quality, Named Entity Linking},
  PAGES = {13--24},
  URL = {https://publications.cnr.it/doc/348461},
  CONFERENCE_NAME = {Atelier-Qualité des Données du Web (QLOD'16) Joint à la 16ème édition de la conférence internationale francophone EGC 2016},
  CONFERENCE_PLACE = {Reims},
  CONFERENCE_DATE = {19/01/2016},
}

@INPROCEEDINGS{BRUNATO_2016_INPROCEEDINGS_BCDV_366726,
  AUTHOR = {Brunato, D. and Cimino, A. and Dell'Orletta, F. and Venturi, G.},
  TITLE = {PaCCSS-IT: A Parallel Corpus of Complex-Simple Sentences for Automatic Text Simplification},
  YEAR = {2016},
  ABSTRACT = {In this paper we present PaCCSS-IT, a Parallel Corpus of Complex-Simple Sentences for ITalian. To build the resource we develop a new method for automatically acquiring a corpus of complex-simple paired sentences able to intercept structural transformations and particularly suitable for text simplification. The method requires a wide amount of texts that can be easily extracted from the web making it suitable also for less-resourced languages. We test it on the Italian language making available the biggest Italian corpus for automatic text simplification.},
  KEYWORDS = {Automatic Text Simplification, Sentence alignment, Italian corpus},
  PAGES = {351--361},
  URL = {https://www.aclweb.org/anthology/D/D16/D16-1034.pdf},
  DOI = {10.18653/v1/d16-1034},
  PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)},
  ISBN = {978-1-945626-25-8},
  CONFERENCE_NAME = {Conference on Empirical Methods in Natural Language Processing (EMNLP 2016)},
  CONFERENCE_PLACE = {Austin, Texas},
  CONFERENCE_DATE = {01-05/11/2016},
}

@INPROCEEDINGS{CALZOLARI_2016_INPROCEEDINGS_C_367150,
  AUTHOR = {Calzolari, N.},
  TITLE = {Preface: General Chair},
  YEAR = {2016},
  ABSTRACT = {COLING is organised under the auspices of the International Committee on Computational Linguistics (ICCL, http://nlp.shef.ac.uk/iccl/index.html). ICCL is a very special committee, with no fixed rules and no funding, whose only function is to make sure that a COLING appears every two years and that it is a good and friendly conference.},
  KEYWORDS = {Computational Linguistics},
  URL = {http://aclweb.org/anthology/C/C16/C16-1000.pdf},
  ISBN = {978-4-87974-702-0},
  CONFERENCE_NAME = {The 26th International Conference on Computational Linguistics, COLING 2016},
  CONFERENCE_PLACE = {Osaka, Japan},
  CONFERENCE_DATE = {December 11-16, 2016},
}

@INPROCEEDINGS{CALZOLARI_2016_INPROCEEDINGS_C_367173,
  AUTHOR = {Calzolari, N.},
  TITLE = {Introduction of the Conference Chair and ELRA President Nicoletta Calzolari},
  YEAR = {2016},
  ABSTRACT = {Chair of the 10th International Conference on Language Resources and Evaluation and ELRA President},
  KEYWORDS = {Language Resources},
  PAGES = {1--6},
  URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-9-1},
  CONFERENCE_NAME = {LREC 2016, Tenth International Conference on Language Resources and Evaluation},
  CONFERENCE_PLACE = {Portorose,
Slovenia}, CONFERENCE_DATE = {May 23-28, 2016}, BOOKTITLE = {The LREC 2016 Proceedings}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{CONNOR_2016_INPROCEEDINGS_CC_364655, AUTHOR = {Connor, R. and Cardillo, F. A.}, TITLE = {Quantifying the specificity of near-duplicate image classification functions}, YEAR = {2016}, ABSTRACT = {There are many published methods for detecting similar and near-duplicate images. Here, we consider their use in the context of unsupervised near-duplicate detection, where the task is to find a (relatively small) near-duplicate intersection of two large candidate sets. Such scenarios are of particular importance in forensic near-duplicate detection. The essential properties of a such a function are: performance, sensitivity, and specificity. We show that, as collection sizes increase, then specificity becomes the most important of these, as without very high specificity huge numbers of false positive matches will be identified. This makes even very fast, highly sensitive methods completely useless. Until now, to our knowledge, no attempt has been made to measure the specificity of near-duplicate finders, or even to compare them with each other. Recently, a benchmark set of near-duplicate images has been established which allows such assessment by giving a near-duplicate ground truth over a large general image collection. Using this we establish a methodology for calculating specificity. A number of the most likely candidate functions are compared with each other and accurate measurement of sensitivity vs. specificity are given. 
We believe these are the first such figures to be calculated for any such function.}, KEYWORDS = {near-duplicate image detection}, PAGES = {647-654}, URL = {https://publications.cnr.it/doc/364655}, CONFERENCE_NAME = {11th International Conference on Computer Vision Theory and Applications}, CONFERENCE_PLACE = {Roma}, CONFERENCE_DATE = {27-29/02/2016}, } @INPROCEEDINGS{CONNOR_2016_INPROCEEDINGS_CVCR_363066, AUTHOR = {Connor, R. and Vadicamo, L. and Cardillo, F. A. and Rabitti, F.}, TITLE = {Supermetric search with the four-point property}, YEAR = {2016}, ABSTRACT = {Metric indexing research is concerned with the efficient evaluation of queries in metric spaces. In general, a large space of objects is arranged in such a way that, when a further object is presented as a query, those objects most similar to the query can be efficiently found. Most such mechanisms rely upon the triangle inequality property of the metric governing the space. The triangle inequality property is equivalent to a finite embedding property, which states that any three points of the space can be isometrically embedded in two-dimensional Euclidean space. In this paper, we examine a class of semimetric space which is finitely 4-embeddable in three-dimensional Euclidean space. In mathematics this property has been extensively studied and is generally known as the four-point property. All spaces with the four-point property are metric spaces, but they also have some stronger geometric guarantees. We coin the term supermetric space as, in terms of metric search, they are significantly more tractable. We show some stronger geometric guarantees deriving from the four-point property which can be used in indexing to great effect, and show results for two of the SISAP benchmark searches that are substantially better than any previously published.}, KEYWORDS = {Similarity search, Metric Space, Supermetric Space, Metric Indexing, Four-point property, Hilbert Embedding, H. 3. 
3 INFORMATION STORAGE AND RETRIEVAL. Information Search and Retrieval}, PAGES = {51-64}, URL = {https://link.springer.com/chapter/10.1007%2F978-3-319-46759-7_4}, VOLUME = {9939}, DOI = {10.1007/978-3-319-46759-7_4}, CONFERENCE_NAME = {Similarity Search and Applications. 9th International Conference}, CONFERENCE_PLACE = {Tokyo, Japan}, CONFERENCE_DATE = {24-26 October 2016}, BOOKTITLE = {Similarity Search and Applications. SISAP 2016}, EDITOR = {Amsaleg, L. and Houle, M. and Schubert, E.}, } @INPROCEEDINGS{DELGRATTA_2016_INPROCEEDINGS_DFMPRBKSC_355425, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Khan, F. and Soria, C. and Calzolari, N.}, TITLE = {LREC as a Graph: People and Resources in a Network}, YEAR = {2016}, ABSTRACT = {This proposal describes a new way to visualise resources in the LREMap, a community-built repository of language resource descriptions and uses. The LREMap is represented as a force-directed graph, where resources, papers and authors are nodes. The analysis of the visual representation of the underlying graph is used to study how the community gathers around LRs and how LRs are used in research.}, KEYWORDS = {Language Resources, Resources Documentation, Data Visualisation}, PAGES = {2529-2532}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{DELLORLETTA_2016_INPROCEEDINGS_DMV_366757, AUTHOR = {Dell'Orletta, F. and Montemagni, S. 
and Venturi, G.}, TITLE = {Esplorazioni computazionali nello spazio dell'interlingua: verso una nuova metodologia di indagine}, YEAR = {2016}, ABSTRACT = {Il presente contributo intende proporre un innovativo approccio all'identificazione delle caratteristiche linguistiche che aiutano a definire l'interlingua. Tale approccio consiste nella ricostruzione del profilo linguistico di corpora di produzioni scritte da apprendenti una lingua seconda basato su strumenti di trattamento automatico del linguaggio.}, KEYWORDS = {interlingua, annotazione linguistica automatica, monitoraggio linguistico}, PAGES = {143-161}, URL = {https://www.bulzoni.it/it/catalogo/lingue-in-contatto-contact-linguistics.html}, PUBLISHER = {Bulzoni Editore (Roma, ITA)}, ISBN = {978-88-6897-029-1}, CONFERENCE_NAME = {XLVIII Congresso Internazionale di Studi della Società di Linguistica Italiana (SLI 2014)}, CONFERENCE_PLACE = {Udine}, CONFERENCE_DATE = {25-27 settembre 2014}, } @INPROCEEDINGS{DELLORLETTA_2016_INPROCEEDINGS_DV_366752, AUTHOR = {Dell'Orletta, F. and Venturi, G.}, TITLE = {ULISSE: una strategia di adattamento al dominio per l'annotazione sintattica automatica}, YEAR = {2016}, ABSTRACT = {This paper deals with Domain Adaptation for automatic syntactic annotation. Until the half of the 1980s, automatic linguistic annotation was based on algorithms built on groups of hand-written rules, defined a priori on the basis of the knowledge of the system to formalise. Subsequently, thanks to the progress of research in the field of Artificial Intelligence and to the development of linguistic resources, algorithms based on machine learning techniques began to be employed. The major difficulties of those algorithms were due to certain aspects of natural language such as ambiguities, diachronic evolutions, or language variations from the original domain of knowledge. 
More specifically, the issue of Domain Adaptation can be put in the following terms: "can an annotated corpus [which is representative of a specific linguistic variety] be used for the syntactic analysis of a second corpus [which is representative of a different linguistic variety]?". The authors answer by presenting an algorithm called ULISSE (Unsupervised LInguistically-driven Selection of dEpendency parses), which selects in an optimal way the most representative sentences of a new target domain and feeds them to the parser in addition to the original training set.}, KEYWORDS = {Domain Adaptation, annotazione sintattica automatica}, PAGES = {55-79}, URL = {http://www.italianlp.it/wp-content/uploads/2016/10/Compter_Parler_Soigner_ULISSE.pdf}, ISBN = {978-88-6952-038-9}, CONFERENCE_NAME = {Atti del convegno "Compter parler soigner: tra linguistica e intelligenza artificiale"}, CONFERENCE_PLACE = {Pavia}, CONFERENCE_DATE = {15-17 dicembre 2014}, } @INPROCEEDINGS{FERRO_2016_INPROCEEDINGS_FCPGS_362349, AUTHOR = {Ferro, M. and Cardillo, F. A. and Pirrelli, V. and Gagné, C. L. and Spalding, T. L.}, TITLE = {Written word production and lexical self-organisation: evidence from English (pseudo)compounds}, YEAR = {2016}, ABSTRACT = {Elevation in typing latency for the initial letter of the second constituent of an English compound, relative to the latency for the final letter of the first constituent of the same compound, provides evidence that implementation of a motor plan for written compound production involves smaller constituents, in both semantically transparent and semantically opaque compounds. We investigate here the implications of this evidence for algorithmic models of lexical organisation, to show that effects of differential perception of the internal structure of compounds and pseudo-compounds can also be simulated as peripheral stages of lexical access by a self-organising connectionist architecture, even in the absence of morphosemantic information. 
This complementary evidence supports a maximization-of-opportunity approach to lexical modelling, accounting for the integration of effects of pre-lexical and lexical access.}, KEYWORDS = {compound, pseudo-compound, written word production, lexical self-organisation, temporal self organising map}, PAGES = {146-151}, URL = {http://ceur-ws.org/Vol-1749/}, VOLUME = {1749}, DOI = {10.4000/books.aaccademia.1775}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {9788899982546}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics (CLiC-it 2016)}, CONFERENCE_PLACE = {Napoli (Italia)}, CONFERENCE_DATE = {5-6/12/2016}, BOOKTITLE = {Proceedings CLiC-it 2016}, EDITOR = {Basile, P. and Corazza, A. and Cutugno, F. and Montemagni, S. and Nissim, M. and Patti, V. and Semeraro, G. and Sprugnoli, R.}, } @INPROCEEDINGS{GIANNINI_2016_INPROCEEDINGS_GBGP_348024, AUTHOR = {Giannini, S. and Biagioni, S. and Goggi, S. and Pardelli, G.}, TITLE = {Grey Literature citations in the age of Digital Repositories and Open Access}, YEAR = {2016}, ABSTRACT = {The work measures grey citations in the years 2012, 2013 and 2014 and then describes the features of GL documents cited in different areas of knowledge: Computational Linguistics, Computer Science and Engineering. 
With the aim of surveying a wide and varied range of resources, we selected a sample data based on the bibliographical references of articles contained in four journals - all indexed by Scopus Citation Database and ISI Web of Science, with an Impact Factor (IF) over the last three years - and two proceedings of international conferences held in 2012 and 2014.}, KEYWORDS = {Grey Literature Citations}, PAGES = {137-145}, URL = {https://publications.cnr.it/doc/348024}, VOLUME = {17}, ISBN = {978-90-77484-27-2}, CONFERENCE_NAME = {Seventeenth International Conference on Grey Literature: A New Wave of Textual and Non-Textual Grey literature}, CONFERENCE_PLACE = {Amsterdam, NL}, CONFERENCE_DATE = {1-2 December 2015}, BOOKTITLE = {A New Wave of Textual and Non-Textual Grey literature}, } @INPROCEEDINGS{GOGGI_2016_INPROCEEDINGS_GPBFMMDB_350374, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Frontini, F. and Monachini, M. and Manzella, G. and De Mattei, M. and Bustaffa, F.}, TITLE = {A semantic engine for grey literature retrieval in the oceanography domain}, YEAR = {2016}, ABSTRACT = {Here we present the final results of the MAPS (Marine Planning and Service Platform) project, an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. The system takes as input non-textual data (measurements) and text - both published papers and documentation - and it provides an advanced search facility thanks to the rich set of metadata and, above all, to the possibility of a refined and domain targeted key-word indexing of texts using Natural Language Processing (NLP) techniques. 
The paper describes the system in its details providing also evidence of evaluation.}, KEYWORDS = {Information Extraction, Search Engine, Operative Oceanography}, PAGES = {104-111}, URL = {https://publications.cnr.it/doc/350374}, VOLUME = {17}, ISBN = {978-90-77484-27-2}, CONFERENCE_NAME = {Seventeenth International Conference on Grey Literature. A New Wave of Textual and Non-Textual Grey Literature}, CONFERENCE_PLACE = {Amsterdam}, CONFERENCE_DATE = {December 1st-2nd 2015}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{KHAN_2016_INPROCEEDINGS_KBM_366525, AUTHOR = {Khan, A. F. and Bellandi, A. and Monachini, M.}, TITLE = {Tools and Instruments for Building and Querying Diachronic Computational Lexica}, YEAR = {2016}, ABSTRACT = {This article describes work on enabling the addition of temporal information to senses of words in linguistic linked open data lexica based on the lemonDia model. Our contribution in this article is twofold. On the one hand, we demonstrate how lemonDia enables the querying of diachronic lexical datasets using OWL-oriented Semantic Web based technologies. On the other hand, we present a preliminary version of an interactive interface intended to help users in creating lexical datasets that model meaning change over time.}, KEYWORDS = {OWL-oriented Semantic Web based technologies}, PAGES = {164-171}, URL = {https://www.clarin-d.net/images/lt4dh/pdf/LT4DH22.pdf}, ISBN = {978-4-87974-708-2}, CONFERENCE_NAME = {Language Technology Resources and Tools for Digital Humanities (LT4DH 2016)}, CONFERENCE_PLACE = {Osaka, Japan}, CONFERENCE_DATE = {December 11-16, 2016}, } @INPROCEEDINGS{KHAN_2016_INPROCEEDINGS_KDM_355470, AUTHOR = {Khan, F. and Díaz Vera, J. E. 
and Monachini, M.}, TITLE = {Representing Polysemy and Diachronic Lexico-Semantic Data on the Semantic Web}, YEAR = {2016}, ABSTRACT = {In this article we will outline two different vocabularies, both extensions of the lemon model, for representing diachronic lexico-semantic data on the Semantic Web. This is especially useful for representing the evolution of scientific terminologies where many terms are polysemous and/or imported from other languages. The first vocabulary, polyLemon, allows for the representation of data about polysemy; the second, lemonDIA, the representation of meaning shift over time.}, KEYWORDS = {Language Resources, Resource Data Framework (RDF)}, PAGES = {37-45}, URL = {http://ceur-ws.org/Vol-1595/paper4.pdf}, VOLUME = {1595}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Second International Workshop on Semantic Web for Scientific Heritage co-located with 13th Extended Semantic Web Conference (ESWC 2016)}, CONFERENCE_PLACE = {Heraklion, Greece}, CONFERENCE_DATE = {May 30th, 2016}, BOOKTITLE = {SWASH 2016 Semantic Web for Scientific Heritage Proceedings of the Second International Workshop on Semantic Web for Scientific Heritage co-located with 13th Extended Semantic Web Conference (ESWC 2016)}, EDITOR = {Draelants, I. and Zucker, C. F. and Monnin, A. and Zucker, A.}, } @INPROCEEDINGS{KHAN_2016_INPROCEEDINGS_KDM_355442, AUTHOR = {Khan, F. and Díaz Vera, J. and Monachini, M.}, TITLE = {The Representation of an Old English Emotion Lexicon as Linked Open Data}, YEAR = {2016}, ABSTRACT = {We present the ongoing conversion of a lexicon of emotion terms in Old English (OE) into RDF using an extension of lemon called lemonDIA and which we briefly describe. 
We focus on the translation of the subset of the lexicon dealing with terms for shame and guilt and give a number of illustrative examples.}, KEYWORDS = {Linguistic Linked Open Data, Old English, Lexicon}, PAGES = {73-76}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 may}, BOOKTITLE = {LDL 2016 5th Workshop on Linked Data in Linguistics: Managing, Building and Using Linked Language Resources}, EDITOR = {McCrae, J. P. and Chiarcos, C. and Ponsoda, E. M. and Declerck, T. and Osenova, P. and Hellmann, S.}, } @INPROCEEDINGS{MONTEMAGNI_2016_INPROCEEDINGS_M_372010, AUTHOR = {Montemagni, S.}, TITLE = {Preface}, YEAR = {2016}, ABSTRACT = {Our very warm welcome to CLiC-it 2016 (http://clic-it2016.dieti.unina.it/), the 3rd edition of the Italian Conference on Computational Linguistics, held on December 5th and 6th, in Naples, Italy, co-located with Evalita 2016 (http://www.evalita.it/2016), hosted and locally organized by Università Federico II, one of the oldest public and laic universities in the world. The organization of the conference is the result of a fruitful conjoint effort of different research groups (Istituto di Linguistica Computazionale "Antonio Zampolli" del CNR, Università degli Studi di Bari Aldo Moro and Università degli Studi di Napoli Federico II) showing the nationwide spreading of Computational Linguistics in Italy. 
The CLiC-it conference series is organized by the Italian Association for Computational Linguistics (AILC) and has clearly established itself as the premier national forum for research and development in the fields of Computational Linguistics (CL) and Natural Language Processing (NLP), where leading researchers and practitioners from academia and industry meet to share their challenges, solutions, research results, and experiences. CLiC-it covers all aspects of computational linguistics and natural language (both written and spoken) processing, and targets state-of-the-art theoretical results, experimental methodologies, technologies, as well as application perspectives, which may contribute to advance the field.}, KEYWORDS = {Computational Linguistics, Natural Language Processing, Speech Tools for Italian, CLiC-it, EVALITA}, URL = {https://publications.cnr.it/doc/372010}, VOLUME = {1749}, PUBLISHER = {CEUR-WS. org (Aachen, DEU)}, CONFERENCE_NAME = {CLiC-it \& EVALITA 2016}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-7/12/2016}, BOOKTITLE = {Proceedings CLiC-it 2016 and EVALITA 2016}, EDITOR = {Montemagni, S.}, } @INPROCEEDINGS{NAHLI_2016_INPROCEEDINGS_NFMKZK_355436, AUTHOR = {Nahli, O. and Frontini, F. and Monachini, M. and Khan, F. and Zarghili, A. and Khalfi, M.}, TITLE = {Al Qamus al Muhit, a Medieval Arabic Lexicon in LMF}, YEAR = {2016}, ABSTRACT = {This paper describes the conversion into LMF, a standard lexicographic digital format of 'al-qāmūs al-muḥīṭ, a Medieval Arabic lexicon. The lexicon is first described, then all the steps required for the conversion are illustrated. The work will produce a useful lexicographic resource for Arabic NLP, but is also interesting per se, to study the implications of adapting the LMF model to the Arabic language. Some reflections are offered as to the status of roots with respect to previously suggested representations. 
In particular, roots are, in our opinion, not to be treated as lexical entries, but modeled as lexical metadata for classifying and identifying lexical entries. In this manner, each root connects all entries that are derived from it.}, KEYWORDS = {Arabic Lexicon, LMF, Al Qamus al Muhit}, PAGES = {943-950}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{ORLETTI_2016_INPROCEEDINGS_ODI_366723, AUTHOR = {Orletti, F. and Dell'Orletta, F. and Iovino, R.}, TITLE = {La leggibilità dei testi di ambito medico rivolti al paziente: Il caso dei bugiardini di farmaci senza obbligo di prescrizione medica}, YEAR = {2016}, ABSTRACT = {In this paper we present the first results of an exploratory analysis of simplification of the package leaflets of medicines, considered representative texts of doctor-patient communication. It will be shown how natural language processing tools can be used to reconstruct the linguistic profile of these texts and to guide their simplification.}, KEYWORDS = {leggibilità}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85009291162\&origin=inward}, VOLUME = {1749}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-6/12/2016}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{PARDELLI_2016_INPROCEEDINGS_PGGB_355458, AUTHOR = {Pardelli, G. and Goggi, S. 
and Giannini, S. and Biagioni, S.}, TITLE = {Two decades of terminology: European framework programmes titles}, YEAR = {2016}, ABSTRACT = {This work analyses a corpus made of the titles of research projects belonging to the last four European Commission Framework Programmes (FP4, FP5, FP6, FP7) during a time span of nearly two decades (1994-2012). The starting point is the idea of creating a corpus of titles which would constitute a terminological niche, a sort of "cluster map" offering an overall vision on the terms used and the links between them. Moreover, by performing a terminological comparison over a period of time it is possible to trace the presence of obsolete words in outdated research areas as well as of neologisms in the most recent fields. Within this scenario, the minimal purpose is to build a corpus of titles of European projects belonging to the several Framework Programmes in order to obtain a terminological mapping of relevant words in the various research areas: particularly significant would be those terms spread across different domains or those extremely tied to a specific domain. A term could actually be found in many fields and being able to acknowledge and retrieve this cross-presence means being able to link those different domains by means of a process of terminological mapping.}, KEYWORDS = {Terminology Extraction, Natural Language Processing, Terminological Comparison}, PAGES = {373-378}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {LREC 2016-Tenth International Conference on Language Resources and Evaluation}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 May}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. 
and Piperidis, S.}, } @INPROCEEDINGS{PESINI_2016_INPROCEEDINGS_PDB_288074, AUTHOR = {Pesini, L. and Del Grosso, A. M. and Bozzi, A.}, TITLE = {F. de Saussure e la linguistica romanza. Un'applicazione web per l'edizione elettronica dei manoscritti}, YEAR = {2016}, ABSTRACT = {Se il ruolo di F. de Saussure nella linguistica indoeuropea e nella linguistica generale è ben noto, il suo interesse nell'ambito della linguistica romanza è stato quasi del tutto trascurato dagli studiosi. Fino ad oggi è stato difficile valutare il contributo di Saussure alla romanistica, dato che i suoi lavori (riguardanti soprattutto i patois francesi e la toponomastica) restano in gran parte inediti. Soltanto nel quadro di una nuova edizione critica digitale di tutte le opere di Saussure sarà possibile apprezzare il suo profondo interesse per la filologia romanza e far luce sul ruolo che lo studio dell'evoluzione delle lingue neolatine ebbe nella riflessione generale di questo linguista, sia comparatista che romanista.}, KEYWORDS = {Computational and collaborative philology, digital humanities, software engineering}, PAGES = {239-254}, URL = {http://www.atilf.fr/cilpr2013/actes/section-16/CILPR-2013-16-Pesini-Bozzi-Del_Grosso.pdf}, PUBLISHER = {Société de linguistique romane/ÉLiPhi (Strasbourg, FRA)}, ISBN = {979-10-91460-32-3}, CONFERENCE_NAME = {XXVIIe Congrès international de linguistique et de philologie romanes}, CONFERENCE_PLACE = {Nancy}, CONFERENCE_DATE = {15-20 juillet 2013}, EDITOR = {Buchi, É. and Chauveau, J. and Pierrel, J. M.}, } @INPROCEEDINGS{PICCINI_2016_INPROCEEDINGS_PBBG_388601, AUTHOR = {Piccini, S. and Bellandi, A. and Benotto, G. 
and Giovannetti, E.}, TITLE = {La modellazione diacronica di risorse termino-ontologiche nell'ambito delle digital humanities: Esperimenti su clavius}, YEAR = {2016}, ABSTRACT = {In this work, we present an experiment in the modeling of a diachronic termino-ontological resource named CLAVIUS through both the N-ary relations model and the 4D-fluents approach. Some of the salient differences of these two models are discussed. The overall objective of this research is to illustrate the main advantages and disadvantages in the adoption of a given model to build diachronic resources.}, KEYWORDS = {Diachronic terminology, termino-ontological resources, n-ary models, perdurantist models}, PAGES = {205-245}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85009289234\&origin=inward}, VOLUME = {1749}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISSN = {1613-0073}, ISBN = {9788899982089}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-6/12/2016}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{PICCINI_2016_INPROCEEDINGS_PGBR_282572, AUTHOR = {Piccini, S. and Giovannetti, E. and Bellandi, A. and Ruimy, N.}, TITLE = {Le lexique électronique de la terminologie de Ferdinand de Saussure: une première}, YEAR = {2016}, ABSTRACT = {Le Projet italien intitulé "Per un'edizione digitale dei manoscritti di Ferdinand de Saussure" a pour objectif la création d'un prototype d'édition numérique des textes du grand linguiste genevois, réalisé à partir d'une sélection de ses documents autographes. Dans ce projet, l'un des volets novateurs est la création du premier thésaurus-lexique électronique de la terminologie linguistique Saussurienne. 
Afin de réaliser cette base de connaissance, nous nous sommes inspirés du modèle lexical SIMPLE qui a permis le développement de vastes lexiques sémantiques informatisés, et l'avons adapté aux exigences de notre domaine de la connaissance. Le lexique réalisé a été conçu dans le but d'offrir un outil performant pour les études saussuriennes. Il fournit en effet une représentation structurée de la terminologie de Saussure, une définition du contenu sémantique de chacun des termes ainsi qu'un cadre explicite de la nature et de l'importance des liens qui les unissent. Ce faisceau d'informations devrait donc contribuer de manière significative à mieux maîtriser le vocabulaire saussurien et, partant, à éclairer certains aspects originaux de la pensée du père de la linguistique moderne.}, KEYWORDS = {Saussure, lessici computazionali, ontologie}, PAGES = {255-267}, URL = {http://www.atilf.fr/cilpr2013/actes/section-16/CILPR-2013-16-Piccini-Giovannetti-Bellandi-Ruimy.pdf}, ISBN = {979-10-91460-32-3}, CONFERENCE_NAME = {XXVIIe Congrès international de linguistique et de philologie romanes}, CONFERENCE_PLACE = {Nancy}, CONFERENCE_DATE = {15-20/07/2013}, BOOKTITLE = {Section 16: Projets en cours; ressources et outils nouveaux}, EDITOR = {Trotter, D. and Bozzi, A. and Fairon, C.}, } @INPROCEEDINGS{PIERI_2016_INPROCEEDINGS_PBD_366724, AUTHOR = {Pieri, G. and Brunato, D. and Dell'Orletta, F.}, TITLE = {Studio sull'ordinamento dei costituenti nel confronto tra generi e complessità}, YEAR = {2016}, ABSTRACT = {In questo articolo presentiamo uno studio sull'ordine dei costituenti in italiano basato su corpora annotati in maniera automatica fino all'analisi sintattica a dipendenze. 
L'indagine comparativa ha permesso di valutare l'influenza sia del genere testuale sia della complessità linguistica nella distribuzione dei fenomeni di marcatezza sintattica.}, KEYWORDS = {Complessità linguistica, Corpora annotati, Generi testuali}, PAGES = {5}, URL = {http://ceur-ws.org/Vol-1749/paper44.pdf}, VOLUME = {1749}, CONFERENCE_NAME = {Third Italian Conference on Computational Linguistics (CLiC-it 2016)}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-6/12/2016}, EDITOR = {Basile, P. and Corazza, A. and Cutugno, F. and Montemagni, S. and Nissim, M. and Patti, V. and Semeraro, G. and Sprugnoli, R.}, } @INPROCEEDINGS{POPESCU_2016_INPROCEEDINGS_PLDCC_363731, AUTHOR = {Popescu, V. and Liu, L. and Del Gratta, R. and Choukri, K. and Calzolari, N.}, TITLE = {New Developments in the LRE Map}, YEAR = {2016}, ABSTRACT = {In this paper we describe the new developments brought to LRE Map, especially in terms of the user interface of the Web application, of the searching of the information therein, and of the data model updates. Thus, users now have several new search facilities, such as faceted search and fuzzy textual search, they can now register, log in and store search bookmarks for further perusal. Moreover, the data model now includes the notion of paper and author, which allows for linking the resources to the scientific works. Also, users can now visualise author-provided field values and normalised values. The normalisation has been manual and enables a better grouping of the entries. Last but not least, provisions have been made towards linked open data (LOD) aspects, by exposing an RDF access point allowing to query on the authors, papers and resources. 
Finally, a complete technological overhaul of the whole application has been undertaken, especially in terms of the Web infrastructure and of the text search backend.}, KEYWORDS = {Language resource, LRE Map, Information search and retrieval, Data modelling}, PAGES = {4526-4530}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/pdf/1256_Paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28/05/2016}, } @INPROCEEDINGS{RUSSO_2016_INPROCEEDINGS_RM_367412, AUTHOR = {Russo, I. and Monachini, M.}, TITLE = {Samskara minimal structural features for detecting subjectivity and polarity in Italian tweets}, YEAR = {2016}, ABSTRACT = {Sentiment analysis classification tasks strongly depend on the properties of the medium that is used to communicate opinionated content. There are some limitations in Twitter that force the user to exploit structural properties of this social network with features that have pragmatic and communicative functions. Samskara is a system that uses minimal structural features to classify Italian tweets as instantiations of a textual genre, obtaining good results for subjectivity classification, while polarity classification needs substantial improvements.}, KEYWORDS = {sentiment analysis, twitter}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85009270160\&origin=inward}, VOLUME = {1749}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Fifth Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop EVALITA 2016}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {7/12/2016}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{SORIA_2016_INPROCEEDINGS_SRQHGST_355526, AUTHOR = {Soria, C. 
and Russo, I. and Quochi, V. and Hicks, D. and Gurrutxaga, A. and Sarhimaa, A. and Tuomisto, M.}, TITLE = {Fostering digital representation of EU regional and minority languages: the Digital Language Diversity Project}, YEAR = {2016}, ABSTRACT = {Poor digital representation of minority languages further prevents their usability on digital media and devices. The Digital Language Diversity Project, a three-year project funded under the Erasmus+ programme, aims at addressing the problem of low digital representation of EU regional and minority languages by giving their speakers the intellectual and practical skills to create, share, and reuse online digital content. Availability of digital content and technical support to use it are essential prerequisites for the development of language-based digital applications, which in turn can boost digital usage of these languages. In this paper we introduce the project, its aims, objectives and current activities for sustaining digital usability of minority languages through adult education.}, KEYWORDS = {Less-resourced languages, Language Technology, digital language vitality, digital language diversity}, PAGES = {3256-3260}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {23-28 may}, } @INPROCEEDINGS{TUSA_2016_INPROCEEDINGS_TDMV_366754, AUTHOR = {Tusa, E. and Dell'Orletta, F. and Montemagni, S. 
and Venturi, G.}, TITLE = {Dieci sfumature di marcatezza sintattica: Verso una nozione computazionale di complessità}, YEAR = {2016}, ABSTRACT = {In this work, we will investigate whether and to what extent algorithms typically used to assess the reliability of the output of syntactic parsers can be used to study the correlation between processing complexity and the linguistic notion of markedness. Although still preliminary, achieved results show the key role of features such as dependency direction and length in defining the markedness degrees of a given syntactic construction.}, KEYWORDS = {marcatezza sintattica, complessità linguistica, annotazione linguistica automatica}, PAGES = {1-6}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-85009279517\&origin=inward}, VOLUME = {1749}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Napoli}, CONFERENCE_DATE = {5-6 dicembre 2016}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{VALSECCHI_2016_INPROCEEDINGS_VABMP_356355, AUTHOR = {Valsecchi, F. and Abrate, M. and Bacciu, C. and Marchetti, A. and Piccini, S.}, TITLE = {Text Encoder and Annotator: an all-in-one editor for transcribing and annotating manuscripts with RDF}, YEAR = {2016}, ABSTRACT = {In the context of the digitization of manuscripts, transcription and annotation are often distinct, sequential steps. This could lead to difficulties in improving the transcribed text when annotations have already been defined. In order to avoid this, we devised an approach which merges the two steps into the same process. Text Encoder and Annotator (TEA) is a prototype application embracing this concept. TEA is based on a lightweight language syntax which annotates text using Semantic Web technologies. 
Our approach is currently being developed within the Clavius on the Web project, devoted to studying the manuscripts of Christophorus Clavius, an influential 16th century mathematician and astronomer.}, KEYWORDS = {RDF, Semantic Annotation, Semantic Web}, PAGES = {399-407}, URL = {https://publications.cnr.it/doc/356355}, VOLUME = {9989}, DOI = {10.1007/978-3-319-47602-5_52}, CONFERENCE_NAME = {European Semantic Web Conference 2016}, CONFERENCE_PLACE = {Heraklion, Greece}, CONFERENCE_DATE = {29/05-02/06-2016}, BOOKTITLE = {The Semantic Web}, } @INPROCEEDINGS{WEINGART_2016_INPROCEEDINGS_WG_364954, AUTHOR = {Weingart, A. and Giovannetti, E.}, TITLE = {A lexicon for Old Occitan medico-botanical terminology in lemon}, YEAR = {2016}, ABSTRACT = {The article presents the adaptation of the lemon model (a model for lexica as RDF data) for a multilingual and multi-alphabetical lexicon of Old Occitan medico-botanical terminology. The lexicon is the core component of an ontology-based information system that will be constructed and implemented within the DFG-funded project "Dictionnaire de Termes Médico-botaniques de l'Ancien Occitan" (DiTMAO). The difficulties for the lemmatization raised by the particularities of the corpus (terms in Latin, Hebrew and Arabic script and corresponding terms in other ancient languages, mostly Hebrew and Arabic) can be perfectly solved by extending the basic properties of lemon and introducing domain specific vocabulary.}, KEYWORDS = {lemon model, RDF, multilingual, multi-alphabetical, historical lexicon, medico-botanical terminology, Old Occitan, Hebrew, Arabic}, PAGES = {25-36}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84978818653\&origin=inward}, VOLUME = {1595}, PUBLISHER = {M. 
Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Second International Workshop on Semantic Web for Scientific Heritage (SW4SH 2016)}, CONFERENCE_PLACE = {Héraklion, Greece}, CONFERENCE_DATE = {30/05/2016}, BOOKTITLE = {CEUR workshop proceedings}, } @INPROCEEDINGS{WIELING_2016_INPROCEEDINGS_WSCM_359168, AUTHOR = {Wieling, M. and Sassolini, E. and Cucurullo, S. and Montemagni, S.}, TITLE = {ALT Explored: Integrating an Online Dialectometric Tool and an Online Dialect Atlas}, YEAR = {2016}, ABSTRACT = {In this paper, we illustrate the integration of an online dialectometric tool, Gabmap, together with an online dialect atlas, the Atlante Lessicale Toscano (ALT-Web). By using a newly created url-based interface to Gabmap, ALT-Web is able to take advantage of the sophisticated dialect visualization and exploration options incorporated in Gabmap. For example, distribution maps showing the distribution in the Tuscan dialect area of a specific dialectal form (selected via the ALT-Web website) are easily obtainable. Furthermore, the complete ALT-Web dataset as well as subsets of the data (selected via the ALT-Web website) can be automatically uploaded and explored in Gabmap. By combining these two online applications, macro- and micro-analyses of dialectal data (respectively offered by Gabmap and ALT-Web) are effectively and dynamically combined.}, KEYWORDS = {Lexicon, Lexical Database, Tools, Systems, Applications}, PAGES = {3265-3272}, URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html}, ISBN = {978-2-9517408-9-1}, CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)}, CONFERENCE_PLACE = {Portorož, Slovenia}, CONFERENCE_DATE = {23-28/05/2016}, } @INPROCEEDINGS{ARRIGONI_2016_INPROCEEDINGS_AKMB_363708, AUTHOR = {Arrigoni, S. and Khan, F. and Monachini, M. 
and Boschetti, F.}, TITLE = {Misurare Memorata Poetis: prime statistiche}, YEAR = {2016}, KEYWORDS = {intertestualità, temi e motivi}, PAGES = {151-155}, URL = {http://www.himeros.eu/aiucd2016/c47.pdf}, CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD. Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture}, CONFERENCE_PLACE = {Ca' Dolfin, Venezia, Italia}, CONFERENCE_DATE = {7-9/9/2016}, BOOKTITLE = {AIUCD 2016-Book of Abstracts}, EDITOR = {Boschetti, F.}, } @INPROCEEDINGS{BARTOLINI_2016_INPROCEEDINGS_BPGGB_362848, AUTHOR = {Bartolini, R. and Pardelli, G. and Goggi, S. and Giannini, S. and Biagioni, S.}, TITLE = {A terminological "journey" in the Grey Literature domain}, YEAR = {2016}, ABSTRACT = {"When we read the articles or papers of a particular domain, we can recognize some lexical items in the texts as technical terms. In a domain where new knowledge is generated, new terms are constantly created to fulfil the needs of the domain, while others become obsolete. In addition, existing terms may undergo changes of meaning..." (Kageura K.,1998/1999). According to Kageura, our aim with this work is to make a "journey" in the Grey Literature (GL) domain in order to offer an overall vision on the terms used and the links between them. Moreover, by performing a terminological comparison over a given period of time it could be possible to trace the presence of obsolete words as well as of neologisms in the most recent research fields. Within this scenario, the work analyzes a corpus constituted of the entire amount of full research papers published in the GL conference series over a time span of more than one decade (2003-2014) with the aim of creating a terminological map of relevant words. "... corpora used to extract terminological units can be further investigated to find semantic and conceptual information on terms or to represent conceptual relationships between terms" (Bourigault D. et al., 2001). 
Another interesting inquiry is the terminology used in the GL conferences for describing the types of documents (Pejšová P. et al., 2012). The work is split up in four sections: creation of the corpus by acquiring the digital papers of GL conference proceedings (GL5 - GL16); data cleaning; data processing; terminological analysis and comparison. The corpus - made up of 231 research papers (for a total amount of 785.042 tokens) - was processed using a Natural Language Processing (NLP) tool for term extraction developed at the Institute of Computational Linguistics "Antonio Zampolli" of CNR (Goggi et al. 2015; 2016). This tool is what is called a "pipeline" (that is, a sequence of different tools) which extracts lexical knowledge from texts: in short, this is a rule system tool for knowledge extraction and document indexing that combines NLP technologies for term extraction and techniques to measure the associative strength of multi-words. This tool extracts a list of single (monograms) and multi-word terms (bigrams and trigrams) ordered by frequency with respect to the context. The pipeline - used as semantic engine within the MAPS project - has been customized for the extraction of terms from our corpus. This survey on the results of the information extraction process performed by the described NLP tool has been a sort of linguistic path in the past and present of terminology used in GL proceedings. 
By means of samplings, it has been possible to obtain the terminological flow in GL domain and to determine if and how the lexicon was evolving over these twelve years and investigate on its dynamic nature.}, KEYWORDS = {Grey Literature, Digital Repositories, Open Access}, PAGES = {79-84}, URL = {https://publications.cnr.it/doc/362848}, VOLUME = {18}, ISBN = {978-90-77484-29-6}, CONFERENCE_NAME = {GL18-Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, CONFERENCE_PLACE = {New York, US}, CONFERENCE_DATE = {28-29 November 2016}, BOOKTITLE = {Leveraging Diversity in Grey Literature}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{DELGROSSO_2016_INPROCEEDINGS_DBMG_360640, AUTHOR = {Del Grosso, A. M. and Boschetti, F. and Marchi, S. and Giovannetti, E.}, TITLE = {Vantaggi dell'Astrazione attraverso l'Approccio Orientato agli Oggetti per il Digital Scholarly Editing}, YEAR = {2016}, KEYWORDS = {Object Oriented Design, ADT, Digital Textual Scholarship}, URL = {http://www.himeros.eu/aiucd2016/c33.pdf}, DOI = {10.6092/unibo/amsacta/5559}, ISBN = {978-88-942535-0-4}, CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD. Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture}, CONFERENCE_PLACE = {Ca' Dolfin, Venezia, Italia}, CONFERENCE_DATE = {7-9/09/2016}, } @INPROCEEDINGS{DOLOWYRYBINSKA_2016_INPROCEEDINGS_DS_354799, AUTHOR = {Dolowy Rybinska, N. and Soria, C.}, TITLE = {Surveying the ethnolinguistic vitality of two regional collateral languages: the case of Kashubian and Piedmontese}, YEAR = {2016}, ABSTRACT = {The paper presents the results of a Polish-Italian research project concerning the vitality of two regional collateral languages: Kashubian in Poland and Piedmontese in Italy. 
Despite their different status (Kashubian is a language recognised under the Polish law while Piedmontese is not), they are both perceived as dialects of the State language by the inhabitants of Poland and Italy. The status and prestige of both languages in their respective countries are low; consciousness about the importance of their maintenance within the communities and outside them is weakening. As they belong to the same language family as the dominant language they were/are treated as dialects of the State languages not worthy of preservation. Current accounts of language vitality for Kashubian and Piedmontese are not entirely satisfactory in that they seem to overestimate the importance of the number of speakers over speakers' attitudes and stigma. In this paper, we will present the preliminary results of the survey, focusing on the interdependence between actual and perceived use of the two languages on the one side, and different ethnolinguistic vitality parameters, such as self-assessment of language proficiency, awareness of the language institutional status and policies, attitudes towards the language, and language ideology. This research is a pilot study that aims to raise the discussion on current assessment of ethnolinguistic vitality and to broaden it to other languages that are contested, unrecognized or treated as dialects of the State languages.}, KEYWORDS = {regional and minority languages ethnolinguistic vitality study}, URL = {https://publications.cnr.it/doc/354799}, CONFERENCE_NAME = {Contested Languages in the Old World \#2}, CONFERENCE_PLACE = {Torino}, CONFERENCE_DATE = {5-6/05/2016}, } @INPROCEEDINGS{FRONTINI_2016_INPROCEEDINGS_FCG_357603, AUTHOR = {Frontini, F. and Brando, C. and Ganascia, J. 
G.}, TITLE = {REDEN ONLINE: Disambiguation, Linking and Visualisation of References in TEI Digital Editions}, YEAR = {2016}, KEYWORDS = {entity linking, visualization, literary criticism, TEI}, URL = {http://dh2016.adho.org/abstracts/362}, CONFERENCE_NAME = {Digital Humanities 2016}, CONFERENCE_PLACE = {Jagiellonian University \& Pedagogical University, Kraków}, CONFERENCE_DATE = {11-16/07/2016}, BOOKTITLE = {Digital Humanities 2016: Conference Abstracts}, } @INPROCEEDINGS{MANZELLA_2016_INPROCEEDINGS_MBBDDFMMMNS_355476, AUTHOR = {Manzella, G. M. R. and Bartolini, R. and Bustaffa, F. and D'Angelo, P. and De Mattei, M. and Frontini, F. and Maltese, M. and Medone, D. and Monachini, M. and Novellino, A. and Spada, A.}, TITLE = {Marine Planning and Service Platform: Specific Ontology Based semantic Search Engine Serving Data Management and Sustainable Development}, YEAR = {2016}, ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is aiming at building a computer platform supporting a Marine Information and Knowledge System. One of the main objectives of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. In oceanography the cost related to data collection is very high and the new paradigm is based on the concept to collect once and re-use many times (for re-analysis, marine environment assessment, studies on trends, etc). This concept requires the access to quality controlled data and to information that is provided in reports (grey literature) and/or in relevant scientific literature. 
Hence, creation of new technology is needed by integrating several disciplines such as data management, information systems, knowledge management...}, KEYWORDS = {Marine Information, Knowledge System}, PAGES = {2}, URL = {http://meetingorganizer.copernicus.org/EGU2016/orals/20144}, VOLUME = {18}, PUBLISHER = {Copernicus GmbH (Katlenburg-Lindau, Germania)}, ISSN = {1607-7962}, CONFERENCE_NAME = {European Geosciences Union General Assembly (EGU 2016)}, CONFERENCE_PLACE = {Vienna, Austria}, CONFERENCE_DATE = {17-22 aprile 2016}, BOOKTITLE = {Geophysical research abstracts (Online)}, } @INPROCEEDINGS{MONACHINI_2016_INPROCEEDINGS_M_368274, AUTHOR = {Monachini, M.}, TITLE = {CLARIN-IT The Italian Common Language Resources and Technology Infrastructure CLARIN-IT: l'infrastruttura di ricerca per le scienze umane e sociali}, YEAR = {2016}, ABSTRACT = {The CLARIN-IT National Coordinator presented a keynote CLARIN-IT, l'Infrastruttura di Ricerca per le Scienze Umane e Sociali, in the 5th Annual Conference of the Associazione per l'Informatica Umanistica e la Cultura Digitale (AIUCD) held in Venezia from 7th to 9th September 2016. It is time for research infrastructures to be able to guarantee interoperability and integration between the instruments for philological studies and the instruments for the analysis of large textual corpora, breaking down the rigid barriers between digital and computational philology, on the one hand, and corpus linguistics on the other hand. 
Programma: https://docs.google.com/viewer?a=v\&pid=sites\&srcid=dW5pdmUuaXR8YWl1Y2QyMDE2fGd4OjIyMDhhMzk2ODk0MjUyNDQ}, KEYWORDS = {CLARIN-IT, scienze umane e sociali}, URL = {http://www.clarin-it.it/en/content/clarin-it-aiucd-2016}, CONFERENCE_NAME = {5th Annual Conference of the Associazione per l'Informatica Umanistica e la Cultura Digitale (AIUCD)}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {7th to 9th September 2016}, } @INPROCEEDINGS{MONACHINI_2016_INPROCEEDINGS_M_382195, AUTHOR = {Monachini, M.}, TITLE = {Infrastrutture e ricerca nel settore umanistico}, YEAR = {2016}, ABSTRACT = {L'informatica applicata allo studio del testo ha una lunga storia che parte dagli anni '50 dello scorso secolo. Nel corso del tempo allo sviluppo di risorse e strumenti prevalentemente pensati per l'analisi linguistica, come ad esempio la lemmatizzazione, si sono affiancati metodi, risorse e strumenti più squisitamente filologici, come la codifica delle varianti, i repertori digitali di molteplici edizioni del medesimo testo e gli strumenti per l'allineamento automatico delle stesse. Tuttavia la conoscenza di queste tecniche ed il loro utilizzo è ancora piuttosto limitato nel panorama degli studi italiani di filologia classica, nonostante alcune acquisizioni concettuali e tecniche di prima grandezza nel settore siano dovute a studiosi italiani. Il workshop si propone di discutere temi e problematiche attinenti la filologia digitale sulla scorta di una rassegna del settore iniziata all'università di Parma con la collaborazione del CNR-ILC di Pisa: o Motivazioni per l'adozione di tecniche di filologia digitale nel campo della ricerca e dell'insegnamento. Barriere al loro utilizzo. 
o I risultati di una rassegna sull'impiego di tecniche di filologia digitale da parte di studiosi italiani sulla base di un questionario o Diffusione della conoscenza del settore in Italia o Correnti e future tematiche di ricerca}, KEYWORDS = {Digital Humanities, Computational Philology}, URL = {https://publications.cnr.it/doc/382195}, CONFERENCE_NAME = {Utilizzo e diffusione di metodi, strumenti e tecnologie digitali per gli studi filologici: l'applicazione della filologia digitale al greco antico}, CONFERENCE_PLACE = {Parma}, CONFERENCE_DATE = {10. 10. 2016}, } @INPROCEEDINGS{MONACHINI_2016_INPROCEEDINGS_MEF_368272, AUTHOR = {Monachini, M. and Enea, A. and Frontini, F.}, TITLE = {CLARIN-IT: servizi per la comunità italiana delle scienze umane e sociali}, YEAR = {2016}, ABSTRACT = {CLARIN-IT -The Italian Common Language Resources and Technology Infrastructure: Monica Monachini - CLARIN Italian National Coordinator Alessandro Enea - Responsible of ILCforCLARIN \& contact person for IDEM Francesca Frontini - Standing Committee for CLARIN Technical Centres (SCCTC) ILC-CNR National Representative}, KEYWORDS = {CLARIN-IT, The Italian Common Language Resources and Technology Infrastructure}, URL = {http://www.clarin-it.it/en/content/clarin-it-idem-day-2016}, CONFERENCE_NAME = {CLARIN-IT @ IDEM Day 2016}, CONFERENCE_PLACE = {Roma [Università degli Studi di Roma Tre]}, CONFERENCE_DATE = {6-8 giugno 2016}, } @INPROCEEDINGS{NAHLI_2016_INPROCEEDINGS_NBAT_363709, AUTHOR = {Nahli, O. and Boschetti, F. and Arrigoni, S. and Tessarolo, L.}, TITLE = {Il corpus di testi arabi in Memorata Poetis}, YEAR = {2016}, KEYWORDS = {letteratura araba, temi e motivi}, PAGES = {157-162}, URL = {http://www.himeros.eu/aiucd2016/c03.pdf}, CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD. 
Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture}, CONFERENCE_PLACE = {Ca' Dolfin, Venezia, Italia}, CONFERENCE_DATE = {7-9/9/2016}, BOOKTITLE = {AIUCD 2016-Book of Abstracts}, EDITOR = {Boschetti, F.}, } @INPROCEEDINGS{PARDELLI_2016_INPROCEEDINGS_PGMBR_362073, AUTHOR = {Pardelli, G. and Goggi, S. and Monachini, M. and Bartolini, R. and Russo, I.}, TITLE = {A Geographical Visualization of GL Community: a Snapshot}, YEAR = {2016}, ABSTRACT = {"Today, in the spirit of science, grey literature communities are called to demonstrate their know-how and merit to wider audiences" [Farace Dominic J., 2011]. This quotation stresses the important role of the several international organizations in producing and disseminating knowledge in the field of Grey Literature (GL): the paper aims to provide a first snapshot of the geographical distribution of GL organizations and their participation to the annual International Conference on Grey Literature over the time (in the period from 2003 to 2015). Nowadays a visual representation of data is often associated with the traditional statistical graphs, in particular for representing complex phenomena by means of maps and diagrams, which allow a deeper and more focused analysis of the data. 
In our case the geographical representation of stakeholders in government, academics, business and industry aims at visualizing the GL community across the globe: it concerns 675 organizations which over the years have contributed to the development of a common vision on the most pressing issues of the field by using new paradigms such as Open Access and the social networks.}, KEYWORDS = {Geographical Visualization, Grey Literature}, PAGES = {67}, URL = {https://publications.cnr.it/doc/362073}, VOLUME = {18}, ISBN = {978-90-77484-29-6}, CONFERENCE_NAME = {Eighteenth International Conference on Grey Literature: Leveraging Diversity in Grey Literature}, CONFERENCE_PLACE = {New York}, CONFERENCE_DATE = {November 28-29, 2016}, BOOKTITLE = {GL18 Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{RECCHIA_2016_INPROCEEDINGS_RFMD_362391, AUTHOR = {Recchia, V. and Ferro, M. and Maglie, R. and Dodaro, A.}, TITLE = {Readability of current patient information leaflets for informed consent in UK radiotherapy centers}, YEAR = {2016}, ABSTRACT = {Background: Guidelines on informed consent recommend the use of plain language and readability standards to enhance patient's comprehension, engagement and shared decision making. Aim: To assess the readability of current patient information leaflets (PILs) used for informed consent in radiotherapy. Methods: We evaluated PILs (n=38) from three radiation therapy centers in UK. They regard the most common radiation therapy techniques for different kinds of cancer and body districts, such as bladder, bowel, colo-rectum, brain, breast-chest, female pelvis, prostate, lung, lymphomas, stomach. We analyzed each text with Flesch-Kincaid (F-K) grade level, with higher numbers indicating harder-to-read text (from 0 = easy, to 25 = difficult). 
Then, we compared the related grade levels to the health literacy recommended standard of US grade level 5, indicating that patient education texts might be understood by a typical student in the US primary school. Results: Readability is suboptimal for the analysed PILs (red, green and blue points in the figure) and should be improved with respect to the international standard score (red dotted line in the figure). The results show a mean grade level equal to 8.1 (std = 0.8), thus suggesting the need of a 3-points decrease on average. Conclusion: Current PILs for informed consent in the three analysed radiotherapy centers are hardly readable for the average patient. Although the readability scores achieved in the three centers are not very low, substantially higher readability scores should be achieved with novel PILs which explicitly discuss risks/benefits and other elements relevant for informed consent, and should be prepared by following standard recommendations of plain language.}, KEYWORDS = {Ethics and communication, Communicating Risk and Uncertainty, Health Literacy}, PAGES = {1}, URL = {http://www.communication.aau.dk/research/dihm/events/comet2016/}, CONFERENCE_NAME = {COMMUNICATION, MEDICINE AND ETHICS CONFERENCE 2016}, CONFERENCE_PLACE = {Aalborg, Denmark}, CONFERENCE_DATE = {4-6/6/2016}, } @INPROCEEDINGS{RIZZETTO_2016_INPROCEEDINGS_RTFPTBS_363705, AUTHOR = {Rizzetto, M. and Trevisiol, A. and Falcone, D. and Pilon, N. and Tomè, P. and Boschetti, F. and Springmann, U.}, TITLE = {Nuove frontiere delle Digital Humanities in classe: esperienze dal campo}, YEAR = {2016}, KEYWORDS = {didattica, ocr, latino umanistico}, PAGES = {119-122}, URL = {http://www.himeros.eu/aiucd2016/c32.pdf}, CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD. 
Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture}, CONFERENCE_PLACE = {Ca' Dolfin, Venezia, Italia}, CONFERENCE_DATE = {7-9/9/2016}, BOOKTITLE = {AIUCD 2016-Book of Abstracts}, EDITOR = {Boschetti, F.}, } @INPROCEEDINGS{SASSOLINI_2016_INPROCEEDINGS_SCC_382394, AUTHOR = {Sassolini, E. and Cucurullo, S. and Cinini, A.}, TITLE = {I corpora digitali: dall'obsolescenza tecnologica, alla salvaguardia e alla condivisione}, YEAR = {2016}, ABSTRACT = {Il progetto di recupero, nato pochi anni fa come iniziativa fortemente voluta da ILC, prosegue oggi con la collaborazione di molte istituzioni pubbliche e private, impegnate sullo stesso fronte. Approccio al recupero a tappe: inizialmente lavorando su testi che erano stati prodotti per essere indicizzati con le prime procedure di analisi testuale presenti all'ILC sin dalla fine degli anni '70 del secolo scorso. Definizione di criteri da adottare per la scelta dei testi, basati sullo studio di casi significativi e sull'importanza dei materiali, spesso legati alla realizzazione di autorevoli progetti nazionali e internazionali.}, KEYWORDS = {recupero testi, conversione in formato XML, valorizzazione dei risultati}, PAGES = {1-3}, URL = {https://www.eventi.garr.it/it/conf16/home/materiali-conferenza-2016/paper}, CONFERENCE_NAME = {Conferenza GARR 2016-The CreActive Network}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {30/11/2016-02/12/2016}, } @INPROCEEDINGS{STANZIONE_2016_INPROCEEDINGS_SRMTBD_363706, AUTHOR = {Stanzione, A. and Re, G. and Mugelli, G. and Taddei, A. and Boschetti, F. and Del Gratta, R.}, TITLE = {Homeric Greek WordNet: costruire una risorsa lessico-semantica fra ricerca e didattica}, YEAR = {2016}, KEYWORDS = {wordnet, semantica, greco antico}, PAGES = {129-132}, URL = {http://www.himeros.eu/aiucd2016/c40.pdf}, CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD. 
Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture}, CONFERENCE_PLACE = {Ca' Dolfin, Venezia, Italia}, CONFERENCE_DATE = {7-9/9/2016}, BOOKTITLE = {AIUCD 2016-Book of Abstracts}, EDITOR = {Boschetti, F.}, } @TECHREPORT{CARLINO_2016_TECHREPORT_C_483689, AUTHOR = {Carlino, M.}, TITLE = {Rapporto annuale 2015 del CNR-ILC}, YEAR = {2016}, ABSTRACT = {Rapporto Annuale 2015 del Cnr-Istituto di Linguistica Computazionale "Antonio Zampolli" (CNR-ILC)}, KEYWORDS = {CNR-ILC, Annual Report, Rapporto Annuale, ILC, Istituto di Linguistica Computazionale, Zampolli, Activity report}, PAGES = {1-50}, URL = {https://publications.cnr.it/doc/483689}, } @MISC{BARONI_2016_MISC_BA_483771, AUTHOR = {Baroni, P. and Affè, F.}, TITLE = {ILC4CLARIN Web Site}, YEAR = {2016}, ABSTRACT = {Sito Web dello ILC4CLARIN Centre at the Institute for Computational Linguistics, realizzato con WordPress, sviluppato in italiano e inglese}, KEYWORDS = {CLARIN, Metadata Providing Centre, Service Providing Centre}, URL = {https://ilc4clarin.ilc.cnr.it}, } @MISC{DELGROSSO_2016_MISC_D_390558, AUTHOR = {Del Grosso, A. M.}, TITLE = {il progetto "Cultura Digitale": Promozione alla cittadinanza Digitale}, YEAR = {2016}, ABSTRACT = {Il contributo presenta il lavoro di allineamento e traduzione svolto con strumenti digitali dagli studenti del Liceo Classico Medi-Livatino di San Marco dei Cavoti (BN) su una selezione di passi tratti dall'opera di Ippocrate "Sulle Arie, Sulle Acque, e Sui Luoghi".}, KEYWORDS = {digital humanities, cultura digitale, tools, scuola digitale}, URL = {https://publications.cnr.it/doc/390558}, } @MISC{DELGROSSO_2016_MISC_D_355220, AUTHOR = {Del Grosso, A. M.}, TITLE = {Modelli concettuali e architetture Object-Oriented per la progettazione e lo sviluppo di una Digital Scholarly Platform}, YEAR = {2016}, ABSTRACT = {"OOP e DH": incontro possibile? 
E' possibile modellare il dominio relativo allo studio scientifico del testo attraverso l'approccio Object-Oriented? Cosa implica, dal punto di vista metodologico e tecnologico, progettare e sviluppare strumenti modulari e riusabili per l'analisi scientifica di risorse testuali? L'intervento esaminerà questi e altri temi di natura ingegneristica nell'ambito delle Digital Humanities. Il seminario, quindi, ripercorrerà alcuni risultati ottenuti all'interno della linea di ricerca presente presso l'ILC-CNR orientata allo sviluppo di componenti software per sistemi Web di linguistica e filologia computazionale volti al trattamento di testi di tradizione medievale, a stampa e di autori moderni e contemporanei. La progettazione di strumenti computazionali nel dominio delle Digital Humanities deve necessariamente rispondere alle esigenze di diverse tipologie di utenti: 1) l'utente generico, inteso come persona curiosa oppure pubblico poco esperto che ha comunque competenze per arricchire il prodotto digitale; 2) l'utente accademico, inteso come lo studente, il docente, o il ricercatore, in grado di aumentare la significatività, l'espressività e la rilevanza della risorsa digitale, 3) il programmatore, inteso come sviluppatore di applicazioni nel campo dell'Informatica Umanistica. Durante il seminario si introdurranno le basi della modellazione Object-Oriented a partire da concreti casi di studio ed esperienze maturate in progetti di ricerca nazionali ed internazionali. In particolare, si illustreranno gli esiti di alcune iniziative: o il progetto "Clavius On the Web", finanziato dal Registro.it, che vede coinvolti due istituti del CNR di Pisa (IIT-CNR e ILC-CNR) e l'Archivio Storico della Pontificia Università Gregoriana (APUG); o il progetto "Greek into Arabic", finanziato dall'European Research Council e diretto dalla Prof.ssa Cristina D'Ancona dell'Università di Pisa; o il progetto PRIN "Per un'edizione dei manoscritti di F. De Saussure", diretto dal Prof. 
Daniele Gambarara dell'Università della Calabria. o il progetto "Talmud" finanziato dal MIUR e partecipato dal CNR, dall'Unione delle Comunità Ebraiche e dal Collegio Rabbinico Italiano.}, KEYWORDS = {Object-Oriented, UML, Filologia Computazionale, Literary Computing, Digital Humanities}, URL = {http://www.labcd.unipi.it/seminari/angelo-mario-del-grosso-modelli-concettuali-e-architetture-object-oriented-per-la-progettazione-e-lo-sviluppo-di-una-digital-scholarly-platform/}, } @MISC{NAHLI_2016_MISC_N_390724, AUTHOR = {Nahli, O.}, TITLE = {Corpus dei testi arabi in "Memorata Poetis"}, YEAR = {2016}, ABSTRACT = {Corpus epigrafico arabo, per motivi didattici e scientifici, i testi sono stati vocalizzati e tradotti in italiano.}, KEYWORDS = {Poesia, Epigrafi, Memorata Poetis, lingua araba}, URL = {http://www.memoratapoetis.it/public/}, } @ARTICLE{ATTARDI_2015_ARTICLE_ABBCDMPSS_366713, AUTHOR = {Attardi, G. and Basile, V. and Bosco, C. and Caselli, T. and Dell'Orletta, F. and Montemagni, S. and Patti, V. and Simi, M. and Sprugnoli, R.}, TITLE = {State of the Art Language Technologies for Italian: The EVALITA 2014 Perspective}, YEAR = {2015}, ABSTRACT = {Shared task evaluation campaigns represent a well established form of competitive evaluation, an important opportunity to propose and tackle new challenges for a specific research area and a way to foster the development of benchmarks, tools and resources. The advantages of this approach are evident in any experimental field, including the area of Natural Language Processing. An outlook on state-of-the-art language technologies for Italian can be obtained by reflecting on the results of the recently held workshop "Evaluation of NLP and Speech Tools for Italian", EVALITA 2014. 
The motivations underlying individual shared tasks, the level of knowledge and development achieved within each of them, the impact on applications, society and economy at large as well as directions for future research will be discussed from this perspective.}, KEYWORDS = {Evaluation Campaign, Natural Language Processing, Dependency Parsing, Sentiment Analysis, Temporal Processing}, PAGES = {43-61}, URL = {https://publications.cnr.it/doc/366713}, VOLUME = {9}, DOI = {10.3233/IA-150076}, PUBLISHER = {Associazione Italiana per l'Intelligenza Artificiale (Bari, Italia)}, ISSN = {1724-8035}, JOURNAL = {Intelligenza Artificiale}, } @ARTICLE{BARBAGLI_2015_ARTICLE_BLDMV_357152, AUTHOR = {Barbagli, A. and Lucisano, P. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Il ruolo delle tecnologie del linguaggio nel monitoraggio dell'evoluzione delle abilità di scrittura: primi risultati}, YEAR = {2015}, ABSTRACT = {L'ultimo decennio ha visto l'affermarsi a livello internazionale dell'uso di tecnologie del linguaggio per lo studio dei processi di apprendimento. Questo contributo riporta i primi e promettenti risultati di uno studio interdisciplinare che si è avvalso di metodi e tecniche di analisi propri della linguistica computazionale, della linguistica e della pedagogia sperimentale. 
Lo studio, finalizzato al monitoraggio dell'evoluzione del processo di apprendimento della lingua italiana, è stato condotto a partire dalle produzioni scritte di studenti della scuola secondaria di primo grado con strumenti di annotazione linguistica automatica e di estrazione di conoscenza e ha portato all'identificazione di un insieme di tratti qualificanti il processo di apprendimento linguistico.}, KEYWORDS = {evoluzione delle competenze linguistiche, Didattica Sperimentale, Estrazione di conoscenza, Annotazione linguistica automatica}, PAGES = {99-117}, URL = {https://journals.openedition.org/ijcol/326}, DOI = {10.4000/ijcol.326}, PUBLISHER = {aAccademia University Press, Torino (Italia)}, ISSN = {2499-4553}, JOURNAL = {Italian Journal of Computational Linguistics}, } @ARTICLE{DELGRATTA_2015_ARTICLE_DFKM_287051, AUTHOR = {Del Gratta, R. and Frontini, F. and Khan, F. and Monachini, M.}, TITLE = {Converting the PAROLE SIMPLE CLIPS Lexicon into RDF with lemon}, YEAR = {2015}, ABSTRACT = {This paper describes the publication and linking of (parts of) PAROLE SIMPLE CLIPS (PSC), a large scale Italian lexicon, to the Semantic Web and the Linked Data cloud using the lemon model. The main challenge of the conversion is discussed, namely the reconciliation between the PSC semantic structure which contains richly encoded semantic information, following the qualia structure of the Generative Lexicon theory and the lemon view of lexical sense as a reified pairing of a lexical item and a concept in an ontology. The result is two datasets: one consists of a list of lemon lexical entries with their lexical properties, relations and senses; the other consists of a list of OWL individuals representing the referents for the lexical senses. 
These OWL individuals are linked to each other by a set of semantic relations and mapped onto the SIMPLE OWL ontology of higher level semantic types.}, KEYWORDS = {lemon, linked data, generative lexicon, RDF, OWL, lexical resource}, PAGES = {387-392}, URL = {http://www.semantic-web-journal.net/content/converting-parole-simple-clips-lexicon-rdf-lemon-0}, VOLUME = {6}, DOI = {10.3233/SW-140168}, PUBLISHER = {IOS Press (Amsterdam, Paesi Bassi)}, ISSN = {1570-0844}, JOURNAL = {Semantic web (Print)}, } @ARTICLE{GIANNINI_2015_ARTICLE_GBGP_329507, AUTHOR = {Giannini, S. and Biagioni, S. and Goggi, S. and Pardelli, G.}, TITLE = {Mapping Italian grey communities: what is there beyond the Academy?}, YEAR = {2015}, ABSTRACT = {This research aims at verifying whether - and eventually how much - the grey literature available on the web is actually structured, accessible or even managed by systems dealing with its organization and aiming at its retrieval and storing. The utmost goal is to build up a map of non-academic communities and their mechanisms for managing, presenting and disseminating this type of material. It is a sort of journey among the streams of the Web, which channel meeting minutes, manifests, fliers, pictures, newspapers articles, journalistic services and audio/video material on various topics. These "grey" products - by conveying basic information about social and popular culture - store, represent and spread knowledge.}, KEYWORDS = {Italian Grey Literature A. 1 INTRODUCTORY AND SURVEY}, PAGES = {17-28}, URL = {http://www.greynet.org/thegreyjournal.html}, VOLUME = {11}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{GOGGI_2015_ARTICLE_GMFBPDBM_334894, AUTHOR = {Goggi, S. and Monachini, M. and Frontini, F. and Bartolini, R. and Pardelli, G. and De Mattei, M. and Bustaffa, F. 
and Manzella, G.}, TITLE = {Marine Planning and Service Platform (MAPS) An Advanced Research Engine for Grey Literature in Marine Science}, YEAR = {2015}, ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is a development of the Marine project (Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013) aiming at building a computer platform for supporting a Marine Information and Knowledge System, as part of the data management activities. One of the main objectives of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. We will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data. The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs/problems and their answers.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {171-178}, URL = {https://publications.cnr.it/doc/334894}, VOLUME = {11}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{GOGGI_2015_ARTICLE_GPGBB_329873, AUTHOR = {Goggi, S. and Pardelli, G. and Giannini, S. and Biagioni, S. and Battisti, M.}, TITLE = {La littérature grise des projets de recherche européens}, YEAR = {2015}, ABSTRACT = {Les projets scientifiques financés par la Commission européenne produisent de la littérature grise. 
Une étude menée en 2013 sur 226 projets CNR du 7e programme-cadre (2007-2013) a analysé la typologie, le format et la disponibilité des documents signalés sur le serveur Cordis (rapports de recherche et articles scientifiques) et les sites projets (contenant listes de partenaires, brochures, communiqués,...}, KEYWORDS = {Grey Literature. European Commission Projects}, PAGES = {34-34}, URL = {http://www.cairn.info/revue-i2d-information-donnees-et-documents-2015-1-p-34.htm}, VOLUME = {52}, DOI = {10.3917/i2d.151.0034}, PUBLISHER = {A. D. B. S (Paris, Francia)}, ISSN = {0012-4508}, JOURNAL = {I2D-Information, données \& documents. Pratiques \& recherches}, } @ARTICLE{MARZI_2015_ARTICLE_MP_346413, AUTHOR = {Marzi, C. and Pirrelli, V.}, TITLE = {A Neuro-Computational Approach to Understanding the Mental Lexicon}, YEAR = {2015}, ABSTRACT = {Human lexical knowledge does not appear to be organised to minimise storage, but rather to maximise processing efficiency. The way lexical information is stored reflects the way it is dynamically processed, accessed and retrieved. A detailed analysis of the way words are memorised, of the dynamic interaction between lexical representations and distribution and degrees of regularity in input data, can shed some light on the emergence of structures and relations within fully-stored words. We believe that a bottom-up investigation of low-level memory and processing functions can help understand the cognitive mechanisms that govern word processing in the mental lexicon. Neuro-computational models can play an important role in this inquiry, as they help understand the dynamic nature of lexical representations by establishing an explanatory connection between lexical structures and processing models dictated by the micro-functions of human brain. 
Starting from some linguistic, psycholinguistic and neuro-physiological evidence supporting a dynamic view of the mental lexicon as an integrative system, we illustrate Temporal Self Organising-Maps (TSOMs), artificial neural networks that can model such a view by memorising time series of symbolic units (words) as routinized patterns of short-term node activation. On the basis of a simple pool of principles of adaptive Hebbian synchronisation, TSOMs can perceive possible surface relations between word forms and store them by partially overlapping activation patterns, reflecting gradient levels of lexical specificity, from holistic to decompositional lexical representations. We believe that TSOMs offer an algorithmic model of the emergence of high-level, global and language-specific morphological structure through the working of low-level, language-aspecific processing functions, thus promising to bridge the persisting gap between high-level principles of grammar architecture (lexicon vs. rules), computational correlates (storage vs. processing) and low-level principles and localisations of brain functions. Extensions of the current TSOM architecture are envisaged and their theoretical implications are discussed.}, KEYWORDS = {Mental lexicon dynamic storage parallel distributed processing hebbian learning temporal self-organising maps}, PAGES = {493-535}, URL = {http://jcs.snu.ac.kr/jcs/issue/vol16/no4/05+Marzi+and+Pirrelli.pdf}, VOLUME = {16}, PUBLISHER = {Institute for cognitive science, Seoul national university (Seoul, Corea del Sud)}, ISSN = {1976-6939}, JOURNAL = {Journal of cognitive science (Seoul. 
Online)}, } @ARTICLE{PICCINI_2015_ARTICLE_P_452104, AUTHOR = {Piccini, S.}, TITLE = {Transimpersonal constructions in Lithuanian: towards the emergence of Split Intransitivity}, YEAR = {2015}, ABSTRACT = {Transimpersonalines konstrukcijos pastaruoju metu yra patraukusios daugelio tyreju, ypac funkcines tipologijos specialistu, demesi, kadangi jos atlieka svarbu vaidmeni formuojantis dalinio intranzityvumo (ang. split intransitivity) modeliams ivairiose kalbose. Straipsnyje naujausiu pasiekimu sviesoje ivertinamos kai kurios lietuviu kalbos konstrukcijos, gramatikose laikomos beasmenemis (,,impersonalinemis"), meginant interpretuoti jas kaip transimpersonalines. Tyrimas atliktas is sinchronines perspektyvos, didziausia demesi skiriant dabartinei bendrinei kalbai, taciau tam tikrais atvejais lyginama ir su senosios lietuviu kalbos ir ypac tarmiu duomenimis. Analize remiasi fizine bukle nusakanciais veiksmazodziais, kurie priklausomai nuo reiksmes gali buti vartojami ivairiose sintaksinese konstrukcijose. Sinchroniname lygmenyje matomas skirtingas siu eksperienciniu veiksmazodziu elgesys gali buti projektuojamas diachronineje perspektyvoje. Taip galima geriau isryskinti ivairius reanalizes proceso, rodancio laipsniska raida dalinio intranzityvumo atsiradimo kryptimi, etapus. Kai kurie veiksmazodziai, atrodo, reanalizes kelyje yra pazenge gana toli, taciau tikrieji dalinio intranzityvumo modeliai dar nesusiformave. 
Analizuojamuju veiksmazodziu eksperienciniu argumentu subjekto statusas sintakses poziuriu tebelieka problemiskas.}, KEYWORDS = {verbi impersonali, codifica non canonica del soggetto, lituano, intransitività scissa}, PAGES = {19-55}, URL = {http://www.baltistica.lt/index.php/baltistica/article/view/2239/2214}, VOLUME = {50}, DOI = {10.15388/baltistica.50.1.2239}, PUBLISHER = {Mintis; [poi] Vilniaus universiteto leidykla (Vilnius, Lituania)}, ISSN = {0132-6503}, JOURNAL = {Baltistica (Print)}, } @INCOLLECTION{BRANDO_2015_INCOLLECTION_BFG_334082, AUTHOR = {Brando, C. and Frontini, F. and Ganascia, J.}, TITLE = {Disambiguation of Named Entities in Cultural Heritage Texts Using Linked Data Sets}, YEAR = {2015}, ABSTRACT = {This paper proposes a graph-based algorithm baptized REDEN for the disambiguation of authors' names in French literary criticism texts and scientific essays from the 19th century. It leverages knowledge from different Linked Data sources in order to select candidates for each author mention, then performs fusion of DBpedia and BnF individuals into a single graph, and finally decides the best referent using the notion of graph centrality. Some experiments are conducted in order to identify the best size of disambiguation context and to assess the influence on centrality of specific relations represented as edges. This work will help scholars to trace the impact of authors' ideas across different works and time periods.}, KEYWORDS = {Named-entity disambiguation Centrality Linked data Data fusion Digital humanities}, PAGES = {505-514}, URL = {http://link.springer.com/chapter/10.1007%2F978-3-319-23201-0_51}, VOLUME = {539}, DOI = {10.1007/978-3-319-23201-0_51}, ISBN = {978-3-319-23200-3}, BOOKTITLE = {New Trends in Databases and Information Systems}, EDITOR = {Morzy, T. and Valduriez, P. 
and Bellatreche, L.}, } @INCOLLECTION{MARCHI_2015_INCOLLECTION_M_344710, AUTHOR = {Marchi, S.}, TITLE = {GREEK INTO ARABIC, A RESEARCH INFRASTRUCTURE BASED ON COMPUTATIONAL MODULES TO ANNOTATE AND QUERY HISTORICAL AND PHILOSOPHICAL DIGITAL TEXTS Part ii. System components and features}, YEAR = {2015}, ABSTRACT = {Computer technology nowadays allows users to build simple and effective tools designed to meet the needs of researchers and institutions in various fields of research. Since its creation, the World Wide Web prompted the existence of an environment that breaks down the boundaries of time (i.e. synchronous activity) and space (i.e. location of activities), a prerequisite for the design of tools enabling the collaboration among users. Over the past years text processing systems have become part and parcel of the daily language of scholars working in the field of Humanities, despite some objections raised against this type of technology because of their apparent lack of simplicity of usage, appropriateness, and flexibility. Usage requires special attention with respect to the interface between the information system and the user, while appropriateness and flexibility have not been sufficiently taken into account, not to mention that these two desiderata almost seem to be in contrast to each other. Therefore, it is not easy to plan and implement a text processing system which is suitable for specific types of research and at the same time as flexible as to operate in various fields of research.}, KEYWORDS = {textual scholarship, Collaborative Application, web application}, PAGES = {43-56}, URL = {http://www.olschki.it/libro/9788822263933}, VOLUME = {60}, PUBLISHER = {Leo S. Olschki (Firenze, ITA)}, ISBN = {9788822263933}, BOOKTITLE = {Digital texts, translations, lexicons in a multi-modular web application: methods and samples}, EDITOR = {Bozzi, A.}, } @INCOLLECTION{MORGAVI_2015_INCOLLECTION_MNMCFCM_333210, AUTHOR = {Morgavi, G. and Nerino, R. and Marconi, L. 
and Cutugno, P. and Ferraris, C. and Cinini, A. and Morando, M.}, TITLE = {An Integrated Approach to the Well-Being of the Elderly People at Home}, YEAR = {2015}, ABSTRACT = {The paper presents the outline and the preliminary developments of NINFA (iNtelligent Integrated Network For Aged people), a project for the well-being of the elderly people at home. This architecture is based on a service platform suited for elder people called the Virtual Village Network, whose user interface allows to deliver different services at home, namely: user supervision, communication and interaction among users for social inclusion, exergame delivering, monitoring of the wellness status.}, KEYWORDS = {ICT platform, Wellness network services, 3D movement analysis, Linguistic and cognitive analysis, exergames, "at-home" technologies}, PAGES = {265-274}, URL = {https://publications.cnr.it/doc/333210}, VOLUME = {XIV}, DOI = {10.1007/978-3-319-18374-9_25}, ISBN = {978-3-319-18373-2}, BOOKTITLE = {Springer-Ambient Assisted Living-Italian Forum 2014}, EDITOR = {Andò, P. B. and Siciliano, P. P. and Marletta, P. V. and Monteriù, P. A.}, } @INCOLLECTION{PIRRELLI_2015_INCOLLECTION_PFM_330234, AUTHOR = {Pirrelli, V. and Ferro, M. and Marzi, C.}, TITLE = {Computational complexity of abstractive morphology}, YEAR = {2015}, ABSTRACT = {Abstractive and constructive approaches to word structure make radically different assumptions concerning nature and role of the building blocks that make up a speaker's morphological competence. In this contribution, we show that the two views are also computationally different. In particular, we contend that a number of problems arising in connection with a subsymbolic implementation of the constructive view (as epitomised by classical multi-layered perceptrons) are tackled effectively, or disappear altogether, in a neurally-inspired implementation of associative networks, resting on key-notions such as self-organization and emergence. 
A particular variant of Kohonen's Self-Organizing Map is introduced as a model to explore and assess the implications of an abstractive approach in terms of its computational complexity. Details of the model (Temporal Self-Organizing Map, TSOM) and experimental data are shown to illustrate the interplay between processing and storage in language acquisition.}, KEYWORDS = {Word processing, computational complexity, mental lexicon, dynamic memories, self-organisation, word structure, morphology}, PAGES = {141-166}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84938781714\&origin=inward}, DOI = {10.1093/acprof:oso/9780198723769.003.0008}, PUBLISHER = {Oxford University Press (Oxford, GBR)}, ISBN = {978-0-19-872376-9}, BOOKTITLE = {Understanding and Measuring Morphological Complexity}, EDITOR = {Baerman, M. and Brown, D. and Corbett, G. G.}, } @INCOLLECTION{SIMI_2015_INCOLLECTION_SMB_330110, AUTHOR = {Simi, M. and Montemagni, S. and Bosco, C.}, TITLE = {Harmonizing and merging Italian treebanks: Towards a merged Italian dependency treebank and beyond}, YEAR = {2015}, ABSTRACT = {In this paper we address the challenge of combining existing CoNLL-compliant dependency-annotated corpora with the final aim of constructing a bigger treebank for the Italian language. To this end, we defined a methodology for mapping different annotation schemes, based on: (i) The analysis of similarities and differences of considered source and target dependency annotation schemes; (ii) The analysis of the performance of state of the art dependency parsers trained on the source and target treebanks; (iii) The mapping of the source annotation scheme(s) onto a set of target (possibly underspecified) data categories. This methodology was applied in two different case studies. The first one was aimed at constructing a "Merged Italian Dependency Treebank" (MIDT) starting from existing Italian dependency treebanks, namely TUT and ISST-TANL. 
The second case study, still ongoing, consists in the conversion of the MIDT resource into the Stanford Dependencies de facto standard with the final aim of developing an "Italian Stanford Dependency Treebank" (ISDT).}, KEYWORDS = {Harmonization and merging of resources, Italian, Dependency Treebank}, PAGES = {3-23}, URL = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84927143016\&partnerID=q2rCbXpz}, VOLUME = {589}, DOI = {10.1007/978-3-319-14206-7_1}, PUBLISHER = {Springer International Publishing (CH-6330 Cham (ZG), CHE)}, ISBN = {978-3-319-14205-0}, BOOKTITLE = {Harmonization and Development of Resources and Tools for Italian Natural Language Processing within the PARLI Project}, EDITOR = {Basili, R. and Bosco, C. and Delmonte, R. and Moschitti, A. and Simi, M.}, } @INCOLLECTION{SORIA_2015_INCOLLECTION_S_333636, AUTHOR = {Soria, C.}, TITLE = {Assessing the effect of official recognition on the vitality of minority and regional languages: a case study from Italy}, YEAR = {2015}, ABSTRACT = {In 1999, a rather controversial Italian law granted official recognition to twelve endangered regional and minority languages but denied it to others that were nevertheless also classed as endangered by UNESCO and the Ethnologue. This turn of events has produced a perfect scenario to assess the impact of language policies on protected languages and, at the same time, the effects of lack of official protection and recognition for languages that are denied such institutional support. This chapter presents the results of a survey carried out among speakers of these endangered languages. It assesses their vitality in terms of speaker numbers, domains of use, intergenerational transmission and speaker attitudes, arguing that a correlation can be established, on the one hand, between positive speaker attitudes and favourable language policies and, on the other, between lack of policy support and negative language attitudes. 
The chapter further argues language policy can actually alter linguistic behaviour.}, KEYWORDS = {language policy, endangered languages, regional languages}, PAGES = {123-137}, URL = {https://publications.cnr.it/doc/333636}, PUBLISHER = {Cambridge university press (Cambridge, GBR)}, ISBN = {978-1-107-09922-7}, BOOKTITLE = {Policy and Planning for Endangered Languages}, EDITOR = {Jones, M. C.}, } @EDITORIAL{PIRRELLI_2015_EDITORIAL_PMF_329357, AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M.}, TITLE = {Proceedings of the NetWordS Final Conference on Word Knowledge and Word Usage: Representations and Processes in the Mental Lexicon}, YEAR = {2015}, ABSTRACT = {The international conference "Word Knowledge and Word Usage: Representations and processes in the mental lexicon" is the final outcome of 4 years of intense multi-disciplinary research networking and cooperation funded by the European Science Foundation within the framework of the NetWordS programme (May 2011 - April 2015). NetWordS' mission was to bring together experts of various research fields (from brain sciences and computing to cognition and linguistics) and of different theoretical inclinations, to advance the current awareness of theoretical, typological, psycholinguistic, computational and neurophysiological evidence on the structure and processing of words, with a view to developing novel research paradigms and bringing up a new generation of language scholars. The conference was intended to provide a first forum for assessing current progress of crossdisciplinary research on language architecture and usage, and discussing prospects of future synergy. People are known to memorise, parse and access words in a context-sensitive and opportunistic way, by caching their most habitual and productive processing patterns into routinized behavioural schemes. 
Speakers not only take advantage of token-based information such as frequency of individual, holistically stored words, but they are also able to organise stored words through paradigmatic structures (or word families) whose overall size and frequency is an important determinant of ease of lexical access and interpretation. Accordingly, lexical organisation is not necessarily functional to descriptive economy and minimisation of storage, but to more performance-oriented factors such as efficiency of memorisation, access and recall. Usage-based approaches to word processing lend support to this view, to promote explanatory frameworks that aim to investigate the stable correlation patterns linking distributional entrenchment of lexical units with productivity, internal structure and ease of interpretation. Ultimately, this is intended to establish a deep interconnection between performance-oriented, low-level lexical functions such as memorisation, rehearsal, access and recall, and their neuroanatomical correlates.}, KEYWORDS = {mental lexicon, linguistics, brain sciences, psycholinguistics, computing, cognition}, PAGES = {1-189}, URL = {http://ceur-ws.org/Vol-1347/}, VOLUME = {1347}, PUBLISHER = {CEUR-WS. org (Aachen, DEU)}, } @INPROCEEDINGS{ALBANESI_2015_INPROCEEDINGS_ABBDG_332922, AUTHOR = {Albanesi, D. and Bellandi, A. and Benotto, G. and Di Segni, G. and Giovannetti, E.}, TITLE = {When Translation Requires Interpretation: Collaborative Computer-Assisted Translation of Ancient Texts}, YEAR = {2015}, ABSTRACT = {This paper introduces the main features of Traduco, a Web-based, collaborative Computer-Assisted Translation (CAT) tool developed to support the translation of ancient texts. 
In addition to the standard components offered by traditional CAT tools, Traduco includes a number of features designed to ease the translation of ancient texts, such as the Babylonian Talmud, posing specific structural, stylistic, linguistic and hermeneutical challenges.}, KEYWORDS = {Computer-Assisted Translation, Babylonian Talmud}, PAGES = {84-88}, URL = {https://publications.cnr.it/doc/332922}, ISBN = {978-1-941643-63-1}, CONFERENCE_NAME = {9th SIGHUM Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH 2015)}, CONFERENCE_PLACE = {Beijing}, CONFERENCE_DATE = {July 30, 2015}, BOOKTITLE = {Proceedings of the 9th SIGHUM Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities}, } @INPROCEEDINGS{BARBAGLI_2015_INPROCEEDINGS_BLDMV_357146, AUTHOR = {Barbagli, A. and Lucisano, P. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {CItA: un Corpus di Produzioni Scritte di Apprendenti l'Italiano L1 Annotato con Errori}, YEAR = {2015}, ABSTRACT = {In questo articolo presentiamo CItA il primo corpus di produzioni scritte di apprendenti l'italiano L1 del primo e del secondo anno della scuola secondaria di primo grado annotato con errori grammaticali, ortografici e lessicali. Le specificità del corpus e la sua natura diacronica lo rendono particolarmente utile sia per applicazioni linguistico-computazionali sia per studi socio-pedagogici.}, KEYWORDS = {Apprendimento della lingua madre, evoluzione delle competenze linguistiche}, PAGES = {31-35}, URL = {http://www.italianlp.it/wp-content/uploads/2016/03/CItA_errori.pdf}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {978-88-99200-62-6}, CONFERENCE_NAME = {2nd Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {3-4 dicembre 2015}, } @INPROCEEDINGS{BELLANDI_2015_INPROCEEDINGS_BBG_282565, AUTHOR = {Bellandi, A. and Bellusci, A. 
and Giovannetti, E.}, TITLE = {Computer Assisted Translation of Ancient Texts: the Babylonian Talmud Case Study}, YEAR = {2015}, ABSTRACT = {In this paper we introduce some of the features of the Computer Assisted Translation web application developed to support the translation of the Babylonian Talmud (BT) in Italian. The BT is a late antique Jewish anthological corpus, which, as other ancient texts, presents a number of hurdles related to its intrinsic linguistic and philological nature. In this work, we illustrate the solutions we adopted in the system, with particular emphasis on the Translation Memory and the translation suggestion component.}, KEYWORDS = {computer-assisted translation, Babylonian Talmud, Translation Memory}, PAGES = {287-302}, URL = {https://www.degruyter.com/view/book/9781501501289/10.1515/9781501501289.287.xml}, PUBLISHER = {De Gruyter Saur (Berlin/Munich, DEU)}, ISBN = {9781501501289}, CONFERENCE_NAME = {NLPCS 2014: 11th International Workshop on Natural Language Processing and Cognitive Science}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {27-29 ottobre 2014}, BOOKTITLE = {Natural Language Processing and Cognitive Science, Proceedings 2014}, EDITOR = {Sharp, B. and Delmonte, R.}, } @INPROCEEDINGS{BRANDO_2015_INPROCEEDINGS_BFG_344351, AUTHOR = {Brando, C. and Frontini, F. and Ganascia, J.}, TITLE = {Linked data for toponym linking in French literary texts}, YEAR = {2015}, ABSTRACT = {The present article discusses first experiments in toponym linking of Modern French digital editions aiming to provide an external referent to Linked Data sources. We have so far focused on testing two knowledge bases - French DBpedia and Geonames - for recall. 
Results highlight quality issues in these data sets for usage in NLP-tasks in domain-specific heritage texts.}, KEYWORDS = {Named-Entity Linking Linked Data Digital Humanities}, URL = {https://publications.cnr.it/doc/344351}, DOI = {10.1145/2837689.2837699}, PUBLISHER = {Association for Computing Machinery (New York, N. Y, Stati Uniti d'America)}, ISSN = {1933-7825}, ISBN = {978-1-4503-3937-7}, CONFERENCE_NAME = {GIR'15 9th Workshop on Geographic Information Retrieval}, CONFERENCE_PLACE = {Paris}, CONFERENCE_DATE = {26-27th November, 2015}, BOOKTITLE = {GIR '15 Proceedings of the 9th Workshop on Geographic Information Retrieval}, EDITOR = {Purves, R. S. and Jones, C. B.}, } @INPROCEEDINGS{BRUNATO_2015_INPROCEEDINGS_BD_359256, AUTHOR = {Brunato, D. and Dell'Orletta, F.}, TITLE = {ISACCO: a corpus for investigating spoken and written language development in Italian school-age children}, YEAR = {2015}, ABSTRACT = {We present ISACCO (Italian school-age children corpus), a new corpus of oral and written retellings of Italian speaking children attending the primary school. All texts were digitalized and automatically enriched with linguistic information allowing preliminary explorations based on NLP features. Written retellings were also manually annotated with a typology of linguistic errors. 
The resource is conceived to support research and computational modeling of "later language acquisition", with an emphasis for comparative assessment of oral and written language skills across early school grades.}, KEYWORDS = {Child language acquisition, Oral and written language, multi-level linguistic analysis}, PAGES = {62-66}, URL = {http://www.italianlp.it/wp-content/uploads/2016/03/IsaccoCorpus.pdf}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {978-88-99200-62-6}, CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics (CLiC-it 2015)}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {03/12/2015-04/12/2015}, BOOKTITLE = {Proceedings of the Second Italian Conference on Computational Linguistics (CLiC-it 2015)}, EDITOR = {Bosco, C. and Tonelli, S. and Zanzotto, F. M.}, } @INPROCEEDINGS{BRUNATO_2015_INPROCEEDINGS_BDVM_332693, AUTHOR = {Brunato, D. and Dell'Orletta, F. and Venturi, G. and Montemagni, S.}, TITLE = {Design and Annotation of the First Italian Corpus for Text Simplification}, YEAR = {2015}, ABSTRACT = {In this paper, we present design and construction of the first Italian corpus for automatic and semi--automatic text simplification. In line with current approaches, we propose a new annotation scheme specifically conceived to identify the typology of changes an original sentence undergoes when it is manually simplified. Such a scheme has been applied to two aligned Italian corpora, containing original texts with corresponding simplified versions, selected as representative of two different manual simplification strategies and addressing different target reader populations. Each corpus was annotated with the operations foreseen in the annotation scheme, covering different levels of linguistic description. 
Annotation results were analysed with the final aim of capturing peculiarities and differences of the different simplification strategies pursued in the two corpora.}, KEYWORDS = {Annotation Scheme, Automatic Text Simplification}, PAGES = {31-34}, URL = {https://aclweb.org/anthology/W/W15/W15-1604.pdf}, ISBN = {978-1-941643-47-1}, CONFERENCE_NAME = {Proceedings of LAW IX-The 9th Linguistic Annotation Workshop}, CONFERENCE_PLACE = {Denver, Colorado}, CONFERENCE_DATE = {5 giugno 2015}, } @INPROCEEDINGS{CHIARELLA_2015_INPROCEEDINGS_CBBCRZMC_336688, AUTHOR = {Chiarella, D. and Bibuli, M. and Bruzzone, G. and Caccia, M. and Ranieri, A. and Zereik, E. and Marconi, L. and Cutugno, P.}, TITLE = {Gesture-based Language for Diver-Robot Underwater Interaction}, YEAR = {2015}, ABSTRACT = {Underwater environment is characterized by harsh conditions and is difficult to monitor. The CADDY project deals with the development of a companion robot devoted to support and to monitor human operations and activities during the dive. In this scenario the communication and correct reception of messages between the diver and the robot are essential for success of the dive goals. However, the underwater environment poses a set of technical constraints hardly limiting the communication possibilities. For such reasons the solution proposed is to develop a communication language based on the consolidated and standardized diver gestures, commonly employed during professional and recreational dives, thus leading to the definition of a CADDY language, called CADDIAN, and a communication protocol. 
This article focuses on the creation of the language providing alphabet, syntax and semantics: future work will explain the part of recognition of gestures that is still in progress.}, KEYWORDS = {gesture language, human robot interaction, mobile robots, underwater environment, marine systems}, PAGES = {9}, URL = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=7271710\&filter=AND%28p_Publication_Number:7227859%29}, DOI = {10.1109/OCEANS-Genova.2015.7271710}, CONFERENCE_NAME = {OCEANS 2015 MTS/IEEE-Genova}, CONFERENCE_PLACE = {Genova}, CONFERENCE_DATE = {18-21/05/2015}, } @INPROCEEDINGS{CHIARELLA_2015_INPROCEEDINGS_CCML_383465, AUTHOR = {Chiarella, D. and Cutugno, P. and Marconi, L. and Lucentini, R.}, TITLE = {Domain-specific languages: a gesture-based approach for Human Robot Interaction in underwater environments}, YEAR = {2015}, ABSTRACT = {This paper introduces a gesture-based language for Human Robot Interaction (HRI) specifically aimed to divers. Divers generally operate in environments with harsh conditions and, at the same time, difficult to monitor; in this scenario, any sudden event can create an emergency situation that may compromise the immersion or even turns into worse consequences involving the safety of divers themselves. To cope with such situations, standard procedures suggest to dive in pairs and to follow well-defined rules to avoid the risk of accidents. However, these procedures may not be sufficient to avoid dangerous events such as failure in the breathing apparatus, burst eardrum, decompression sickness and nitrogen narcosis. FP7 CADDY project was developed to overcome these problems, with the idea to transfer robotics technology in diving: the main aim is improving the level of safety during diving. CADDY project focuses, in fact, on the development of a companion robot designed to support human operations and activities during the dive, as well as to monitor the status of the diver and in such a way to prevent harmful events. 
Various problems have to be confronted to provide the diver a reliable and useful supporting robotic vehicle: one of them is the development of a communication and interaction methodology that allows the diver and the robot to cooperate actively for the fulfilment of tasks required when diving. Communication and correct reception of messages between the diver and underwater robot are essential for the success of the objectives of immersion. However, the underwater environment poses a number of difficult technical constraints limiting the possibilities of communication (electro-magnetic waves strong attenuation and signal scattering and dispersion). The most reliable solution for underwater communication is acoustic technology, with two main drawbacks: high prices of devices and very low data rates. To solve these issues, the solution proposed is the development of a communication language (called CADDIAN) based, partly, on the consolidated and standardized diver gestures that are commonly employed during professional and recreational dives.}, KEYWORDS = {domain-specific languages, human robot interaction, gesture-based language, underwater communication}, PAGES = {12}, URL = {https://publications.cnr.it/doc/383465}, ISBN = {9789597152347}, CONFERENCE_NAME = {IX Conferencia Científica Internacional Lingüística}, CONFERENCE_PLACE = {La Habana Cuba}, CONFERENCE_DATE = {25-27/11/2015}, } @INPROCEEDINGS{CIGNONI_2015_INPROCEEDINGS_CFF_329387, AUTHOR = {Cignoni, L. and Fornaciari, G. and Fornaciari, A.}, TITLE = {Many hands make light work: collaborative CLIL activities for University courses in Medieval funerary archaeology}, YEAR = {2015}, ABSTRACT = {This paper describes the activities performed by the students of the course of funerary archaeology held at the Division of Palaeopathology of Pisa University in collaboration with the Institute for Computational Linguistics (ILC) of the National Research Council (CNR) in Pisa in the period April-June 2014. 
The lessons, which used a Content and Language Integrated Learning (CLIL) approach, were aimed at studying the funerary beliefs and burial practices in Italy and England in the Middle Ages. The 2014 course followed on from the courses of the year 2012 (focused on the more general issue of taphonomy; primary and secondary burials; single, double, or multiple burials), and 2013 (which examined the world of the ancient Romans and their burial customs of cremation and inhumation). The lessons were conducted by using extracts from self-contained specialized texts that were simple to read and that offered the basic concepts of medieval funerary archaeology. The students were supported by a reference text for funerary archaeology, which established the correct nomenclature to use when describing bodies, grave goods and tombs. Powerpoint slide presentations helped students break up the monotony of the text work and made the material more interesting and engaging. The slides were used to illustrate different types of burials in filled or empty spaces; the position of burials in both rural and urban environments; the disposition of the limbs in the burial; the rise of the Monasteries in the early Middle Ages and of the religious Orders of the Dominicans and Franciscans in the late Middle Ages. Each student was responsible for researching and reporting on a particular topic, and was supported by the use of information and communication techniques. Particular attention was devoted to the Books of Hours, important illuminated medieval manuscripts (containing psalms, short prayers and biblical quotations) that marked the different parts of the day and that were specifically composed for wealthy people. 
Classroom activities ranged from the simpler multi-matching and gap-filling exercises to the more complex tasks of providing definitions for given words, creating mind-maps, enriching a bilingual English-Italian glossary and providing contextualized examples for an English grammar book. Educational videos from the BBC or other channels and pertaining to the topics treated during the lessons were projected each time and were followed by direct questioning and more general conversation, to help students gain proficiency in oral communication. In the last three years, the Italian students from Pisa University have been working in collaboration with those of Ohio University on an excavation project carried out at the Field School in Medieval Archaeology and Bioarchaeology at Badia Pozzeveri (Lucca, Italy), to which the prestigious International journal SCIENCE dedicated a special issue and cover in December 2013. Finally, multidisciplinary elements were also included in the courses, by exploiting the information extracted from videos related to disciplines other than funerary archaeology, for example a BBC Channel 4 video describing the British meals of the day, the origins of which date back to medieval times.}, KEYWORDS = {CLIL, collaborative learning, medieval funerary archaeology, computer technology, archaeological field work}, PAGES = {2271-2279}, URL = {https://publications.cnr.it/doc/329387}, ISBN = {978-84-606-5763-7}, CONFERENCE_NAME = {9th International Technology, Education and Development Conference}, CONFERENCE_PLACE = {Madrid}, CONFERENCE_DATE = {2-4 marzo 2015}, } @INPROCEEDINGS{CRESCI_2015_INPROCEEDINGS_CCDT_337237, AUTHOR = {Cresci, S. and Cimino, A. and Dell'Orletta, F. 
and Tesconi, M.}, TITLE = {Crisis Mapping during Natural Disasters via Text Analysis of Social Media Messages}, YEAR = {2015}, ABSTRACT = {Recent disasters demonstrated the central role of social media during emergencies thus motivating the exploitation of such data for crisis mapping. We propose a crisis mapping system that addresses limitations of current state-of-the-art approaches by analyzing the textual content of disaster reports from a twofold perspective. A damage detection component employs a SVM classifier to detect mentions of damage among emergency reports. A novel geoparsing technique is proposed and used to perform message geolocation. We report on a case study to show how the information extracted through damage detection and message geolocation can be combined to produce accurate crisis maps. Our crisis maps clearly detect both highly and lightly damaged areas, thus opening up the possibility to prioritize rescue efforts where they are most needed.}, KEYWORDS = {crisis informatics, Emergency Management, geoparsing, social media mining, Twitter}, PAGES = {1-8}, URL = {https://publications.cnr.it/doc/337237}, PUBLISHER = {Springer (Berlin, Germania)}, ISSN = {0302-9743}, CONFERENCE_NAME = {Web Information Systems Engineering-WISE 2015}, CONFERENCE_PLACE = {Miami, USA}, CONFERENCE_DATE = {02/11/2015}, BOOKTITLE = {Lecture notes in computer science}, } @INPROCEEDINGS{CRESCI_2015_INPROCEEDINGS_CTCD_336952, AUTHOR = {Cresci, S. and Tesconi, M. and Cimino, A. and Dell'Orletta, F.}, TITLE = {A Linguistically-driven Approach to Cross-Event Damage Assessment of Natural Disasters from Social Media Messages}, YEAR = {2015}, ABSTRACT = {This work focuses on the analysis of Italian social media messages for disaster management and aims at the detection of messages carrying critical information for the damage assessment task. 
A main novelty of this study consists in the focus on out-domain and cross-event damage detection, and on the investigation of the most relevant tweet-derived features for these tasks. We devised different experiments by resorting to a wide set of linguistic features qualifying the lexical and grammatical structure of a text as well as ad-hoc features specifically implemented for this task. We investigated the most effective features that allow to achieve the best results. A further result of this study is the construction of the first manually annotated Italian corpus of social media messages for damage assessment.}, KEYWORDS = {crisis informatics, Damage assessment, Emergency Management, feature selection, social media mining, Social Sensing}, PAGES = {6}, URL = {https://publications.cnr.it/doc/336952}, CONFERENCE_NAME = {Proceedings of the 24th international conference companion on World Wide Web. ACM, 2015}, CONFERENCE_PLACE = {Florence, Italy}, CONFERENCE_DATE = {18/05/2015}, } @INPROCEEDINGS{CUTUGNO_2015_INPROCEEDINGS_CLMC_304735, AUTHOR = {Cutugno, P. and Lucentini, R. and Marconi, L. and Chiarella, D.}, TITLE = {Relaciones sin violencia: lenguaje, estereotipos y sexismo benévolo}, YEAR = {2015}, PAGES = {200-204}, URL = {https://publications.cnr.it/doc/304735}, PUBLISHER = {Centro de Lingüística Aplicada, Ministero de Ciencia, Tecnología y Medio Ambiente (Santiago de Cuba, CUB)}, ISBN = {9789597174295}, CONFERENCE_NAME = {XIV Simposio Internacional de Comunicación Social: retos y perspectivas}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {19-23 gennaio 2015}, BOOKTITLE = {Comunicación Social: retos y perspectivas Vol. I°}, EDITOR = {Ruiz Miyares, L. and Muñoz Alvarado, A. and Alvarez Silva, M. R. and Pérez Joa, Y. and Jackson Rodríguez, D.}, } @INPROCEEDINGS{DELGRATTA_2015_INPROCEEDINGS_DFMPRBGKQSC_342213, AUTHOR = {Del Gratta, R. and Frontini, F. and Monachini, M. and Pardelli, G. and Russo, I. and Bartolini, R. and Goggi, S. and Khan, F. 
and Quochi, V. and Soria, C. and Calzolari, N.}, TITLE = {Visualising Italian Language Resources: a Snapshot}, YEAR = {2015}, ABSTRACT = {This paper aims to provide a first snapshot of Italian Language Resources (LRs) and their uses by the community, as documented by the papers presented at two different conferences, LREC2014 and CLiC-it 2014. The data of the former were drawn from the LOD version of the LRE Map, while those of the latter come from manually analyzing the proceedings. The results are presented in the form of visual graphs and confirm the initial hypothesis that Italian LRs require concrete actions to enhance their visibility.}, KEYWORDS = {Italian Language Resources}, PAGES = {100-104}, URL = {https://books.openedition.org/aaccademia/1277?lang=it}, ISBN = {978-88-99200-62-6}, CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics CLiC-it 2015}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {3-4 December 2015}, BOOKTITLE = {Proceedings of the Second Italian Conference on Computational Linguistics CLiC-it 2015}, EDITOR = {Bosco, C. and Tonelli, S. and Zanzotto, F. M.}, } @INPROCEEDINGS{FERRARI_2015_INPROCEEDINGS_FSGD_346045, AUTHOR = {Ferrari, A. and Spagnolo, G. O. and Gnesi, S. and Dell'Orletta, F.}, TITLE = {CMT and FDE: tools to bridge the gap between natural language documents and feature diagrams}, YEAR = {2015}, ABSTRACT = {A business subject who wishes to enter an established technological market is required to accurately analyse the features of the products of the different competitors. Such features are normally accessible through natural language (NL) brochures, or NL Web pages, which describe the products to potential customers. Building a feature model that hierarchically summarises the different features available in competing products can bring relevant benefits in market analysis. A company can easily visualise existing features, and reason about aspects that are not covered by the available solutions. 
However, designing a feature model starting from publicly available documents of existing products is a time consuming and error-prone task. In this paper, we present two tools, namely Commonality Mining Tool (CMT) and Feature Diagram Editor (FDE), which can jointly support the feature model definition process. CMT allows mining common and variant features from NL descriptions of existing products, by leveraging a natural language processing (NLP) approach based on contrastive analysis, which allows identifying domain-relevant terms from NL documents. FDE takes the commonalities and variabilities extracted by CMT, and renders them in a visual form. Moreover, FDE allows the graphical design and refinement of the final feature model, by means of an intuitive GUI}, KEYWORDS = {Software Product Lines, Variability Mining, Tools}, PAGES = {402-410}, URL = {http://dl.acm.org/citation.cfm?doid=2791060.2791117}, DOI = {10.1145/2791060.2791117}, ISBN = {978-1-4503-3613-0}, CONFERENCE_NAME = {19th International Conference on Software Product Line}, CONFERENCE_PLACE = {Nashville, TN, USA}, CONFERENCE_DATE = {20-24/07/2015}, } @INPROCEEDINGS{FERRO_2015_INPROCEEDINGS_FMP_331183, AUTHOR = {Ferro, M. and Marzi, C. and Pirrelli, V.}, TITLE = {Lexical parsability and morphological structure}, YEAR = {2015}, ABSTRACT = {A classical tenet in the psycholinguistic literature on the mental lexicon is that a parsed affix presents high activation levels (and thus contributes to activation spreading to other words with the same affix), and that such levels are tightly correlated with the affix productivity. In a number of influential papers, it has been suggested that parsability criteria interact with frequency to define morphological productivity in the lexicon. For example, the frequency of a derivative (e.g. government) relative to its base (govern) is shown to be a good predictor for parsability/productivity. 
The higher the frequency ratio, the more likely the morphological structure to be perceived, and the associated affix to be used productively. The present contribution intends to offer a computational explanatory basis for this correlational evidence, and assess its applicability to the acquisition of complex inflectional paradigms. In those languages, like Italian and German, whose inflection is stem-based rather than word-based, there is often no single paradigmatic form which can act as a base by being properly contained in all other inflected variants. Yet, it seems intuitive to suggest that verbs that are inflected for one paradigm cell only (e.g. neighbouring), are learned earlier and more easily but exhibit lower levels of perceived inflectional structure than verbs with richer paradigms. This appears to be in good accord with experimental evidence of time latencies in lexical decision, which are shown to correlate negatively with token frequency, paradigm size and paradigm entropy. Our simulations, based on Temporal Self-Organizing Maps (TSOMs) allow us to establish an interesting connection between inflectional parsability, frequency-based paradigm structure, and acquisitional constraints on the interaction between the human processor and working memory. Self-organising topological models of the mental lexicon can mimic the spatial and temporal organization of memory structures supporting the processing of symbolic sequences, and can provide an interesting framework for testing integrative accounts of lexical processing/acquisition as the complex result of general-purpose operations on word stimuli (e.g. working memory, long-term storage, sensory-motor mapping, rehearsal, unit integration, unit analysis, executive control, time-series processing), in line with recent acquisitions on the neuro-functional architecture of the perisylvian language network in the left hemisphere of human brain. 
Simulations of the incremental acquisition of "mini-paradigms" (small islands of morphological contrast encompassing up to three different forms for the same verb) support the hypothesis that perception of structure (parsability) and morphological productivity strongly correlate in the inflectional lexica of German and Italian. In particular, by monitoring longitudinal progress in storage and generalisation of differently distributed inflectional paradigms in the two languages, we show that: i) high-frequency forms are stored and accessed significantly earlier than low-frequency forms; ii) deeply entrenched but paradigmatically isolated forms tend to block usage of other forms in the same paradigm; iii) low-frequency evenly distributed (highly entropic) intra-paradigmatic forms are acquired later but are easily extended. Our investigation credits the proposed computational framework with psycholinguistic plausibility, and grounds parsability-based models of morphological productivity on a specific, explicit proposal of lexical architecture. This provides an explanatory basis for both psycholinguistic and linguistic accounts of morphological structure, and offers an intermediate framework for scientific inquiry bridging the gap between linguistic units and functional units in neurosciences. Finally, it makes the interesting suggestion that principles of morpheme-based organisation of the mental lexicon are compatible with a learning strategy requiring memorisation of full forms.}, KEYWORDS = {morphological structure, word processing, token/type frequency}, PAGES = {22-37}, URL = {http://mmm.lis.upatras.gr/index.php/mmm/issue/view/293/showToc}, PUBLISHER = {Università degli Studi di Bologna (Bologna, Italia)}, ISSN = {1826-7491}, CONFERENCE_NAME = {Morphology and Semantics-Ninth Mediterranean Morphology Meeting}, CONFERENCE_PLACE = {Dubrovnik (Croatia)}, CONFERENCE_DATE = {15-18/09/2013}, BOOKTITLE = {Morphology and Semantics}, EDITOR = {Audring, J. and Koutsoukos, N. 
and Masini, F. and Raffaelli, I.}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FBG_307909, AUTHOR = {Frontini, F. and Boukhaled, M. A. and Ganascia, J.}, TITLE = {Linguistic Pattern Extraction and Analysis for Classic French Plays}, YEAR = {2015}, ABSTRACT = {Great authors of fiction and theatre have the capacity of creating memorable characters that take life and become almost as real as living persons to the readers/audience. The study of characterization, namely of how this is achieved, is a well-researched topic in corpus stylistics: for instance (Mahlberg, 2012) attempts to identify typical lexical patterns for memorable Dickens' characters by extracting those lexical bundles that stand out (namely are overrepresented) in comparison to a general corpus. In other works, authorship attribution methods are applied to the different characters of a play to identify whether the author has been able to provide each of them with a "distinct" voice. For instance (Vogel \& Lynch, 2008) compare individual Shakespeare characters against the whole play or even against all plays of the same author. The purpose of this paper is to propose a methodology for the study characterization of several characters in French plays of the classical period. The tools developed are meant to support textual analysis by: 1) Verifying the degree of characterization of each character with respect to others. 2) Automatically inducing a list of linguistic features that are significant, representative for that character. Preliminary investigations have been conducted on plays by Moliere, cross-comparing four protagonists from four different plays. 
The proposed methodology relies on sequential data mining for the extraction of linguistic patterns and on correspondence analysis for comparison of patterns frequencies in each character and for the visual representation of such differences.}, KEYWORDS = {computational stylometry, theatre, sequential pattern mining}, PAGES = {3}, URL = {http://lipn.univ-paris13.fr/~charnois/conscilaGenres/resumes/frontini.pdf}, CONFERENCE_NAME = {Journée ConSciLa (Confrontations en Sciences du Langage) Grammaire des genres et des styles: quelles approches privilégier ?}, CONFERENCE_PLACE = {Paris}, CONFERENCE_DATE = {16/01/2015}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FBG_330648, AUTHOR = {Frontini, F. and Brando, C. and Ganascia, J.}, TITLE = {Semantic Web based Named Entity Linking for Digital Humanities and Heritage Texts}, YEAR = {2015}, ABSTRACT = {This paper proposes a graph based methodology for automatically disambiguating authors' mentions in a corpus of French literary criticism. Candidate referents are identified and evaluated using a graph based named entity linking algorithm, which exploits a knowledge-base built out of two different resources (DBpedia and the BnF linked data). The algorithm expands previous ones applied for word sense disambiguation and entity linking, with good results. Its novelty resides in the fact that it successfully combines a generic knowledge base such as DBpedia with a domain specific one, thus enabling the efficient annotation of minor authors. This will help specialists to follow mentions of the same author in different works of literary criticism, and thus to investigate their literary appreciation over time.}, KEYWORDS = {named-entity linking, linked data, digital humanities}, PAGES = {77-88}, URL = {http://ceur-ws.org/Vol-1364/paper9.pdf}, VOLUME = {Vol-1364}, PUBLISHER = {M. 
Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {SW4SH 2015 Semantic Web for Scientific Heritage 2015}, CONFERENCE_PLACE = {Portoroz, Slovenia}, CONFERENCE_DATE = {June, 1st 2015}, BOOKTITLE = {SW4SH 2015 Semantic Web for Scientific Heritage 2015}, EDITOR = {Zucker, A. and Draelants, I. and Zucker, C. F. and Monnin, A.}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FBG_331797, AUTHOR = {Frontini, F. and Brando, C. and Ganascia, J.}, TITLE = {Domain-adapted named-entity linker using Linked Data}, YEAR = {2015}, ABSTRACT = {We present REDEN, a tool for graph-based Named Entity Linking that allows for the disambiguation of entities using domain-specific Linked Data sources and different configurations (e.g. context size). It takes TEI-annotated texts as input and outputs them enriched with external references (URIs). The possibility of customizing indexes built from various knowledge sources by defining temporal and spatial extents makes REDEN particularly suited to handle domain-specific corpora such as enriched digital editions in the Digital Humanities.}, KEYWORDS = {named-entity disambiguation, evaluation, linked data, digital humanities}, PAGES = {10}, URL = {http://ceur-ws.org/Vol-1386/named_entity.pdf}, VOLUME = {Vol-1386}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {Workshop on NLP Applications: Completing the Puzzle co-located with the 20th International Conference on Applications of Natural Language to Information Systems (NLDB 2015)}, CONFERENCE_PLACE = {Passau, Germany}, CONFERENCE_DATE = {June 17-19, 2015}, BOOKTITLE = {Proceedings of the Workshop on NLP Applications: Completing the Puzzle}, EDITOR = {Izquierdo, R.}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FQM_304304, AUTHOR = {Frontini, F. and Quochi, V. 
and Monachini, M.}, TITLE = {Generative Lexicon and polysemy: inducing logical alternations}, YEAR = {2015}, ABSTRACT = {The current paper brings together the results of a series of experiments for inducing regular sense alternations, or regular/ logical polysemy, from a computational lexicon based on the Generative Lexicon theory. The results are discussed in light of the potential benefits and uses of the amended algorithm.}, KEYWORDS = {Polysemy, Generative Lexicon, Logical Alternations}, PAGES = {7}, URL = {https://publications.cnr.it/doc/304304}, PUBLISHER = {MAPLEX2015 Multiple Approaches to Lexicon Conference (Yamagata, JPN)}, CONFERENCE_NAME = {MAPLEX2015 Multiple Approaches to Lexicon Conference}, CONFERENCE_PLACE = {Yamagata, Japan}, CONFERENCE_DATE = {February 9-10, 2015}, EDITOR = {Hsieh, S. and Kanzaki, K.}, } @INPROCEEDINGS{GIANNINI_2015_INPROCEEDINGS_GBGP_329374, AUTHOR = {Giannini, S. and Biagioni, S. and Goggi, S. and Pardelli, G.}, TITLE = {Mapping Italian grey communities: what is there beyond the Academy?}, YEAR = {2015}, ABSTRACT = {This research aims at verifying whether - and eventually how much - the grey literature available on the web is actually structured, accessible or even managed by systems dealing with its organization and aiming at its retrieval and storing. The utmost goal is to build up a map of non-academic communities and their mechanisms for managing, presenting and disseminating this type of material. It is a sort of journey among the streams of the Web, which channel meeting minutes, manifests, fliers, pictures, newspapers articles, journalistic services and audio/video material on various topics. These "grey" products - by conveying basic information about social and popular culture - store, represent and spread knowledge.}, KEYWORDS = {Italian Grey Literature A. 
1 INTRODUCTORY AND SURVEY}, PAGES = {17-29}, URL = {http://www.textrelease.com/publications/proceedings.html}, VOLUME = {16}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-23-4}, CONFERENCE_NAME = {GL16-Sixteenth International Conference on Grey Literature Grey Literature Lobby: Engines and Requesters for Change}, CONFERENCE_PLACE = {Washington DC, USA (Library of Congress)}, CONFERENCE_DATE = {8-9 December 2014}, BOOKTITLE = {Grey Literature Lobby: Engines and Requesters for Change}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{GOGGI_2015_INPROCEEDINGS_GMFBPDBM_329370, AUTHOR = {Goggi, S. and Monachini, M. and Frontini, F. and Bartolini, R. and Pardelli, G. and De Mattei, M. and Bustaffa, F. and Manzella, G.}, TITLE = {Marine Planning and Service Platform (MAPS): An Advanced Research Engine for Grey Literature in Marine Science}, YEAR = {2015}, ABSTRACT = {The MAPS (Marine Planning and Service Platform) project is a development of the Marine project (Ricerca Industriale e Sviluppo Sperimentale Regione Liguria 2007-2013) aiming at building a computer platform for supporting a Marine Information and Knowledge System, as part of the data management activities. One of the main objective of the project is to develop a repository that should gather, classify and structure marine scientific literature and data thus guaranteeing their accessibility to researchers and institutions by means of standard protocols. We will present the scenario of the Operative Oceanography together with the technologies used to develop an advanced search engine which aims at providing rapid and efficient access to a Digital Library of oceanographic data. 
The case-study is also highlighting how the retrieval of grey literature from this specific marine community could be reproduced for similar communities as well, thus revealing the great impact that the processing, re-use as well as application of grey data have on societal needs/problems and their answers.}, KEYWORDS = {Marine Science Search Engine Source Data Oceanography}, PAGES = {108-114}, URL = {http://www.textrelease.com/gl16program.html}, VOLUME = {16}, PUBLISHER = {TextRelease (Amsterdam, NLD)}, ISBN = {978-90-77484-23-4}, CONFERENCE_NAME = {Sixteenth International Conference on Grey Literature Grey Literature Lobby: Engines and Requesters for Change}, CONFERENCE_PLACE = {Library of Congress Washington D. C., USA}, CONFERENCE_DATE = {December 8-9 2014}, BOOKTITLE = {Grey Literature Lobby: Engines and Requesters for Change}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{GOGGI_2015_INPROCEEDINGS_GPSGB_318501, AUTHOR = {Goggi, S. and Pardelli, G. and Sassi, M. and Giannini, S. and Biagioni, S.}, TITLE = {A terminological survey on the titles of the Seventh Framework Programme (FP7)}, YEAR = {2015}, ABSTRACT = {This paper focuses on the automatic extraction of domain-specific knowledge from the European Commission projects of the 7th Framework Programme, hereinafter referred as FP7. The study is divided in three parts: the first part introduces the work starting from the building up of a corpus containing the titles of European Projects of the whole FP7 in order to obtain a relevant terminological sample for the different domains; the second describes software and methods while the third part focuses on the evaluation of results. Finally, we conclude by suggesting possible directions for further development of a comparison between terminological extraction from FP7 and FP5/FP6.}, KEYWORDS = {7th Framework Programme (FP7), Natural Language Processing, Terminology, Knowledge extraction, Grey Literature, I. 2. 7 Natural Language Processing. 
Text analysis, I. 2. 1 Applications and Expert Systems. Natural language interfaces}, PAGES = {223-227}, URL = {https://publications.cnr.it/doc/318501}, ISBN = {978-959-7174-28-8}, CONFERENCE_NAME = {Fourteenth International Symposium on Comunicación Social: retos y perspectivas}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {19-23 de enero 2015}, EDITOR = {Ruiz Miyares, L. and Álvarez Silva, M. R. and Muñoz Alvarado, A.}, } @INPROCEEDINGS{KHAN_2015_INPROCEEDINGS_KF_329646, AUTHOR = {Khan, F. and Frontini, F.}, TITLE = {Using Ontologies to Model Polysemy in Lexical Resources}, YEAR = {2015}, ABSTRACT = {In this article we look at how the use of ontologies can assist in analysing polysemy in natural languages. We develop a model, the Lexical-Sense-Ontology model (LSO), to represent the interaction between a lexicon and ontology, based on lemon. We use the LSO model to show how default rules can be used to represent semi-productivity in polysemy as well as discussing the kinds of ontological information that are useful for studying polysemy.}, KEYWORDS = {Polysemy, Ontology, Default Logic}, URL = {http://www.aclweb.org/anthology/W/W15/W15-0404.pdf}, CONFERENCE_NAME = {Workshop on Language and Ontologies}, CONFERENCE_PLACE = {London}, CONFERENCE_DATE = {14/04/2015}, BOOKTITLE = {Proceedings of the Workshop on Language and Ontologies}, } @INPROCEEDINGS{MARCONI_2015_INPROCEEDINGS_MCLCMM_304763, AUTHOR = {Marconi, L. and Cutugno, P. and Lucentini, R. and Chiarella, D. and Morgavi, G. 
and Morando, M.}, TITLE = {La tecnología como sostén de la organización de datos lingüísticos concernientes a las plantas medicinales}, YEAR = {2015}, PAGES = {605-609}, URL = {https://publications.cnr.it/doc/304763}, PUBLISHER = {Centro de Lingüística Aplicada, Ministero de Ciencia, Tecnología y Medio Ambiente (Santiago de Cuba, CUB)}, ISBN = {9789597174301}, CONFERENCE_NAME = {XIV Simposio Internacional de Comunicación Social: retos y perspectivas}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {19-23 gennaio 2015}, BOOKTITLE = {Comunicación Social: retos y perspectivas Vol. II°}, EDITOR = {Ruiz Miyares, L. and Muñoz Alvarado, A. and Alvarez Silva, M. R. and Pérez Joa, Y. and Jackson Rodríguez, D.}, } @INPROCEEDINGS{MARZI_2015_INPROCEEDINGS_MFP_329352, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Lexical emergentism and the "frequency-by-regularity" interaction}, YEAR = {2015}, ABSTRACT = {In spite of considerable converging evidence of the role of inflectional paradigms in word acquisition and processing, little efforts have been put so far into providing detailed, algorithmic models of the interaction between lexical token frequency, paradigm frequency, paradigm regularity. We propose a neurocomputational account of this interaction, and discuss some theoretical implications of preliminary experimental results.}, KEYWORDS = {morphological structure, frequency distribution, temporal self-organising maps}, PAGES = {37-41}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84927156830\&origin=inward}, VOLUME = {1347}, PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, ISSN = {1613-0073}, CONFERENCE_NAME = {NetWordS Final Conference on Word Knowledge and Word Usage: Representations and Processes in the Mental Lexicon}, CONFERENCE_PLACE = {Pisa (Italy)}, CONFERENCE_DATE = {30-31/03 01/04 2015}, BOOKTITLE = {Word Knowledge and Word Usage 2015}, EDITOR = {Pirrelli, V. and Marzi, C. 
and Ferro, M.}, } @INPROCEEDINGS{NAHLI_2015_INPROCEEDINGS_NM_342436, AUTHOR = {Nahli, O. and Marchi, S.}, TITLE = {Improved Written Arabic Word Parsing through Orthographic, Syntactic and Semantic constraints}, YEAR = {2015}, ABSTRACT = {The script-based and morphological characteristics of the Arabic language increase considerably the number of alternative analyses output by any morphological parser that does not use orthographic, syntactic and semantic constraints. In order to reduce time-wasting and error-prone proliferation of multiple outputs to be filtered in a post-processing phase, we have tried to optimize word processing by providing the morphological parser with multiple levels of information. We have operated at three such levels: orthography, morpho-syntax and semantics.}, KEYWORDS = {Arabic Language, Arabic NLP, Orthography, Morpho-syntax, Semantics}, PAGES = {210-214}, URL = {http://www.aaccademia.it/elenco-libri?aaref=CLIC_2015}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {9788899200626}, CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics CLiC-it 2015}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {3-4 Dicembre 2015}, } @INPROCEEDINGS{PIRRELLI_2015_INPROCEEDINGS_PNBDM_333414, AUTHOR = {Pirrelli, V. and Nahli, O. and Boschetti, F. and Del Gratta, R. and Marzi, C.}, TITLE = {Computational Linguistics and Language Physiology: Insights from Arabic NLP and Cooperative Editing}, YEAR = {2015}, ABSTRACT = {Computer processing of written Arabic raises a number of challenges to traditional parsing architectures on many levels of linguistic analysis. In this contribution, we review some of these core issues and the demands they make, to suggest different strategies to successfully tackle them. In the end, we assess these issues in connection with the behaviour of neuro-biologically inspired lexical architectures known as Temporal Self-Organising Maps. 
We show that, far from being language-specific problems, issues in Arabic processing can shed light on some fundamental characteristics of the human language processor, such as structure-based lexical recoding, concurrent, competitive activation of output candidates and dynamic selection of optimal solutions.}, KEYWORDS = {Non-concatenative morphology, Optical Character Recognition, WordNet, Temporal Self-organising Maps, Mental Lexicon, Language neuro-physiology}, PAGES = {1-8}, URL = {http://dl.acm.org/citation.cfm?id=2802612}, DOI = {10.1145/2802612.2802637}, ISBN = {978-1-4503-3295-8}, CONFERENCE_NAME = {Third AIUCD Annual Conference-Humanities and Their Methods in the Digital Ecosystem}, CONFERENCE_PLACE = {Bologna (IT)}, CONFERENCE_DATE = {18-19/09/2014}, BOOKTITLE = {Third AIUCD Annual Conference-Humanities and Their Methods in the Digital Ecosystem}, EDITOR = {Tomasi, F. and Del Turco, R. R. and Tammaro, A. M.}, } @INPROCEEDINGS{RICHTER_2015_INPROCEEDINGS_RCDV_357144, AUTHOR = {Richter, S. and Cimino, A. and Dell'Orletta, F. and Venturi, G.}, TITLE = {Tracking the Evolution of Written Language Competence: an NLP-based Approach}, YEAR = {2015}, ABSTRACT = {In this paper, we present an NLP-based innovative approach for tracking the evolution of written language competence relying on different sets of linguistic features that predict text quality. 
This approach was tested on a corpus of essays written by Italian L1 learners of the first and second year of the lower secondary school.}, KEYWORDS = {Evolution of Written Language Competence, multi-level linguistic analysis}, PAGES = {236-240}, URL = {http://www.italianlp.it/wp-content/uploads/2016/03/tracking-language-competence.pdf}, PUBLISHER = {Accademia University Press (Torino, ITA)}, ISBN = {978-88-99200-62-6}, CONFERENCE_NAME = {2nd Italian Conference on Computational Linguistics (CLiC-it)}, CONFERENCE_PLACE = {Trento}, CONFERENCE_DATE = {3-4 dicembre 2015}, } @INPROCEEDINGS{RUSSO_2015_INPROCEEDINGS_RCM_332590, AUTHOR = {Russo, I. and Caselli, T. and Monachini, M.}, TITLE = {Extracting and Visualising Biographical Events from Wikipedia}, YEAR = {2015}, ABSTRACT = {This work presents a proposal for the development of a natural language processing module for event and temporal analysis of biographies as available in Wikipedia. At the current level of development, we restricted the extraction to temporally anchored events as they represent salient information which can be further used to extract additional events and facilitate their chronological ordering and the representation of a person's timeline. Visualising data about basic facts concerning groups of people helps with historical reasoning and enables comparisons among them.}, KEYWORDS = {mining biographies for structured information, visualising biographical data, temporal information}, PAGES = {111-115}, URL = {http://ceur-ws.org/Vol-1399/paper17.pdf}, VOLUME = {1399}, CONFERENCE_NAME = {BD2015 Biographical Data in a Digital World 2015}, CONFERENCE_PLACE = {Amsterdam}, CONFERENCE_DATE = {April 9, 2015}, BOOKTITLE = {BD2015 Biographical Data in a Digital World 2015}, EDITOR = {Braake, S. T. and Fokkens, A. and Sluijter, R. and Declerck, T. and Wandl Vogt, E.}, } @INPROCEEDINGS{RUSSO_2015_INPROCEEDINGS_RCS_331215, AUTHOR = {Russo, I. and Caselli, T.
and Strapparava, C.}, TITLE = {SemEval-2015 Task 9: CLIPEval Implicit Polarity of Events}, YEAR = {2015}, ABSTRACT = {Sentiment analysis tends to focus on the polarity of words, combining their values to detect which portion of a text is opinionated. CLIPEval wants to promote a more holistic approach, looking at psychological researches that frame the connotations of words as the emotional values activated by them. The implicit polarity of events is just one aspect of connotative meaning and we address it with a task that is based on a dataset of sentences annotated as instantiations of pleasant and unpleasant events previously collected in psychological research as the ones on which human judgments converge.}, KEYWORDS = {sentiment analysis}, PAGES = {443-450}, URL = {http://alt.qcri.org/semeval2015/cdrom/pdf/SemEval077.pdf}, ISBN = {978-1-941643-40-2}, CONFERENCE_NAME = {Proceedings of SemEval-2015}, CONFERENCE_PLACE = {Denver, Colorado, USA}, CONFERENCE_DATE = {giugno 4-5, 2015}, } @INPROCEEDINGS{SORIA_2015_INPROCEEDINGS_S_332517, AUTHOR = {Soria, C.}, TITLE = {Towards a notion of "Digital Language Diversity"}, YEAR = {2015}, ABSTRACT = {This paper introduces the concept of digital language diversity and advocates for its increase in order to foster the digital vitality of languages, and secure their overall vitality.}, KEYWORDS = {digital language diversity, NLP, less-resourced languages, regional languages, minority languages, digital rights}, PAGES = {111-125}, URL = {https://publications.cnr.it/doc/332517}, CONFERENCE_NAME = {3rd International Conference on Linguistic and Cultural Diversity in Cyberspace}, CONFERENCE_PLACE = {Yakutsk, Russian Federation}, CONFERENCE_DATE = {30/06/2014-03/07/2014}, BOOKTITLE = {Linguistic and Cultural Diversity in Cyberspace-Proceedings of the 3rd International Conference}, EDITOR = {Kuzmin, E. and Parshakova, A. and Ignatova, D.}, } @INPROCEEDINGS{VENTURI_2015_INPROCEEDINGS_VBDM_340387, AUTHOR = {Venturi, G.
and Bellandi, T. and Dell'Orletta, F. and Montemagni, S.}, TITLE = {NLP-Based Readability Assessment of Health-Related Texts: a Case Study on Italian Informed Consent Forms}, YEAR = {2015}, ABSTRACT = {The paper illustrates the results of a case study aimed at investigating and enhancing the accessibility of Italian health-related documents by relying on advanced NLP techniques, with particular attention to informed consent forms. Results achieved show that the features automatically extracted from the linguistically annotated text and ranging across different levels of linguistic description have a high discriminative power in order to guarantee a reliable readability assessment.}, KEYWORDS = {Readability assessment, health-related information}, PAGES = {131-141}, URL = {http://www.aclweb.org/anthology/W15-2618}, ISBN = {978-1-941643-32-7}, CONFERENCE_NAME = {Sixth International Workshop on Health Text Mining and Information Analysis (Louhi)}, CONFERENCE_PLACE = {Lisbona}, CONFERENCE_DATE = {17 settembre 2015}, } @INPROCEEDINGS{ALBANESI_2015_INPROCEEDINGS_ABBG_340309, AUTHOR = {Albanesi, A. and Bellandi, A. and Benotto, G. and Giovannetti, E.}, TITLE = {Translation, Annotation and Knowledge Modelling of the Babylonian Talmud: the Traduco System}, YEAR = {2015}, ABSTRACT = {In this work, we are going to present the Traduco System, a collaborative web-based application for the translation of the Babylonian Talmud (BT) into Italian. The System has been designed around a Computer-Assisted Translation (CAT) component, constituting its core. However, Traduco is not limited to assist the translation process and to provide printing functionalities. In fact, it allows linguistic and semantic annotations and advanced searches, paving the way to the construction of a talmudic knowledge base. In order to achieve these results, the Traduco development process abided by a model that took into account aspects of Natural Language Processing and Knowledge Engineering. 
The component based architectural structure was implemented using the object oriented Java 2 Enterprise Edition framework.}, KEYWORDS = {Computer-Assisted Translation, Interpretation, Semantic Annotation, Babylonian Talmud}, URL = {https://dh-abstracts.library.virginia.edu/works/2399}, CONFERENCE_NAME = {Digital Humanities 2015}, CONFERENCE_PLACE = {Sydney}, CONFERENCE_DATE = {29/06-03/07/2015}, } @INPROCEEDINGS{BOSCHETTI_2015_INPROCEEDINGS_BDDMDN_295474, AUTHOR = {Boschetti, F. and Del Gratta, R. and Del Grosso, A. and Monachini, M. and Diakoff, H. and Nahli, O.}, TITLE = {Collaborative Philology on the way to Web Services: the case of CoPhiWordnet}, YEAR = {2015}, ABSTRACT = {Starting from previous initiatives of the CoPhiLab, we show how they can be reinterpreted as Web Services, especially when they become part of a wider scenario: Web Services are used to make connections between lexicons, semantic resources and a fine grained text management. Linked Open Data is chosen to be the paradigm used to link the different resources, but also as the modality of data presentation.}, KEYWORDS = {Collaborative Philology, Web Services, Linked Open Data, Text Services, Text Interpretation}, URL = {http://langrid.org/wlsi2015/program.html}, CONFERENCE_NAME = {The Second International Workshop on Worldwide Language Service Infrastructure, WLSI 2015}, CONFERENCE_PLACE = {Kyoto}, CONFERENCE_DATE = {22-23rd January 2015}, } @INPROCEEDINGS{DEFELICE_2015_INPROCEEDINGS_D_300634, AUTHOR = {De Felice, I.}, TITLE = {GraDes: a corpus of grasp descriptions}, YEAR = {2015}, URL = {https://publications.cnr.it/doc/300634}, CONFERENCE_NAME = {AISV 2015}, CONFERENCE_PLACE = {Bologna}, CONFERENCE_DATE = {28-30/01/2014}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_F_315607, AUTHOR = {Frontini, F.}, TITLE = {What makes them different: the extraction of distinctive linguistic patterns for the protagonists of Molière's plays}, YEAR = {2015}, ABSTRACT = {Quantitative approaches to the study of
style in literature are far from a modern novelty. They have however recently gained more and more popularity, not only among computer scientists and corpus linguistics, but also among some influential literary critics. The present panorama of quantitative techniques is very rich, but often confusing, with a plethora of denominations and methodologies often difficult to reconcile; computer scientists classify their work as stylometry or computational stylistics, while linguists may use the label corpus stylistics, and finally critics like Franco Moretti will talk about macro-analysis and distant reading. This talk will try first to identify the differences between these trends, distinguishing between corpus based and corpus driven approaches on the methodological side (Quiniou et al 2012), and (following Ramsey 2011) between experimental and hermeneutical approaches. Finally we will present ongoing work conducted at Labex OBVIL on syntactic pattern extraction from theatrical characters. The proposed approach, using correspondence analysis to extract distinctive traits for each character, is imagined rather as an hermeneutical tool, in the sense that it does not seek to demonstrate that two different characters have been endowed with significantly different stylistic traits by the playwright, but it does enable the visualisation of their relative distances and the extraction of those elements that make them distinct.}, URL = {https://publications.cnr.it/doc/315607}, CONFERENCE_NAME = {Cycle des séminaires ILES LIMSI}, CONFERENCE_PLACE = {Paris}, CONFERENCE_DATE = {03/02/2015}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_F_329647, AUTHOR = {Frontini, F.}, TITLE = {Analyse et extraction des motifs syntaxiques dans la prose de Robert Challe et de ses apocryphes}, YEAR = {2015}, ABSTRACT = {Cette contribution presente une extraction et une analyse des motifs syntaxiques dans la prose de Robert Challe et de ses apocryphes. 
En particulier nous analysons les différence dans la syntaxe des contes originaux des Illustres Françaises et celle des contes apocryphes.}, KEYWORDS = {Robert Challe, authorship attribution, stilistica computazionale}, URL = {http://obvil.paris-sorbonne.fr/sites/default/files/projets/analyse_motifs_syntaxiques_if_et_apocryphes.pdf}, CONFERENCE_NAME = {Robert Challe: approches numériques des questions d'auctorialité}, CONFERENCE_PLACE = {Paris}, CONFERENCE_DATE = {28/03/2015}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_F_332668, AUTHOR = {Frontini, F.}, TITLE = {Mining for characterising patterns in literature using correspondence analysis: an experiment on French novels}, YEAR = {2015}, ABSTRACT = {The talk presents and describes a bottom up methodology for the detection of stylistic traits in the syntax of literary texts. The extraction of syntactic patterns is performed blindly by a sequential pattern mining algorithm, while the identification of significant and interesting features is performed later by using correspondence analysis and filtering for the most contributive patterns.}, KEYWORDS = {computational stylistics, French}, URL = {https://publications.cnr.it/doc/332668}, CONFERENCE_NAME = {Göttingen Dialog in Digital Humanities}, CONFERENCE_PLACE = {Göttingen}, CONFERENCE_DATE = {14/07/2015}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_F_336421, AUTHOR = {Frontini, F.}, TITLE = {Trattamento automatico del linguaggio per le Digital Humanities. Riconoscimento e disambiguazione di menzioni di autori in testi di critica letteraria}, YEAR = {2015}, ABSTRACT = {L'intervento scaturisce da una collaborazione tra ILC-CNR e il Labex OBVIL di Parigi. Lo scopo del progetto è quello di adattare ed estendere algoritmi di riconoscimento, classificazione e disambiguazione di entità nominate (in particolare menzioni di autori) nel "Corpus Critique", un insieme di testi di critica letteraria francese che il Labex OBVIL sta pubblicando in edizione digitale (formato TEI). 
Tali algoritmi si basano su approcci TAL supervisionati e non supervisionati e sfruttano massicciamente le basi di conoscenza, sia generiche (DBpedia) che di dominio, disponibili online sotto forma di linked data; lo scopo di tali lavori è di produrre risorse testuali annotate per facilitare la ricerca nell'ambito della storia della critica letteraria e della storia delle idee in generale. Durante il seminario verranno introdotti i formati e le risorse utilizzate, i criteri e le problematiche di annotazione emersi, e gli algoritmi di riconoscimento e disambiguazione di entità nominate sviluppati. Più in generale si cercherà di mostrare con alcuni casi di utilizzo quali siano i vantaggi di arricchire risorse testuali con questo livello di annotazione, nel più ampio contesto delle convergenze tra digital humanities e trattamento automatico del linguaggio. Link http://obvil.paris-sorbonne.fr/ https://github.com/cvbrandoe/REDEN/blob/master/README.md}, KEYWORDS = {Named-entity disambiguation, Centrality, Linked data, Data fusion, Digital humanities}, URL = {https://publications.cnr.it/doc/336421}, CONFERENCE_NAME = {Seminario di Cultura Digitale}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {04/11/2015}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FB_342185, AUTHOR = {Frontini, F. and Bénard, E.}, TITLE = {The Syntax of Stage. Studying Linguistic Patterns in Molière}, YEAR = {2015}, ABSTRACT = {Theatrical dialogue is a very peculiar type of communication, namely a written text that aims to mimic orality. Great playwrights use dialogue to create iconic human types, that actors then bring to life. Characterisation, comical effects and other plot devices are often achieved through the use of specific linguistic patterns. For this reason theatrical dialogue is an interesting test bed for computer-aided literary analysis and stylometric tools.
In this talk we shall analyse the application of advanced pattern extraction techniques to the study of Molière's dialogue and characters, where by "pattern" we mean sequences of lexical elements and parts of speech. In particular we shall see how different types of extractions may provide experts with different views on the texts and target different aspects of stylistic choice.}, KEYWORDS = {Computational stylistics, syntactic patterns, Molière}, URL = {http://www.uni-goettingen.de/de/525494.html}, CONFERENCE_NAME = {Göttinger philologisches Forum}, CONFERENCE_PLACE = {Göttingen, Germany}, CONFERENCE_DATE = {03/12/2015}, } @INPROCEEDINGS{FRONTINI_2015_INPROCEEDINGS_FBG_332819, AUTHOR = {Frontini, F. and Boukhaled, M. A. and Ganascia, J. G.}, TITLE = {Moliere's Raisonneurs: a quantitative study of distinctive linguistic patterns}, YEAR = {2015}, KEYWORDS = {Computational Stylistics, Correspondence analysis, Corpus linguistics, Molière}, PAGES = {114-117}, URL = {http://ucrel.lancs.ac.uk/cl2015/doc/CL2015-AbstractBook.pdf}, CONFERENCE_NAME = {Corpus Linguistics 2015}, CONFERENCE_PLACE = {Lancaster}, CONFERENCE_DATE = {21-24/07/2015}, BOOKTITLE = {Corpus Linguistics 2015-Abstract Book}, EDITOR = {Formato, F. and Hardie, A.}, } @INPROCEEDINGS{GIANNINI_2015_INPROCEEDINGS_GBGP_342303, AUTHOR = {Giannini, S. and Biagioni, S. and Goggi, S. and Pardelli, G.}, TITLE = {Grey Literature citations in the age of Digital Repositories and Open Access}, YEAR = {2015}, ABSTRACT = {The work measures grey citations in the years 2012, 2013 and 2014 and then describes the features of GL documents cited in different areas of knowledge: Computational Linguistics, Computer Science and Engineering. 
With the aim to survey a wide and varied range of resources, we selected a sample data based on the bibliographic references of articles contained in 4 journals - all indexed by the ISI Web of Science and with an Impact Factor over the last three years - and two proceedings of international conferences held in 2012 and 2014.}, KEYWORDS = {Grey Literature, Digital Repositories, Open Access}, PAGES = {109-110}, URL = {http://greyguide.isti.cnr.it/attachments/category/27/GL17_Program_Book.pdf}, VOLUME = {17}, ISBN = {978-90-77484-26-5}, CONFERENCE_NAME = {Seventeenth International Conference on Grey Literature. A New Wave of Textual and Non-Textual Grey Literature}, CONFERENCE_PLACE = {Amsterdam, NL}, CONFERENCE_DATE = {December 1-2}, BOOKTITLE = {GL17 Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{GOGGI_2015_INPROCEEDINGS_GPBFMMDB_342221, AUTHOR = {Goggi, S. and Pardelli, G. and Bartolini, R. and Frontini, F. and Monachini, M. and Manzella, G. and De Mattei, M. and Bustaffa, F.}, TITLE = {A semantic engine for grey literature retrieval in the oceanography domain}, YEAR = {2015}, ABSTRACT = {Here we present the final results of MAPS (Marine Planning and Service Platform), an environment designed for gathering, classifying, managing and accessing marine scientific literature and data, making it available for search to Operative Oceanography researchers of various institutions by means of standard protocols. In previous publications the general architecture of the system as well as the set of metadata (Common Data Index) used to describe the documents were presented [3]; it was shown how individual oceanographic data-sets could be indexed within the MAPS library by types of measure, measurement tools, geographic areas, and also linked to specific textual documentation. 
Documentation is described using the current international standards: Title, Authors, Publisher, Language, Date of publication, Body/Institution, Abstract, etc.; serial publications are described in terms of ISSN, while books are assigned ISBN; content of various types on electronic networks is described by means of doi and url. Each description is linked to the document. Thanks to this, the MAPS library already enables researchers to go from structured oceanographic data to documents describing it. But this was not enough: documents may contain important information that has not been encoded in the metadata. Thus an advanced Search Engine was put in place that uses semantic-conceptual technologies in order to extract key concepts from unstructured text such as technical documents (reports and grey literature) and scientific papers and to make them indexable and searchable by the end user in the same way as the structured data (such as oceanographic observations and metadata) is. More specifically once a document is uploaded in the MAPS library, key domain concepts in documents are extracted via a natural language processing pipeline and used as additional information for its indexing. The key term identification algorithm is based on marine concepts that were pre-defined in a domain ontology, but crucially it also allows for the discovery of new related concepts. So for instance starting from the domain term salinity, related terms such as sea salinity and average sea salinity will also be identified as key terms and used for indexing and searching documents. A hybrid search system is then put in place, where users can search the library by metadata or by free text queries. In the latter case, the NLP pipeline performs an analysis of the text of the query, and when key concepts are matched, the relevant documents are presented. The results may be later refined by using other structured information (e.g. date of publication, area, ...). 
Currently a running system has been put in place, with data from satellites, buoys and sea stations; such data is documented and searchable by its relevant metadata and documentation. Results of quantitative evaluation in terms of information retrieval measures will be presented in the poster; more specifically, given an evaluation set defined by domain experts and composed of pre-defined queries together with documents that answer such queries, it will be shown how the system is highly accurate in retrieving the correct documents from the library. Though this work focuses on oceanography, its results may be easily extended to other domains; more generally, the possibility of enhancing the visibility and accessibility of grey literature via its connection to the data it describes and to an advanced full text indexing are of great relevance for the topic of this conference.}, KEYWORDS = {Information Extraction, Search Engine, Oceanography}, PAGES = {76-77}, URL = {https://publications.cnr.it/doc/342221}, VOLUME = {17}, ISBN = {978-90-77484-26-5}, CONFERENCE_NAME = {Seventeenth International Conference on Grey Literature. A New Wave of Textual and Non-Textual Grey Literature}, CONFERENCE_PLACE = {Amsterdam}, CONFERENCE_DATE = {December 1-2}, BOOKTITLE = {GL17 Program Book}, EDITOR = {Farace, D. and Frantzen, J.}, } @INPROCEEDINGS{MONTEMAGNI_2015_INPROCEEDINGS_MWN_367807, AUTHOR = {Montemagni, S. and Wieling, M. and Nerbonne, J.}, TITLE = {The contribution of dialectometry to the study of the dialects of Italy. A case study on Tuscan}, YEAR = {2015}, ABSTRACT = {We will illustrate the extent to which the recent advances of dialectometry can help to gain insight into the nature of linguistic variation - both synchronically and diachronically - in the study of the dialects of Tuscany, which have a special status in the complex puzzle of Italian dialects. 
This will be done by discussing the results achieved in a case study carried out over the last five years based on the corpus of dialectal data of the Atlante Lessicale Toscano ('Lexical Atlas of Tuscany', henceforth ALT, Giacomelli et al., 2000), a regional linguistic atlas focusing on dialectal variation throughout Tuscany, a region where both Tuscan and non-Tuscan dialects are spoken.}, KEYWORDS = {dialectometry, lexical atlas, italian dialects, Tuscany, Atlante Lessicale Toscano}, URL = {http://media.leidenuniv.nl/legacy/montemagni-wieling-nerbonne.pdf}, CONFERENCE_NAME = {Italian Dialect Meeting 2015 \& CIDSM X}, CONFERENCE_PLACE = {Leiden University-Centre for Linguistics}, CONFERENCE_DATE = {23 June 2015}, } @INPROCEEDINGS{SORIA_2015_INPROCEEDINGS_S_332521, AUTHOR = {Soria, C.}, TITLE = {Towards an Alliance for Digital Language Diversity: Vision, Goals, and Challenges}, YEAR = {2015}, ABSTRACT = {In order to foster the world's digital language diversity, and to ensure equal digital opportunities for languages, we encourage the creation of an Alliance for Digital Language Diversity, i.e. a network of different stakeholders involved in the creation and deployment of data. The Alliance needs - and presupposes - educational activities aimed at building the necessary digital skills and creating the psychological self-confidence necessary for speakers to produce data using their mother tongue.}, KEYWORDS = {digital language diversity, less-resourced languages, minority languages, digital rights, data production}, URL = {https://publications.cnr.it/doc/332521}, CONFERENCE_NAME = {Ugra Global Expert Meeting on Multilingualism in Cyberspace}, CONFERENCE_PLACE = {Khanty-Mansiysk, Russia}, CONFERENCE_DATE = {4-9/07/2015}, } @INPROCEEDINGS{VENTURI_2015_INPROCEEDINGS_VRMSTFB_340388, AUTHOR = {Venturi, G. and Rinnone, S. and Montemagni, S. and Sassi, M. and Terranova, G. and Flore, E.
and Bellandi, T.}, TITLE = {Language technologies for automatic readability assessment of health-related Information: a preliminary investigation into the informed consent forms used in a regional health service}, YEAR = {2015}, ABSTRACT = {Rationale: Within an information society, where everyone should be able to access all available information, improving access to written language is becoming more and more a central issue. This is the case for health-related information which should be accessible to all members of the society, including people who have reading difficulties as a result of a low education level or of language-based learning disabilities or because the language of the text is not their native language. Moreover, the breakdown of doctor-patient communication is one of the most frequent cause of adverse events. Research questions: We conducted a preliminary investigation to assess the readability of a corpus of informed consent forms used before a clinical procedure in the hospitals of a Regional Healthcare Service. Secondary goals include the comparison of readability across specialties and healthcare trusts. Methods: Providing complex scientific information in a way that is comprehensible to a lay person is a challenge that nowadays can be addressed by resorting to advanced Natural Language Processing (NLP) techniques, which make it possible to monitor the linguistic complexity of texts at the syntactic and lexical levels and to support their simplification, whenever needed. The study has been carried out by combining NLP-enabled feature extraction and state-of-the-art machine learning algorithms. To this end we used READ-IT, the first NLP-based readability assessment tool for Italian. Results: We analysed 584 documents, covering 29 specialties, for a total of 607.790 word tokens, currently used at the 36 public hospitals in Tuscany. 
Although the readability level of all documents in the corpus is low, both at the lexical and syntactic level, significant differences can be observed between specialties and healthcare trust releasing the forms. With the readability level ranging between 0 (easy-to-read) and 100 (difficult-to-read), it resulted that the pediatric informed consent documents are the most easy-to-read forms (with an average score of 75) while the most difficult-to read documents are documents of the surgical area (whose average score is 80) (standard deviation 2). Discussion: The state of the art resulting from this preliminary study shows that NLP-based readability assessment tools can help to measure the linguistic complexity of informed consent forms and guide the editor to identify linguistically complex passages that need to be simplified, either syntactically or lexically. The use of an assessment tool designed for the general language is the main limitation of the study and should be addressed through the customization of the tool to assess the readability of the healthcare jargon. 
A further step of the research consider also the design of a guidance to prepare readable informed consent forms.}, KEYWORDS = {Readability assessment, health-related information}, URL = {http://static1.squarespace.com/static/561c0d01e4b0b5ad2e65cc48/t/561d44dfe4b089431662d174/1444758751213/LibrettoProgramma.pdf}, CONFERENCE_NAME = {ISCOME 2015 Conference: "The Golden Bridge: Communication and Patient Safety"}, CONFERENCE_PLACE = {Montecatini Terme}, CONFERENCE_DATE = {15-16 giugno 2015}, } @TECHREPORT{CUCURULLO_2015_TECHREPORT_C_353217, AUTHOR = {Cucurullo, S.}, TITLE = {Sviluppo di funzioni software per il recupero di testi dell'Archivio Testuale dell'ILC e conversione in un formato di rappresentazione XML/TEI}, YEAR = {2015}, ABSTRACT = {Il presente rapporto documenta le attività svolte nell'ambito della Convenzione Operativa relativa allo sviluppo di funzioni software per il recupero di testi dell'Archivio Testuale dell'ILC e la conversione in un formato di rappresentazione XML/TEI, stipulata all'interno dell'Accordo di Collaborazione Scientifica ILC-CNR - Accademia della Crusca. In particolare, il rapporto si focalizza sui seguenti punti oggetto della Convenzione Operativa: 1. definizione di un formato di rappresentazione XML/TEI che tenga conto da un lato della tipologia di annotazioni presenti nei testi di partenza e dall'altro delle analisi ed elaborazioni a cui i testi convertiti dovranno essere sottoposti; 2. sviluppo di procedure di conversione dal formato dei "Periodici Milanesi" al formato XML/TEI e verifica dei risultati mediante parsing XML. 
Il Rapporto ripercorre le diverse fasi del lavoro, con particolare attenzione all'analisi dell'archivio testuale di partenza e ai risultati raggiunti, per arrivare a una discussione delle questioni che rimangono al momento aperte e degli sviluppi che possono prospettarsi per tali attività.}, KEYWORDS = {banca-dati testuale, Periodici Milanesi}, PAGES = {43}, URL = {https://publications.cnr.it/doc/353217}, } @TECHREPORT{CUCURULLO_2015_TECHREPORT_C_353218, AUTHOR = {Cucurullo, S.}, TITLE = {Sviluppo di funzioni software per il recupero di testi dell'Archivio Testuale dell' ILC e conversione in un formato di rappresentazione XML/TEI Fase 2}, YEAR = {2015}, ABSTRACT = {Il presente rapporto documenta le attività svolte nell'ambito della Convenzione Operativa relativa allo sviluppo di funzioni software per il recupero di testi dell'Archivio Testuale dell'ILC e la conversione in un formato di rappresentazione XML/TEI, stipulata all'interno dell'Accordo di Collaborazione Scientifica ILC-CNR - Accademia della Crusca. In particolare, questo rapporto si focalizza sul seguente oggetto della Convenzione Operativa: "sviluppo di procedure di conversione dal formato DBT al formato XML/TEI, secondo le indicazioni contenute nel Report relativo alla Fase 1 e verifica dei risultati mediante parsing XML costituito dal corpus dell'800 e '900 di testi estratto dal Patrimonio Testuale ILC, la cui composizione è stata concordata con l'Accademia della Crusca". Buona parte delle soluzioni adottate per quella tipologia di testi è stata scelta per essere utilizzata anche in testi non lemmatizzati, come nel caso di opere di autori italiani dell'Ottocento e del Novecento. 
La strutturazione generale del documento XML TEI in header e body e la definizione dei principali TAG utilizzati è condivisa sia dai testi di questo Corpus che da quelli lemmatizzati dei Periodici Milanesi, da cui siamo partiti perché presentavano una maggiore casistica di codifiche e contemporaneamente un formato di origine più lontano nel tempo. Si tratta infatti di formati e supporti di memorizzazione che precedono l'era del Personal Computer e che hanno già subito la trasformazione da EBCDIC ad ASCII.}, KEYWORDS = {Archivi Testuali}, PAGES = {21}, URL = {https://publications.cnr.it/doc/353218}, } @TECHREPORT{MARZI_2015_TECHREPORT_M_330235, AUTHOR = {Marzi, C.}, TITLE = {Word knowledge and word usage-Representations and processes in the mental lexicon}, YEAR = {2015}, ABSTRACT = {The final NetWordS Conference, held on the 30th and 31st of March, and 1st of April 2015 in Pisa, was convened by Prof. Pier Marco Bertinetto, Dr. Vito Pirrelli and Dr. Claudia Marzi, and brought together 91 participants (scholars, Post-Docs, PhD students) from numerous European, and some non-European, countries. A 3-day schedule involved all participants in a focused, cross-disciplinary discussion on representations and processes in the mental lexicon. People are known to understand, memorise and parse words in a context-sensitive, opportunistic way, by caching their most habitual and productive processing patterns into routinized behavioural schemes, similarly to what we observe for sequences of coordinated motor acts. Speakers, however, do not only take advantage of token-based information such as frequency of individual, holistically stored words, or episodic memories of word usage, but they are also able to organise stored word forms through abstract paradigmatic structures (or word families) whose overall size and distribution are important determinants of lexical categorisation, inference and productivity. 
Lexical organisation is, in fact, not necessarily functional to descriptive economy and minimisation of storage, but appears to be influenced by more dynamic, communication-oriented functions such as memorisation, prediction-based recognition and production. Lending support to this view, usage-based approaches to word processing have recently offered novel explanatory frameworks that capitalise on the stable correlation patterns between lexical representations on the one hand and process-based operations that make representations functional to communicative exchanges on the other hand. By focusing on the battery of cognitive functions supporting verbal communication (ranging from input recoding to rehearsal, access, recall and coactivation) and by exploring their psycholinguistic correlates and neuroanatomical substrates, these approaches promote a new view of language architecture as an emergent property of the interaction between language-specific input conditions and low-level, domain-specific cognitive predispositions.}, KEYWORDS = {word knowledge, word usage, mental lexicon, interdisciplinary approach}, PAGES = {2-12}, URL = {http://www.networds-esf.eu/uploads/NetWordS/Science_Meeting_Scientific_Report_5810.pdf}, } @MISC{BARONI_2015_MISC_B_349786, AUTHOR = {Baroni, P.}, TITLE = {2015-1-IT02-KA204-015090 DLDP: Interactive Web Site}, YEAR = {2015}, ABSTRACT = {Sito Web interattivo di DLDP - Digital Language Diversity Project (Programma Erasmus+ | Accordo di Sovvenzione N° 2015-1-IT02-KA204-015090), realizzato con Drupal, sviluppato in inglese, italiano, basco, finlandese, francese, tedesco e spagnolo}, KEYWORDS = {Sito web}, URL = {http://www.dldp.eu}, } @MISC{BARONI_2015_MISC_B_483772, AUTHOR = {Baroni, P.}, TITLE = {CLARIN-IT Web Site}, YEAR = {2015}, ABSTRACT = {Sito Web del Consorzio Nazionale CLARIN-IT, realizzato con Drupal, sviluppato in inglese e italiano}, KEYWORDS = {CLARIN, National Consortium}, URL = {https://www.clarin-it.it}, }
@MISC{BARONI_2015_MISC_B_483785, AUTHOR = {Baroni, P.}, TITLE = {LaRI Web Site}, YEAR = {2015}, ABSTRACT = {Sito Web del Gruppo di Ricerca del CNR-ILC "LaRI - Risorse e Infrastrutture Linguistiche", realizzato con WordPress, sviluppato in italiano e inglese}, KEYWORDS = {risorse linguistiche, infrastrutture linguistiche}, URL = {http://lari.ilc.cnr.it}, } @MISC{CININI_2015_MISC_CCM_390864, AUTHOR = {Cinini, A. and Cutugno, P. and Marconi, L.}, TITLE = {Sviluppo di una banca dati strutturata di trascrizioni di parlato di singoli soggetti anziani monitorati nel tempo}, YEAR = {2015}, ABSTRACT = {L'Istituto di Linguistica Computazionale "Antonio Zampolli" del Consiglio Nazionale delle Ricerche, ILC-CNR, in qualità di partner del progetto Ninfa "iNtelligent Integrated Network For Aged people" e nell'ambito del WP3 "Analisi e test di implementazione di deficit cognitivo attraverso l'analisi del linguaggio", ha realizzato una banca dati strutturata costituita da un corpus di registrazioni e di trascrizioni dei singoli soggetti anziani monitorati nel tempo.}, KEYWORDS = {Natural Language Processing, Cognitive Impairment, trattamento automatico del linguaggio, analisi del linguaggio}, URL = {https://publications.cnr.it/doc/390864}, } @MISC{DANCONA_2015_MISC_DBNFCBDM_390659, AUTHOR = {D'Ancona, C. and Bozzi, A. and Nahli, O. and Farina, M. and Coda, E. and Boschetti, F. and Del Grosso, A. M. and Marchi, S.}, TITLE = {Banca dati testuale Greek into Arabic}, YEAR = {2015}, ABSTRACT = {Banca dati testuale con la codifica XML della pericopatura dei testi Greco-Arabo di alcuni trattati delle Enneadi di Plotino.}, KEYWORDS = {Digital Humanities, Computational Philology, Greek into Arabic, http://g2a.ilc.cnr.it}, URL = {http://g2a.ilc.cnr.it/}, } @MISC{DELGROSSO_2015_MISC_D_390562, AUTHOR = {Del Grosso, A. M.}, TITLE = {Una applicazione Web per lo studio specialistico dei testi. 
Il modello adottato e i risultati fino ad oggi ottenuti}, YEAR = {2015}, ABSTRACT = {Il lavoro si concentra sulla realizzazione di una libreria di moduli software relativi ad una applicazione Web per la Textual Scholarship. Tale libreria è basata su un modello che considera la molteplicità degli approcci per analizzare un testo, soprattutto, ma non esclusivamente, antico. In questo quadro il modello prende in considerazione elementi di carattere filologico e linguistico fra i quali anche quelli che derivano dalla Linguistica computazionale. Dal momento che è impensabile che un sistema, sia pure complesso e articolato in sottosistemi, possa ambire a intervenire in qualunque punto della filiera delle analisi scientifiche sui testi, il lavoro è particolarmente apprezzabile perché impostato sulla base di una architettura informatica multi-modulare. In tal modo è possibile inserire nel sistema un cospicuo numero di moduli software ed è aperto alla possibilità di inserirne (o farne inserire) molti altri a seconda di specifiche necessità. Fra i moduli più significativi, sono già attivi quelli per: 1) l'annotazione di parti di testo (dalla singola parola ad una espressione completa, ecc.); 2) la classificazione delle annotazioni stesse secondo una tipologia indicata dall'utente (per esempio, annotazione di tipo semantico, morfologico, ontologico, ecc.); 3) la produzione di indici e concordanze; 4) l'allineamento fra testo e eventuale traduzione (antica o moderna); 5) l'estrazione di named entity (NER). Il modello e i moduli realizzati hanno mostrato grande efficacia in almeno 3 progetti: 1) Progetto PRIN 2008 "Edizione digitale dei manoscritti di F. de Saussure; 2) Progetto ERC advanced grant "Greek into Arabic: Philosophical Concepts and Linguistic Bridges"; 3) Progetto "Traduzione Italiana del Talmud Babilonese". 
L'applicazione parzialmente già realizzata prevede un auspicabile sviluppo nei prossimi anni con sperimentazioni su testi manoscritti di autori moderni e contemporanei, oltre che su opere antiche e medievali per finalità di critica testuale, è interamente open source e sviluppata con l'utilizzo di standard internazionali, quali, tra l'altro, il sistema di mark-up TEI.}, KEYWORDS = {computational philology, digital humanities, software engineering}, URL = {https://publications.cnr.it/doc/390562}, } @MISC{MANCINI_2015_MISC_MPDL_390658, AUTHOR = {Mancini, L. and Pedretti, I. and Del Grosso, A. M. and Luzzi, D.}, TITLE = {Banca dati testuale codifica delle lettere Cristoforo Clavius}, YEAR = {2015}, ABSTRACT = {Banca dati testuale delle lettere di Cristoforo Clavio derivante dal lavoro di codifica fatto adottando il vocabolario XML e le linee guida della text encoding initiative (TEI). L'attività è frutto del progetto Clavius on The Web.}, KEYWORDS = {digital humanities, Cristoforo Clavio, computational philology, TEI-XML}, URL = {http://claviusontheweb.it}, } @MISC{MARCHI_2015_MISC_MD_390657, AUTHOR = {Marchi, S. and Del Grosso, A. M.}, TITLE = {Greek into Arabic philological Web platform}, YEAR = {2015}, ABSTRACT = {Piattaforma filologico-computazionale sviluppata nell'ambito del progetto ERC 2009 Advanced Grant n. 249431. Titolo: Greek into Arabic. Philosophical concepts and linguistic bridges.}, KEYWORDS = {computational philology, digital humanities, ERC, Greek into Arabic}, URL = {http://g2a.ilc.cnr.it/}, } @MISC{NAHLI_2015_MISC_N_390722, AUTHOR = {Nahli, O.}, TITLE = {Banca dati dell'analisi morfo-sintattica del testo "Aflūṭīn ʻinda al-ʻArab", ʻAbd al-Raḥmān Badawī, Cairo 1955, 1966}, YEAR = {2015}, ABSTRACT = {Banca dati testuali con l'analisi morfo-sintattica del testo "Aflūṭīn ʻinda l-ʻArab"; editore ʻA. 
Badawī, Dār al-Nahḍat al-ʻarabiyya, Cairo 1966}, KEYWORDS = {analisi morfo-sintattica, Lingua araba, Greek Into Arabic}, URL = {http://g2a.ilc.cnr.it:8080/Teologia_Wapp/Home.xhtml?centerPage=teologia}, } @MISC{NAHLI_2015_MISC_N_390727, AUTHOR = {Nahli, O.}, TITLE = {Aggiornamenti banca dati del Motore morfologico Aramorph}, YEAR = {2015}, ABSTRACT = {AraMorph's components are essentially two: the rule engine for morphological analysis and a repository of linguistic resources mainly composed of three lexicons: i) the dictStems lexicon, which contains 38.600 lemmas; ii) the dictPrefixes lexicon, which consists of sequences of proclitics and inflectional prefixes; iii) the dictSuffixes lexicon, which consists of sequences of inflectional suffixes and enclitics. These lexica are accompanied by three compatibility tables used for checking combinations of A (proclitics+prefixes), B (stems) and C (suffixes+enclitics). To cut down on arabic parse overgeneration, one has to enforce further restrictions in compatibility tables, e.g. the verb's ability to accept nominative and accusative pronouns, and to select a rational subject. We then augmented verb entries with subcategorization information such as case assignment and the restriction on rational subjects. At the same time, it was necessary to update compatibility tables.}, KEYWORDS = {analisi morfo-sintattica, Lingua araba, Aramorph}, URL = {http://hdl.handle.net/20.500.11752/ILC-94}, } @ARTICLE{BRUNATO_2014_ARTICLE_BV_311157, AUTHOR = {Brunato, D. and Venturi, G.}, TITLE = {Le tecnologie linguistico-computazionali nella misura della leggibilità di testi giuridici}, YEAR = {2014}, ABSTRACT = {Il presente contributo illustra una innovativa metodologia per il calcolo della leggibilità di un testo giuridico basata su strumenti di Trattamento Automatico del Linguaggio ed espressamente rivolta alla sua semplificazione. 
Inserendoci nel più ampio filone di ricerche che affronta il tema dell'accessibilità della lingua del diritto, discutiamo con esempi tratti da testi reali, il caso specifico della prosa burocratico-amministrativa dal momento che l'accessibilità a tali documenti costituisce un elemento chiave della comunicazione istituzioni-cittadini. A nostra conoscenza, tale studio rappresenta il primo tentativo volto a mostrare come tecnologie linguistico-computazionali allo stato dell'arte per la lingua italiana incomincino ad essere mature per costituire non solo un ausilio per definire automaticamente la leggibilità di testi giuridici ma anche una guida per una loro stesura semplificata. Tali funzionalità saranno illustrate grazie a READ-IT, il primo e al momento unico strumento di valutazione della leggibilità oggi esistente per la lingua italiana basato su strumenti di Trattamento Automatico del Linguaggio.}, PAGES = {111-142}, URL = {https://publications.cnr.it/doc/311157}, VOLUME = {XXIII}, PUBLISHER = {Edizioni Scientifiche Italiane (Firenze, Italia)}, ISSN = {0390-0975}, JOURNAL = {Informatica e diritto}, } @ARTICLE{CHERSI_2014_ARTICLE_CFPP_283372, AUTHOR = {Chersi, F. and Ferro, M. and Pezzulo, G. and Pirrelli, V.}, TITLE = {Topological Self-Organization and Prediction Learning Support Both Action and Lexical Chains in the Brain}, YEAR = {2014}, ABSTRACT = {A growing body of evidence in cognitive psychology and neuroscience suggests a deep interconnection between sensory-motor and language systems in the brain. Based on recent neurophysiological findings on the anatomo-functional organization of the fronto-parietal network, we present a computational model showing that language processing may have reused or co-developed organizing principles, functionality, and learning mechanisms typical of premotor circuit. The proposed model combines principles of Hebbian topological self-organization and prediction learning. 
Trained on sequences of either motor or linguistic units, the network develops independent neuronal chains, formed by dedicated nodes encoding only context-specific stimuli. Moreover, neurons responding to the same stimulus or class of stimuli tend to cluster together to form topologically connected areas similar to those observed in the brain cortex. Simulations support a unitary explanatory framework reconciling neurophysiological motor data with established behavioral evidence on lexical acquisition, access, and recall.}, KEYWORDS = {Motor chains, Lexical chains, Serial working memory, Computational modeling, Self-organizing maps, Somatotopic organization, Prediction}, PAGES = {476-491}, URL = {http://onlinelibrary.wiley.com/doi/10.1111/tops.12094/abstract?deniedAccessCustomisedMessage=\&userIsAuthenticated=false}, VOLUME = {6}, DOI = {10.1111/tops.12094}, PUBLISHER = {Cognitive Science Society, Inc (Hoboken, NJ, Stati Uniti d'America)}, ISSN = {1756-8757}, JOURNAL = {Topics in cognitive science (Print)}, } @ARTICLE{CUTUGNO_2014_ARTICLE_CMMCM_282735, AUTHOR = {Cutugno, P. and Marconi, L. and Morgavi, G. and Chiarella, D. and Morando, M.}, TITLE = {Analysis of new collaborative writing within Web 2.0}, YEAR = {2014}, ABSTRACT = {In recent years, the transition from Web 1.0 to Web 2.0 enabled the creation of content by the users of the Network: social networks, blogs, forums, chats and wikis have arisen. Phenomena, such as collaborative/collective writing, already born at the beginning of the 20th century, found their natural setting, a wide audience of reference of writers and readers in multiple languages within the Web 2.0. In this paper our goal is to verify if and how the characteristics of the textual analysis of narrative plots can be used for the analysis of collaborative narrative texts. 
In particular, we will check if features like correctness, completeness, consistency and coherence together with tools for statistical analysis of language suitable for analysing the new collaborative writing 2.0.}, PAGES = {91-97}, URL = {https://publications.cnr.it/doc/282735}, VOLUME = {22}, PUBLISHER = {WSEAS Press (Wisconsin (Stati Uniti d'America), Stati Uniti d'America)}, ISSN = {1790-5109}, JOURNAL = {Recent Advances in Computer Engineering A Series of Reference Books and Textbooks}, } @ARTICLE{DEFELICE_2014_ARTICLE_D_285274, AUTHOR = {De Felice, I.}, TITLE = {«Possibilities of action» in language: affordances and verbal polysemy}, YEAR = {2014}, PAGES = {179-191}, URL = {https://publications.cnr.it/doc/285274}, VOLUME = {1}, PUBLISHER = {Il Mulino (Italia, Italia)}, ISSN = {2279-7777}, JOURNAL = {Reti, Saperi, Linguaggi. Italian Journal of Cognitive Sciences}, } @ARTICLE{DEFELICE_2014_ARTICLE_D_285275, AUTHOR = {De Felice, I.}, TITLE = {La sinestesia linguistica nella poesia latina}, YEAR = {2014}, ABSTRACT = {The main purpose of this study is to explore linguistic synaesthesia in Latin poetic language. Through the analysis of a poetic corpus, which consists of works of Catullus, Horace, Lucretius, Ovid, Vergil, all occurrences of twenty Latin synaesthetic adjectives (previously extracted by Aeneid and De Rerum Natura) were retrieved; all lemmas co-occurring with these adjectives in nominal phrases were then classified into the following categories according to their meaning in context: monoaesthetic, synaesthetic (touch, temperature perception, taste, smell, sight, motion perception, hearing), pseudo-synaesthetic (i.e. psycho-moral), abstract. 
The research not only shows how much linguistic synaesthesia is present in Latin poetry, but also demonstrates that Latin synaesthesias comply with cross-linguistic tendencies, especially with regard to the hypothesis of directional hierarchy.}, PAGES = {61-107}, URL = {https://publications.cnr.it/doc/285275}, VOLUME = {52}, PUBLISHER = {ETS (Pisa, Italia)}, ISSN = {0085-6827}, JOURNAL = {Studi e saggi linguistici}, } @ARTICLE{DEFELICE_2014_ARTICLE_D_289638, AUTHOR = {De Felice, I.}, TITLE = {From hands to handles: How objects' orientation affects grasp descriptions}, YEAR = {2014}, PAGES = {109-115}, URL = {http://www.neapolisanit.eu/neascience/wp-content/uploads/2014/12/ATTI_AISC_2014_ROMA2.pdf}, VOLUME = {5}, ISSN = {2282-6009}, JOURNAL = {Nea Science-Giornale Italiano di neuroscienze, psicologia e riabilitazione}, } @ARTICLE{DELLORLETTA_2014_ARTICLE_DMV_285640, AUTHOR = {Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, TITLE = {Assessing document and sentence readability in less resourced languages and across textual genres}, YEAR = {2014}, ABSTRACT = {In this paper, we tackle three underresearched issues of the automatic readability assessment literature, namely the evaluation of text readability in less resourced languages, with respect to sentences (as opposed to documents) as well as across textual genres. Different solutions to these issues have been tested by using and refining READ-IT, the first advanced readability assessment tool for Italian, which combines traditional raw text features with lexical, morpho-syntactic and syntactic information. In READ-IT readability assessment is carried out with respect to both documents and sentences, with the latter constituting an important novelty of the proposed approach: READ-IT shows a high accuracy in the document classification task and promising results in the sentence classification scenario. 
By comparing the results of two versions of READ-IT, adopting a classification- versus ranking-based approach, we also show that readability assessment is strongly influenced by textual genre; for this reason a genre-oriented notion of readability is needed. With classification-based approaches, reliable results can only be achieved with genre-specific models: Since this is far from being a workable solution, especially for less resourced languages, a new ranking method for readability assessment is proposed, based on the notion of distance.}, KEYWORDS = {readability assessment, less resourced languages, multi-level linguistic annotation, textual genres}, PAGES = {163-193}, URL = {http://www.ingentaconnect.com/content/jbp/itl/2014/00000165/00000002/art00005}, VOLUME = {165}, DOI = {10.1075/itl.165.2.03del}, PUBLISHER = {Peeters Publishers (Leuven, Belgio)}, ISSN = {1783-1490}, JOURNAL = {ITL. Internationaler technischer Literaturanzeiger (Online)}, } @ARTICLE{GOGGI_2014_ARTICLE_GPGB_284602, AUTHOR = {Goggi, S. and Pardelli, G. and Giannini, S. and Biagioni, S.}, TITLE = {Grey Literature in European Commission Projects}, YEAR = {2014}, ABSTRACT = {The survey is focused on the documentation produced by the European Commission (EC) projects involved in the Framework Programme for Research and Technological Development (hereafter FP7) and managed by the Italian National Research Council (hereafter CNR). In particular, the Grey Literature (GL) available on CORDIS and European Projects websites was analysed. In order to verify how it is managed and whether it is compliant with EC recommendations, some categories were introduced to identify, measure and evaluate the usability and availability of projects production. 
Data was obtained from a sample of European projects websites.}, KEYWORDS = {Grey Literature, European Commission Projects}, PAGES = {133-144}, URL = {http://www.greynet.org/thegreyjournal/previousissues.html}, VOLUME = {10}, PUBLISHER = {TextRelease (Amsterdam, Paesi Bassi)}, ISSN = {1574-1796}, JOURNAL = {The Grey journal (Print)}, } @ARTICLE{MARZI_2014_ARTICLE_MFK_288212, AUTHOR = {Marzi, C. and Ferro, M. and Keuleers, E.}, TITLE = {Perception of typicality in the lexicon: Wordlikeness, lexical density and morphonotactic constraints}, YEAR = {2014}, ABSTRACT = {The extent to which a symbolic time-series (a sequence of sounds or letters) is a typical word of a language, referred to as WORDLIKENESS, has been shown to have effects in speech perception and production, reading proficiency, lexical development and lexical access, short-term and long-term verbal memory. Two quantitative models have been suggested to account for these effects: serial phonotactic probabilities (the likelihood for a given symbolic sequence to appear in the lexicon) and lexical density (the extent to which other words can be obtained from a target word by changing, deleting or inserting one or more symbols in the target). The two measures are highly correlated and thus easy to be confounded in measuring their effects in lexical tasks. In this paper, we propose a computational model of lexical organisation, based on Self-Organising Maps with Hebbian connections defined over a temporal layer (TSOMs), providing a principled algorithmic account of effects of lexical acquisition, processing and access, to further investigate these issues. 
In particular, we show that (morpho-)phonotactic probabilities and lexical density, though correlated in lexical organisation, can be taken to focus on different aspects of speakers' word processing behaviour and thus provide independent cognitive contributions to our understanding of the principles of perception of typicality that govern lexical organisation.}, KEYWORDS = {wordlikeness, lexical access, word processing, frequency, memory}, PAGES = {171-191}, URL = {http://www.scopus.com/record/display.url?eid=2-s2.0-84919701117\&origin=inward}, VOLUME = {40}, PUBLISHER = {Zavod za lingvistiku Filozofskog fakulteta (Zagreb, Croazia)}, ISSN = {0586-0296}, JOURNAL = {Suvremena lingvistika}, } @ARTICLE{MARZI_2014_ARTICLE_MFP_287289, AUTHOR = {Marzi, C. and Ferro, M. and Pirrelli, V.}, TITLE = {Morphological structure through lexical parsability}, YEAR = {2014}, ABSTRACT = {The emergence of morphological structure in lexical acquisition is analysed in the computational framework of Temporal Self-Organising Maps (TSOMs), to provide an explanatory basis for both psycholinguistic and linguistic accounts of lexical parsability. The investigation we propose is grounded on the hypothesis that perception of morphological structure (parsability) and frequency strongly correlate in the acquisition of inflectional paradigms. Analysis of experimental results of word acquisition obtained by artificially varying training conditions, allows us to understand developmental competition between fully-inflected word forms, and to investigate a hierarchy of frequency effects. 
The computational and theoretical implications of such a memory-based view of the relationship between frequency and perception, and its potential to account}, KEYWORDS = {inflectional paradigms, morphological structure, token/type frequency, word processing}, PAGES = {263-290}, URL = {http://www.rivisteweb.it/doi/10.1418/78410}, VOLUME = {XIII}, DOI = {10.1418/78410}, PUBLISHER = {Il Mulino, Bologna (Italia)}, ISSN = {1720-9331}, JOURNAL = {Lingue e linguaggio}, } @ARTICLE{PIRRELLI_2014_ARTICLE_P_288043, AUTHOR = {Pirrelli, V.}, TITLE = {Review of "Computational Paralinguistics: Emotion, Affect and Personality in Speech and Language Processing" (by Schuller \& Batliner, Wiley Publishing 2013)}, YEAR = {2014}, KEYWORDS = {Paralinguistics, Pragmatics, Language usage}, URL = {http://www.computingreviews.com/review/review_review.cfm?review_id=142608}, PUBLISHER = {Association for Computing Machinery (New York, N. Y, Stati Uniti d'America)}, ISSN = {1530-6585}, JOURNAL = {Computing reviews (Online)}, } @ARTICLE{SASSI_2014_ARTICLE_SBP_280559, AUTHOR = {Sassi, M. and Biagioni, S. and Pardelli, G.}, TITLE = {A Linguistic and Gender Approach to 1841 Tuscany Population Census}, YEAR = {2014}, ABSTRACT = {The Census of 1841 in Tuscany was the first official data registry which tried to describe Tuscan population as a whole on the basis of the Granducato's territory. With the use of special ad-hoc created forms, all demographic and socioeconomic characteristics of families and single persons in "Granducato di Toscana" were described. 
Work is developed in five points: (1) informatics retrieval of linguistic information from Tuscany of 1800 focused by the arts and craftsmanship more in use in families of that time; (2) gender division of works and craftsmanship; (3) observation of lexical disparity in the four communities and terminological curiosities of that historical period; (4) actually no longer existing craftsmanship; and (5) diachronic analysis of communities, where possible. In this scenario, the authors will introduce the methodology they used for data analysis. Tables and figures will be used to better focus different moments and results of the work. A Glossary in Appendix will contain the English translation of the Italian terms extracted from the Corpus.}, KEYWORDS = {Tuscany Population Census, sociological analysis, gender analysis, the 19th work terminology, linguistic statistics}, PAGES = {318-329}, URL = {http://www.davidpublishing.com/show.html?16049}, VOLUME = {12}, PUBLISHER = {USA-China Business Review (Journal), Inc (New York, NY, Stati Uniti d'America)}, ISSN = {1539-8080}, JOURNAL = {US-China foreign language}, } @ARTICLE{SORIA_2014_ARTICLE_SCMQBCMOP_285553, AUTHOR = {Soria, C. and Calzolari, N. and Monachini, M. and Quochi, V. and Bel, N. and Choukri, K. and Mariani, J. and Odijk, J. and Piperidis, S.}, TITLE = {The language resource Strategic Agenda: the FLaReNet synthesis of community recommendations}, YEAR = {2014}, ABSTRACT = {The main purpose of this paper is to serve as a landmark for future research and in particular for future strategic, infrastructural and coordination initiatives. It presents a preliminary plan for actions and infrastructures that could become the basis for future initiatives in the sector of Language Resources and Technologies (LRTs). 
The FLaReNet Language Resource Strategic Agenda presents a set of recommendations for the development and progress of LRT in Europe, as issued from a three-year consultation of the FLaReNet European project. 
Recommendations cover a broad range of topics and activities, spanning over production and use of language resources, licensing, maintenance and preservation issues, infrastructures for language resources, resource identification and sharing, evaluation and validation, interoperability and policy issues. The intended recipients belong to a large set of players and stakeholders in LRT, ranging from individuals to research and education institutions, to policy-makers, funding agencies, SMEs and large companies, service and media providers}, KEYWORDS = {Strategic agenda, Language resources planning, Recommended priority actions}, PAGES = {753-775}, URL = {https://publications.cnr.it/doc/285553}, VOLUME = {48}, DOI = {10.1007/s10579-014-9279-y}, PUBLISHER = {Springer (Dordrecht, Paesi Bassi)}, ISSN = {1574-020X}, JOURNAL = {Language resources and evaluation (Print)}, } @ARTICLE{WIELING_2014_ARTICLE_WMNB_285543, AUTHOR = {Wieling, M. and Montemagni, S. and Nerbonne, J. and Baayen, R. H.}, TITLE = {Lexical differences between Tuscan dialects and standard Italian: Accounting for geographic and socio-demographic variation using generalized additive mixed modeling}, YEAR = {2014}, ABSTRACT = {This study uses a generalized additive mixed-effects regression model to predict lexical differences in Tuscan dialects with respect to standard Italian. We used lexical information for 170 concepts used by 2,060 speakers in 213 locations in Tuscany. In our model, geographical position was found to be an important predictor, with locations more distant from Florence having lexical forms more likely to differ from standard Italian. In addition, the geographical pattern varied significantly for low- versus high-frequency concepts and older versus younger speakers. Younger speakers generally used variants more likely to match the standard language. Several other factors emerged as significant. 
Male speakers as well as farmers were more likely to use lexical forms different from standard Italian. In contrast, higher-educated speakers used lexical forms more likely to match the standard. The model also indicates that lexical variants used in smaller communities are more likely to differ from standard Italian. The impact of community size, however, varied from concept to concept. For a majority of concepts, lexical variants used in smaller communities are more likely to differ from the standard Italian form. For a minority of concepts, however, lexical variants used in larger communities are more likely to differ from standard Italian. Similarly, the effect of the other community- and speaker-related predictors varied per concept. These results clearly show that the model succeeds in teasing apart different forces influencing the dialect landscape and helps us to shed light on the complex interaction between the standard Italian language and the Tuscan dialectal varieties. In addition, this study illustrates the potential of generalized additive mixed-effects regression modeling applied to dialect data.*}, KEYWORDS = {Tuscan dialects, lexical variation, generalized additive modeling, mixed-effects regression modeling, geographical variation}, PAGES = {669-692}, URL = {http://www.linguisticsociety.org/files/wieling.pdf}, VOLUME = {90}, PUBLISHER = {Linguistic Society of America [etc. ] (Washington, DC [etc. ], Stati Uniti d'America)}, ISSN = {0097-8507}, JOURNAL = {Language (Baltimore)}, } @INCOLLECTION{BOSCHETTI_2014_INCOLLECTION_B_288045, AUTHOR = {Boschetti, F.}, TITLE = {Corpus Linguistics and Greek}, YEAR = {2014}, PAGES = {391-394}, URL = {https://publications.cnr.it/doc/288045}, ISBN = {9789004225978}, BOOKTITLE = {Encyclopedia of Ancient Greek Language and Linguistics (3 vols)}, EDITOR = {Giannakis, G. 
K.}, } @INCOLLECTION{BOSCHETTI_2014_INCOLLECTION_B_308246, AUTHOR = {Boschetti, F.}, TITLE = {Strumenti on-line per l'analisi e l'annotazione di testi letterari ed epigrafici bilingui}, YEAR = {2014}, ABSTRACT = {Il presente contributo illustra alcuni metodi e strumenti per l'allineamento di testi bilingui e descrive in particolare il sistema sviluppato presso l'Istituto di Linguistica Computazionale «A. Zampolli» del Consiglio Nazionale delle Ricerche di Pisa, corredato di funzioni specifiche per l'epigrafia digitale. Lo strumento informatico facilita l'interrogazione e la visualizzazione dei passi in parallelo, oltre a permettere allo studioso di annotare singole parole o porzioni più estese di testo che si corrispondono, a giudizio dello studioso stesso, in modo più o meno fedele nelle due lingue.}, KEYWORDS = {Digital Epigraphy, Cooperative Philology}, PAGES = {1-9}, URL = {http://www.edizionicafoscari.unive.it/col/exp/30/59/Archivistica/3}, VOLUME = {3}, PUBLISHER = {Edizioni Ca' Foscari (Venezia, ITA)}, ISBN = {978-88-97735-94-6}, BOOKTITLE = {Memoria poetica e poesia della memoria-La versificazione epigrafica dall'antichità all'umanesimo}, EDITOR = {Pistellato, A.}, } @INCOLLECTION{BOZZI_2014_INCOLLECTION_B_322719, AUTHOR = {Bozzi, A.}, TITLE = {Computer-assisted Scholarly Editing of Manuscripts Sources}, YEAR = {2014}, ABSTRACT = {The contribution will concentrate on the specific aspect of textual criticism. I realize this is a discipline that could be defined as being 'very exclusive,' as the scholars are not numerically equivalent to the community of people working in other Humanities disciplines; for example historians, philosophers, or those dealing with the history and criticism of literature. 
However, if we consider that textual criticism covers a very large period (Ancient, Medieval and Modern times) and many languages, there is also an increase in the population of specialists.}, KEYWORDS = {Computer-aided Textual Scholarship, Textual Criticism, Computational Philology}, PAGES = {99-115}, URL = {http://www.oapen.org/search?identifier=515678}, PUBLISHER = {Amsterdam University Press (Amsterdam, NLD)}, ISBN = {978-90-896-4564-7}, BOOKTITLE = {New Publication Cultures in the Humanities. Exploring the Paradigm Shift}, EDITOR = {Dávidházi, P.}, } @INCOLLECTION{CALZOLARI_2014_INCOLLECTION_CNMQST_286868, AUTHOR = {Calzolari, N. and Monachini, M. and Quochi, V. and Soria, C. and Toral, A.}, TITLE = {Lexicons, Terminologies, Ontologies: Reflections from Experiences in Resource Construction}, YEAR = {2014}, ABSTRACT = {This contribution aims at highlighting the strong interconnection between lexicons, terminologies and ontologies and especially the fundamental role that ontologies and lexica mutually play. Our view is that lexical resources are evolving in nature, from ontologically based lexicons we are going towards lexically based ontologies. We explore different instantiations of the current trend of using formal ontologies as a core module of computational lexicons, presenting the advantages especially in multilingual and terminological contexts. We present work showing that the lexical knowledge already present in non formal computational lexicons can be exploited to derive or enrich a formal ontology without much manual effort. In the terminology domain, we describe the construction of a resource for biology, directly linked to a parallel domain-ontology, that combines characteristics of both lexicons and terminologies, so that it can allow for intelligent access to content. 
Finally, we describe our experience in two projects in which formal ontologies play a central role in the context of multilingual computational lexicons, where the ontology is what acts as the glue among the different monolingual lexicons and what provides cross-lingual reasoning capabilities.}, KEYWORDS = {Computational Lexicons, Ontology, Terminology, Interoperability, Standards}, PAGES = {103-121}, URL = {http://www.springer.com/computer/ai/book/978-3-642-45326-7}, VOLUME = {8003}, DOI = {10.1007/978-3-642-45327-4_7}, PUBLISHER = {Springer (Berlin Heidelberg, DEU)}, ISBN = {978-3-642-45326-7}, BOOKTITLE = {Language, Culture, Computation. Computational Linguistics and Linguistics. Essays Dedicated to Yaacov Choueka on the Occasion of His 75th Birthday, Part III}, EDITOR = {Dershowitz, N. and Nissan, E.}, } @INCOLLECTION{RUSSO_2014_INCOLLECTION_RC_288041, AUTHOR = {Russo, I. and Caselli, T.}, TITLE = {Converging evidences on the eventivity of Italian nouns}, YEAR = {2014}, ABSTRACT = {This paper aims at shedding lights on the complex semantic concept of "event noun". Starting with the working hypothesis that linguistic context and corpus-based distributional information can be decisive, we propose a measure for eventivity that relies on syntagmatic cues. By means of a comparison between syntagmatic evidence obtained from a corpus study and speakers' judgments, we have identified a measure of eventivity for nouns. The comparison with annotated data proves its soundness.}, KEYWORDS = {event nominals, syntagmatic cues, degree of eventivity}, PAGES = {179-200}, URL = {https://publications.cnr.it/doc/288041}, PUBLISHER = {Düsseldorf University Press (Düsseldorf, DEU)}, ISBN = {978-3-943460-87-2}, BOOKTITLE = {Meaning, frames, and conceptual representation}, EDITOR = {Gamerschlag, T. and Gerland, D. and Osswald, R. and Wiebke, P.}, } @EDITORIAL{BOSCO_2014_EDITORIAL_BCDFMS_330112, AUTHOR = {Bosco, C. and Cosi, P. and Dell'Orletta, F. and Falcone, M. and Montemagni, S. 
and Simi, M.}, TITLE = {Proceedings of the Fourth International Workshop EVALITA 2014}, YEAR = {2014}, KEYWORDS = {Trattamento Automatico del Linguaggio, Speech Processing, Lingua Italiana}, PAGES = {167}, URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-EVALITA-2014.pdf}, PUBLISHER = {Pisa University Press (Pisa, ITA)}, ISBN = {978-88-67414-72-7}, } @EDITORIAL{ELMOHAJIR_2014_EDITORIAL_EACAEPZE_330677, AUTHOR = {El Mohajir, M. and Al Achhab, M. and Chahhou, M. and Arioua, M. and El Mohajir, B. and Pirrelli, V. and Zarghili, A. and El Far, M.}, TITLE = {Proceedings of IEEE-CiST14-Third IEEE International Colloquium in Information Science and Technology (CIST)}, YEAR = {2014}, ABSTRACT = {The 3rd international IEEE Colloquium on Information Science and Technology (CIST'14) is part of the IEEE CONFERENCE SERIES that are held in Morocco, and is sponsored by the IEEE Morocco Section and the IEEE Morocco Computer \& Communication Joint Chapter, and the UAE IEEE Student Branch. The 2014 edition was organized in collaboration with the Faculty of Sciences of Tetuan, the national school of applied sciences of Tetuan and the University of Abdelmalek Essaadi. IEEE CIST is emerging as a key annual event that aims to serve as a forum to promote the exchange of the latest advances achieved by IT researchers, IT decision makers, IT managers, application designers and software engineers in the domain of information science and related technology. Computing challenges, models, applications and IT solutions will be discussed from the perspectives of academia, industry and government. In addition to the main conference topics, IEEE CIST will also provide a platform for supporting innovative and original contributions in three complementary disciplines that are: Arabic natural language processing, Information and multimedia processing and Internet of Things. 
We would like to extend our most sincere thanks and gratitude to the keynote speakers of IEEE CIST'14 for their important added value to this edition and to the Scientific Committee Members who helped us in the review process. We would like also to express our thanks to the IEEE Computer Society for their support through their Distinguished Lecturers Programs. We are also very glad to express our most sincere gratitude for the organizing committee members for their full dedication and professional organization of this edition. The success of this colloquium will be mainly attributed to the authors who contributed with their posters and talks. We hope that CIST will continue to offer a privileged context for participants to develop new ways and methods to achieve our objectives in advancing our research and projects. We can together achieve more and face more efficiently the challenges of the current millennium.}, PAGES = {440}, URL = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6996097}, VOLUME = {CFP1467R-ART}, DOI = {10.1109/CIST.2014.7016582}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-4799-5979-2}, } @EDITORIAL{FRANCESCONI_2014_EDITORIAL_FMPVW_310637, AUTHOR = {Francesconi, E. and Montemagni, S. and Peters, W. and Venturi, G. and Wyner, A.}, TITLE = {Proceedings of the Fourth Workshop on Semantic Processing of Legal Texts}, YEAR = {2014}, PAGES = {33}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/workshops/LREC2014Workshop-SPLeT%20Proceedings.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, } @EDITORIAL{PIRRELLI_2014_EDITORIAL_PR_300048, AUTHOR = {Pirrelli, V. and Raffaelli, I.}, TITLE = {Special Issue of Suvremena Lingvistika}, YEAR = {2014}, PAGES = {127-235}, URL = {https://publications.cnr.it/doc/300048}, PUBLISHER = {Croatian Philological Society (Zagreb, HRV)}, } @EDITORIAL{PRETORIUS_2014_EDITORIAL_PSB_285396, AUTHOR = {Pretorius, L. and Soria, C. 
and Baroni, P.}, TITLE = {Proceedings of the Workshop on Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)}, YEAR = {2014}, ABSTRACT = {Proceedings del Workshop su Collaborazione e Computazione per le Lingue con Risorse Insufficienti nell'era dei Dati Aperti Collegati (CCURL 2014 | Reykjavik, 26/05/2014)}, KEYWORDS = {under-resourced languages}, PAGES = {107}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, } @INPROCEEDINGS{ABRATE_2014_INPROCEEDINGS_ADGLLMMPP_282569, AUTHOR = {Abrate, M. and Del Grosso, A. M. and Giovannetti, E. and Lo Duca, A. and Luzzi, D. and Mancini, L. and Marchetti, A. and Pedretti, I. and Piccini, S.}, TITLE = {Sharing Cultural Heritage: the Clavius on the Web Project}, YEAR = {2014}, ABSTRACT = {In the last few years the amount of manuscripts digitized and made available on the Web has been constantly increasing. However, there is still a considerable lack of results concerning both the explicitation of their content and the tools developed to make it available. The objective of the Clavius on the Web project is to develop a Web platform exposing a selection of Christophorus Clavius letters along with three different levels of analysis: linguistic, lexical and semantic. The multilayered annotation of the corpus involves a XML-TEI encoding followed by a tokenization step where each token is univocally identified through a CTS urn notation and then associated to a part-of-speech and a lemma. The text is lexically and semantically annotated on the basis of a lexicon and a domain ontology, the former structuring the most relevant terms occurring in the text and the latter representing the domain entities of interest (e.g. people, places, etc.). Moreover, each entity is connected to linked and non linked resources, including DBpedia and VIAF. 
Finally, the results of the three layers of analysis are gathered and shown through interactive visualization and storytelling techniques. A demo version of the integrated architecture was developed.}, KEYWORDS = {language technologies for digital cultural heritage, lexica and ontologies, data visualization}, PAGES = {8}, URL = {https://publications.cnr.it/doc/282569}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {LREC 2014-The 9th edition of the Language Resources and Evaluation Conference}, CONFERENCE_PLACE = {Reykjavik}, CONFERENCE_DATE = {26-31 maggio 2014}, } @INPROCEEDINGS{ANTICO_2014_INPROCEEDINGS_AQMM_286882, AUTHOR = {Antico, G. and Quochi, V. and Monachini, M. and Martinelli, M.}, TITLE = {Marrying Technical Writing with LRT}, YEAR = {2014}, ABSTRACT = {In the last years the Technical Writer operational scenarios and the workflow sensibly changed; specifically,"free style" writing - or manual writing - has become outdated and technical writing is now much more concerned with structured management of content than in the past. Technical writing has become more demanding due to a number of factors among which the rise and spread of mobile devices usage. This paper discusses the new needs of technical writing and content management business and how LRT can help it improve quality and productivity.}, KEYWORDS = {controlled language, technical writing, content management systems}, PAGES = {19-25}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation (LREC)}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may 2014}, EDITOR = {Isahara, H. and Lee, K. C. S. and Nam, S.}, } @INPROCEEDINGS{BARBAGLI_2014_INPROCEEDINGS_BLDMV_294078, AUTHOR = {Barbagli, A. and Lucisano, P. and Dell'Orletta, F. and Montemagni, S. 
and Venturi, G.}, TITLE = {Tecnologie del linguaggio e monitoraggio dell'evoluzione delle abilità di scrittura nella scuola secondaria di primo grado}, YEAR = {2014}, ABSTRACT = {L'ultimo decennio ha visto l'affermarsi a livello internazionale dell'uso di tecnologie del linguaggio per lo studio dei processi di apprendimento. Questo contributo, che si colloca all'interno di una ricerca più ampia di pedagogia sperimentale, riporta i primi e promettenti risultati di uno studio finalizzato al monitoraggio dell'evoluzione del processo di apprendimento della lingua italiana condotto a partire dalle produzioni scritte degli studenti con strumenti di annotazione linguistica automatica e di estrazione di conoscenza.}, PAGES = {23-27}, URL = {http://www.italianlp.it/wp-content/uploads/2014/12/Tecnologie-del-linguaggio-per-la-scuola.pdf}, DOI = {10.12871/CLICIT201415}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-8-86741-472-7}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics (CLiC-it 2014)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 dicembre 2014}, BOOKTITLE = {Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{BARTOLINI_2014_INPROCEEDINGS_BQDRM_286944, AUTHOR = {Bartolini, R. and Quochi, V. and De Felice, I. and Russo, I. and Monachini, M.}, TITLE = {From Synsets to Videos: Enriching ItalWordNet Multimodally}, YEAR = {2014}, ABSTRACT = {The paper describes the multimodal enrichment of ItalWordNet action verbs' entries by means of an automatic mapping with a conceptual ontology of action types instantiated by video scenes (ImagAct). The two resources present significative differences as well as interesting complementary features, such that a mapping of these two resources can lead to an enrichment of IWN, through the connection between synsets and videos apt to illustrate the meaning described by glosses. 
Here, we describe an approach inspired by ontology matching methods for the automatic mapping of ImagAct video scenes onto ItalWordNet. The experiments described in the paper are conducted on Italian, but the same methodology can be extended to other languages for which WordNets have been created, since ImagAct is available also for English, Chinese and Spanish. This source of multimodal information can be exploited to design second language learning tools, as well as for language grounding in action recognition in video sources and potentially for robotics.}, KEYWORDS = {Action ontology, Multimodality, WordNet}, PAGES = {3110-3117}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {LREC 2014. European Language Resources Association ELRA: Paris (Francia)}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{BELLANDI_2014_INPROCEEDINGS_BABBG_311736, AUTHOR = {Bellandi, A. and Albanesi, D. and Bellusci, A. and Bozzi, A. and Giovannetti, E.}, TITLE = {The Talmud System: a Collaborative Web Application for the Translation of the Babylonian Talmud Into Italian}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/311736}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-8-86741-472-7}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics (CLiC-it 2014)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 dicembre 2014}, BOOKTITLE = {Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{BELLANDI_2014_INPROCEEDINGS_BBCG_282568, AUTHOR = {Bellandi, A. and Bellusci, A. and Cappelli, A. 
and Giovannetti, E.}, TITLE = {Graphic Visualization in Literary Text Interpretation}, YEAR = {2014}, ABSTRACT = {We here illustrate a possible approach combining existing technologies for Natural Language Processing (NLP), Knowledge Representation and Reasoning (KRR) and Data Visualization in a coherent Decision Support System (DSS). The approach to the development of the system we are working on can be articulated in two main steps: the customization and integration of existing tools for automatic text annotation (at least linguistic, lexicographic and semantic) and the construction of a user-friendly and highly expressive GUI. The interface should allow a user to: upload her/his own text, run the desired annotation tools, visually interact with the resulting multilayered network to: i) proof-read the results of the automatic annotations, ii) manually add missing elements and/or relations between elements and, finally, iii) formulate and verify specific interpretative hypotheses.}, KEYWORDS = {Computational Hermeneutics, Text processing, Knowledge Representation and Reasoning, Data Visualization}, PAGES = {392-397}, URL = {https://publications.cnr.it/doc/282568}, DOI = {10.1109/IV.2014.62}, PUBLISHER = {IEEE (New York, USA)}, CONFERENCE_NAME = {18th International Conference on Information Visualisation}, CONFERENCE_PLACE = {Parigi}, CONFERENCE_DATE = {15, 16, 17, 18 luglio 2014}, BOOKTITLE = {Information Visualisation}, EDITOR = {Banissi, E. and Bannatyne, M. W. M. and Marchese, F. T. and Sarfraz, M. and Ursyn, A. and Venturini, G. and Wyeld, T. G. and Cvek, U. and Trutschl, M. and Grinstein, G. and Geroimenko, V. and Kenderdine, S. and Bouali, F.}, } @INPROCEEDINGS{BELLANDI_2014_INPROCEEDINGS_BBCG_282571, AUTHOR = {Bellandi, A. and Bellusci, A. and Carniani, E. 
and Giovannetti, E.}, TITLE = {Content Elicitation: Towards a New Paradigm for the Analysis and Interpretation of Texts}, YEAR = {2014}, ABSTRACT = {In this paper we show how semantic technologies can be exploited, with the help of user friendly interfaces, to identify and structure the knowledge embedded in literary texts. The proposed approach, that we have called Content Elicitation, supports the experts in defining hierarchical and associative relationships between semantically annotated chunks of text denoting relevant entities, allowing visual structuring of knowledge, which can be edited by different experts in a collaborative way. This knowledge, formally coded as an ontology, can then be used by scholars and students as a guide for the analysis of the text and for the discovery of potential novel interpretations. We are testing and evaluating this approach on the Babylonian Talmud, due to its historical, linguistic, semantic and structural richness.}, KEYWORDS = {semantic annotation, knowledge representation, text ontology, content elicitation, literary computing, data visualization}, URL = {https://publications.cnr.it/doc/282571}, DOI = {10.2316/P.2014.810-031}, PUBLISHER = {Acta press (Calgary, CAN)}, CONFERENCE_NAME = {The 13th IASTED International Conference on Software Engineering}, CONFERENCE_PLACE = {Innsbruck}, CONFERENCE_DATE = {17-19 febbraio 2014}, EDITOR = {Hamza, M. H.}, } @INPROCEEDINGS{BELLUSCI_2014_INPROCEEDINGS_BBBCGM_311735, AUTHOR = {Bellusci, A. and Bellandi, A. and Benotto, G. and Cappelli, A. and Giovannetti, E. and Marchi, S.}, TITLE = {Towards a Decision Support System for Text Interpretation}, YEAR = {2014}, ABSTRACT = {This article illustrates the first steps towards the implementation of a Decision Support System aimed to recreate a research environment for scholars and provide them with computational tools to assist in the processing and interpretation of texts. 
While outlining the general characteristics of the system, the paper presents a minimal set of user requirements and provides a possible use case on Dante's Inferno.}, KEYWORDS = {DSS, XML, text interpretation, literary computing}, PAGES = {58-62}, URL = {http://clic.humnet.unipi.it/proceedings/vol1/CLICIT2014112.pdf}, VOLUME = {I}, DOI = {10.12871/CLICIT2014112}, ISBN = {9788867414727}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics (CLiC-it 2014)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 dicembre 2014}, BOOKTITLE = {Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{BIZZONI_2014_INPROCEEDINGS_BBDDMC_286958, AUTHOR = {Bizzoni, Y. and Boschetti, F. and Diakoff, H. and Del Gratta, R. and Monachini, M. and Crane, G.}, TITLE = {The Making of Ancient Greek WordNet}, YEAR = {2014}, ABSTRACT = {This paper describes the process of creation and review of a new lexico-semantic resource for the classical studies: Ancient Greek WordNet. The candidate sets of synonyms (synsets) are extracted from Greek-English dictionaries, on the assumption that Greek words translated by the same English word or phrase have a high probability of being synonyms or at least semantically closely related. The process of validation and the web interface developed to edit and query the resource are described in detail. The lexical coverage of Ancient Greek WordNet is illustrated and the accuracy is evaluated. Finally, scenarios for exploiting the resource are discussed.}, KEYWORDS = {Ancient Greek, Multilingualism, Classical Philology}, PAGES = {1140-1147}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, CONFERENCE_NAME = {LREC 2014. 
European Language Resources Association ELRA: Paris (Francia)}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_B_288048, AUTHOR = {Boschetti, F.}, TITLE = {Acquisizione e Creazione di Risorse Plurilingui per gli Studi di Filologia Classica in Ambienti Collaborativi}, YEAR = {2014}, ABSTRACT = {Questo articolo illustra metodi e strumenti per l'acquisizione e l'estensione di risorse digitali plurilingui per gli studi classici, sviluppati in collaborazione tra il CoPhiLab dell'Ilc-Cnr e il Perseus Project della Tufts University. Si descrivono tre linee di intervento: a) la progettazione e l'implementazione di un sistema di correzione dell'output dell'Ocr applicato al Greco antico; b) la creazione e la valutazione di un nucleo di synsets per Ancient Greek WordNet e c) l'allineamento di un campione di testi greci e latini con le relative traduzioni italiane.}, KEYWORDS = {Greco Antico, OCR, WordNet, Allineamento}, PAGES = {55-67}, URL = {https://publications.cnr.it/doc/288048}, PUBLISHER = {CLEUP (Padova, ITA)}, ISBN = {9788867872602}, CONFERENCE_NAME = {AIUCD 2013}, CONFERENCE_DATE = {2014}, BOOKTITLE = {Collaborative Research Practices and Shared Infrastructures for Humanities Computing-2nd Aiucd Annual Conference, Aiucd 2013 Padua, Italy, 11-12 December 2013-Proceedings of Revised Papers}, EDITOR = {Agosti, M. 
and Tomasi, F.}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_B_288052, AUTHOR = {Boschetti, F.}, TITLE = {La localizzazione del Perseus Project in lingua italiana}, YEAR = {2014}, ABSTRACT = {Si illustra il progetto di localizzazione in lingua italiana dell'infrastruttura per lo studio dei classici greci e latini costituita dal Perseus Project (Tufts University, Medford, MA), usando risorse per l'analisi della nostra lingua sviluppate presso l'ILC-CNR di Pisa e mettendo a disposizione nuovi componenti software per la visualizzazione e l'annotazione di testi bilingui.}, KEYWORDS = {filologia computazionale, allineamento, lessico dinamico}, PAGES = {221-234}, URL = {http://digilab2.let.uniroma1.it/ojs/index.php/Quaderni_DigiLab/issue/view/12}, VOLUME = {24}, DOI = {10.13133/978-88-98533-27-5}, ISBN = {978-88-98533-27-5}, CONFERENCE_NAME = {AIUCD 2012}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {13-14 dicembre 2012}, BOOKTITLE = {Digital Humanities: progetti italiani ed esperienze di convergenza multidisciplinare-Atti del convegno annuale dell'Associazione per l'Informatica Umanistica e la Cultura Digitale (AIUCD) Firenze, 13-14 dicembre 2012}, EDITOR = {Ciotti, F.}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BCDLPPVML_288050, AUTHOR = {Boschetti, F. and Cimino, A. and Dell'Orletta, F. and Lebani, G. E. and Passaro, L. and Picchi, P. and Venturi, G. and Montemagni, S. and Lenci, A.}, TITLE = {Computational Analysis of Historical Documents: An Application to Italian War Bulletins in World War I and II}, YEAR = {2014}, ABSTRACT = {World War (WW) I and II represent crucial landmarks in the history of mankind: They have affected the destiny of whole generations and their consequences are still alive throughout Europe. In this paper we present an ongoing project to carry out a computational analysis of Italian war bulletins in WWI and WWII, by applying state-of-the-art tools for NLP and Information Extraction. 
The annotated texts and extracted information will be explored with a dedicated Web interface, allowing for multidimensional access and exploration of historical events through space and time.}, KEYWORDS = {World War I}, PAGES = {70-75}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/workshops/LREC2014Workshop-LRT4HDA%20Proceedings.pdf}, PUBLISHER = {European language resources association (ELRA) (Paris, FRA)}, CONFERENCE_NAME = {LREC 2014}, CONFERENCE_PLACE = {Reykjavik}, CONFERENCE_DATE = {26 May}, BOOKTITLE = {Proceedings of workshop on Language resources and technologies for processing and linking historical documents and archives-Deploying Linked Open Data in Cultural Heritage-LREC 2014, 26 May, Reykjavik, Iceland}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BDL_288070, AUTHOR = {Boschetti, F. and Del Gratta, R. and Lamé, M.}, TITLE = {Computer Assisted Annotation of Themes and Motifs in Ancient Greek Epigrams: First Steps}, YEAR = {2014}, ABSTRACT = {This paper aims at illustrating some tools to assist the manual annotation of themes and motifs in literary and epigraphic epigrams for the PRIN 2010/2011 Memorata Poetis Project.}, KEYWORDS = {Filologia collaborativa}, PAGES = {83-86}, URL = {http://clic.humnet.unipi.it/it/atti.html}, VOLUME = {1}, DOI = {10.12871/CLICIT2014158}, PUBLISHER = {Pisa University Press (Pisa, ITA)}, ISBN = {978-8-86741-472-7}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 dicembre 2014}, BOOKTITLE = {The First Italian Conference on Computational Linguistics-Proceedings}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{BRUNATO_2014_INPROCEEDINGS_B_311792, AUTHOR = {Brunato, D.}, TITLE = {Complessità necessaria o stereotipi del "burocratese"? 
Un'indagine sulla leggibilità del linguaggio amministrativo da una prospettiva linguistico-computazionale}, YEAR = {2014}, ABSTRACT = {Questo contributo intende presentare una metodologia di ricostruzione del profilo linguistico di un corpus di testi amministrativi basata sull'uso delle tecnologie linguistico-computazionali e finalizzata alla specializzazione di un indice di leggibilità "avanzato" sulle caratteristiche di questi testi. Tale metodologia, documentata in [3], si propone di indagare la variazione linguistica tramite il monitoraggio di parametri estratti automaticamente dal testo sottoposto ad analisi linguistica multi-livello. La complessità della lingua della pubblica amministrazione, soprattutto nei documenti rivolti al cittadino, è un problema ben noto e, malgrado le molteplici iniziative in favore di un linguaggio più chiaro ed efficace, tratti tipici del "burocratese" continuano a persistere, anche quando non imposti da requisiti di legittimità e precisione. Un ausilio alla semplificazione può venire dai sistemi per la misurazione della leggibilità del testo, come suggerito anche dai manuali di stile ispirati alla letteratura del Plain Language. Tuttavia le formule tradizionali, quali Gulpease [2], si limitano ad approssimare la complessità testuale, in quanto considerano esclusivamente parametri del testo superficiali, come la lunghezza della frase e della parola. Più recentemente, è emersa una nuova generazione di indici di leggibilità, fondati su metodologie di Trattamento Automatico del Linguaggio, che riescono a intercettare i luoghi di complessità del testo in maniera più granulare, computando un ampio spettro di parametri linguistici, che risultano anche maggiormente implicati nei processi di comprensione. È il caso di READ-IT [1], lo strumento utilizzato in questo studio. 
L'analisi linguistica ha esplorato la distribuzione di caratteristiche lessicali, morfo-sintattiche e sintattiche, estratte automaticamente da un "corpus parallelo monolingue" di testi amministrativi, ovvero internamente suddiviso in due sotto-corpora: uno costituito da testi autentici delle pubbliche amministrazioni e uno dalle relative versioni semplificate, frutto di un lavoro di riscrittura coordinato da linguisti. Queste caratteristiche, già risultate predittive del livello di leggibilità di testi giornalistici, sono state selezionate allo scopo di verificare l'incidenza delle peculiarità della scrittura amministrativa nella caratterizzazione della leggibilità. I risultati hanno permesso infatti di discriminare tra aspetti di complessità "ineliminabile" e tratti tipici del burocratese: i primi sono rappresentati da quei parametri che, pur indicativi di maggior complessità nella lingua comune, risultano similmente distribuiti nelle due sottovarietà; i secondi sono invece quelli che contraddistinguono solo la varietà dei testi originali, dunque quelli su cui hanno agito le riscritture. Questi dati potranno supportare tanto lo sviluppo di indici di leggibilità adattati alle peculiarità del linguaggio amministrativo, quanto le ricerche più attuali sulla semplificazione semiautomatica del testo.}, URL = {http://www.csfls.it/silfi2014/wp-content/uploads/2014/08/Atti-SILFI-2014-Riassunti.pdf}, ISBN = {978-88-96312-56-8}, CONFERENCE_NAME = {XIII Congresso della SILFI (Società Internazionale di Linguistica e Filologia Italiana)}, CONFERENCE_PLACE = {Palermo}, CONFERENCE_DATE = {22-24 settembre 2014}, BOOKTITLE = {La lingua variabile nei testi letterari, artistici e funzionali contemporanei. Analisi, interpretazione, traduzione}, EDITOR = {{Centro di Studi Filologici e Linguistici Siciliani} and {Dipartimento di Scienze Umanistiche} and {Università degli Studi di Palermo}}, } @INPROCEEDINGS{BRUNATO_2014_INPROCEEDINGS_BDVM_294073, AUTHOR = {Brunato, D. and Dell'Orletta, F. and Venturi, G. 
and Montemagni, S.}, TITLE = {Defining an annotation scheme with a view to automatic text simplification}, YEAR = {2014}, ABSTRACT = {This paper presents the preliminary steps of ongoing research in the field of automatic text simplification. In line with current approaches, we propose here a new annotation scheme specifically conceived to identify the typologies of changes an original sentence undergoes when it is manually simplified. Such a scheme has been tested on a parallel corpus available for Italian, which we have first aligned at sentence level and then annotated with simplification rules.}, PAGES = {87-92}, URL = {http://www.italianlp.it/wp-content/uploads/2014/12/Text-simplification.pdf}, DOI = {10.12871/CLICIT2014118}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-8-86741-472-7}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics (CLiC-it 2014)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 dicembre 2014}, BOOKTITLE = {Proceedings of the First Italian Conference on Computational Linguistics (CLiC-it 2014)}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{CHIARELLA_2014_INPROCEEDINGS_CCMMM_282739, AUTHOR = {Chiarella, D. and Cutugno, P. and Marconi, L. and Morando, M. and Morgavi, G.}, TITLE = {La pesca, la caza, la agricultura y el bosque: una organización de datos lingüísticos por un léxico con referencia al mundo fang}, YEAR = {2014}, ABSTRACT = {El lenguaje es una forma privilegiada de desarrollo de ideas, expresión de sentimientos y conocimientos, enfrentarse con otros y también de cohesión social; el saber es generalmente procesado y transmitido a través del lenguaje. Cada persona está tan acostumbrada a vivir en su propia realidad cultural que la cree universal; cada cultura tiende a ponerse al centro del mundo y considerarse como un punto de referencia y medida de todas las otras. 
Un fenómeno evidente de la lengua fang es la falta de "palabras - conceptos" en sentido occidental, o palabras que abarcan una cantidad de objetos que tienen características en común. En la estructuración del diccionario hemos elegido algunos elementos de la cultura fang como organización y jerarquía social, constitución de la familia, ámbito económico, ámbito artístico, ámbito literario, ámbito de la medicina; además hemos examinado unos aspectos de la representación del tiempo y del espacio, algunas topologías de danza y juegos y unos elementos descriptivos del bosque.}, KEYWORDS = {Fang Spagnolo lessici specifici}, PAGES = {16}, URL = {https://publications.cnr.it/doc/282739}, PUBLISHER = {Centro Cultural Africano "Fernando Ortiz" (Santiago de Cuba, CUB)}, ISBN = {9789592840195}, CONFERENCE_NAME = {XIII° Conferencia Internacional de Cultura Africana y Afroamericana}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {12-16 Aprile 2014}, } @INPROCEEDINGS{CIGNONI_2014_INPROCEEDINGS_CGMF_288032, AUTHOR = {Cignoni, L. and Giuffra, V. and Minozzi, S. and Fornaciari, G.}, TITLE = {CLIL Funerary Archaeology Courses for First-Cycle and Second-Cycle Degree Students}, YEAR = {2014}, ABSTRACT = {This paper reports on the differences between two specialized funerary archaeology courses conducted by a native language teacher from the Institute for Computational Linguistics of the National Research Council in Pisa and a subject specialist in paleopathology and funerary archaeology from the Division of Palaeopathology, Department of Translational Research on New Technologies in Medicine and Surgery of Pisa University. Lessons addressed to first cycle three-year Bachelor's degree undergraduates who were studying archaeology, art history, natural and environmental sciences took place in the second semester of the year 2012-2013. 
Classes in the same discipline and addressed to students from the same faculties had been held a year earlier for a second cycle twoyear Master's degree course. The classes were delivered in English using CLIL (exploitation of a vehicular foreign language to teach a special subject) associated with blended learning methodology (combination of face-to-face instructor-led training with web-based technology). Appropriate teaching materials selected by the two teachers covered a wide range of topics, from the study of death to ancient burials, rites, and dynamics of human settlements, as well as evidence of past human societies recovered by excavations. In particular, ancient Roman funerary customs (inhumation, cremation) and Medieval mortuary practices and burials were studied, alongside artifacts such as weapons, jewellery, and pottery vessels recovered from archaeological sites both in Italy and in Britain. Collaboration between language teacher and subject specialist was crucial for the selection of the reading and listening materials, for the correction of the oral and written work assigned to the students, and for the intervention on the part of the subject teacher to clarify points that had been raised, to assist the students during the individual presentations, pairwork or group discussions, and to encourage their work. Two researchers collaborating with the subject specialist also contributed to the lessons by presenting studies they had performed in their area of expertise and by assisting the students during the discussions. These student-centred tasks were aimed at accomplishing important educational goals such as student motivation, improved cognitive and academic performance, enhanced access to online learning resources, peer learning and collaboration. 
The 2012-2013 course proved to be much more interactive and challenging than the previous one, owing to the major emphasis given to the more practical aspects, in preparation for the fieldwork in archaeology and bioarchaeology, which was carried out in the summer of 2013, working with their peers from Ohio State University and other Universities in the USA, Canada and Australia. Particular attention was devoted to the language of funerary archaeology, and the trainees extracted definitions from the texts they were using to enrich an ongoing English-Italian glossary of funerary archaeology terms. The most important items and sentence structures of the English language were studied and revised, and an English grammar containing contextualized examples drawn from specialized works in that domain was enriched with new material. Student exchanges under different European and international programmes have emphasized on the need for specialist knowledge in specific thematic areas, alongside an oral and written command of a foreign language.}, KEYWORDS = {funerary archaeology, CLIL, Roman and Medieval archaeology, University education, collaborative learning}, URL = {https://publications.cnr.it/doc/288032}, PUBLISHER = {International Association of Technology, Education and Development (IATED) Academy (www. iated. org, ESP)}, ISBN = {978-84-616-8412-0}, CONFERENCE_NAME = {INTED (International Technology, Education and Development Conference)}, CONFERENCE_PLACE = {Valencia}, CONFERENCE_DATE = {10-12 March 2014}, } @INPROCEEDINGS{CIGNONI_2014_INPROCEEDINGS_CMS_281790, AUTHOR = {Cignoni, L. and Marinelli, R. and Spadoni, G. 
P.}, TITLE = {A CLIL/blended learning approach for cruise tourism courses in Italy using lexical/semantic databases and information technology resources}, YEAR = {2014}, ABSTRACT = {In this paper we discuss the possibility of exploiting specialized texts for cruise shipping, hotel and catering management courses in English to be held in nautical and other senior high schools in Italy, more and more involved in the promising and strongly developing field of tourism. The courses will be carried out using a content and language integrated (CLIL) approach, and will be run by native language teachers working alone or in collaboration with instructional supporting experts in the different sectors. The aims of the courses are to study the maritime terminology related to ships and navigation (crew members, safety and security systems on board, etc.) and, in particular, to the cruise ship industry and hospitality operations environment (passenger mobility on board and ashore during excursions, etc.). Students will become acquainted with the language of routine operations, giving directions, understanding commands in emergency situations, reporting on weather forecasts, and with the terms and definitions belonging to the tourist activity specialized in the management of cruise ships and passengers. The scarce number of text books available for maritime English makes it necessary to supply Italian students with a variety of material in paper and computer format, so as to help them expand their vocabulary in the foreign language with greater confidence and proficiency. The texts will include shipping and cruise shipping books and magazines, manuals, contracts, technical documents, cruise line and tour operator websites, passenger blogs, and other texts of the cruise community. 
The trainees, constantly exposed to the language, will work individually, in pairs and in groups, at the presence of English teachers and operators in the field of cruise tourism, and will perform activities that cover the four communication skills of reading, writing, listening and speaking (gap filling, matching, summarizing, etc.). They will use modern technological equipment including computers, ipads, and other devices, incorporated in the classrooms according to a blended learning approach, which combines face-to-face and on-line education. Students can explore the meanings of single words by consulting the English lexical semantic database WordNet implemented at Princeton University, alongside the Italian terminological database Mariterm containing data belonging to the navigation and sea transport domains, as well as visualized images. Both databases are managed by user-friendly tools that can be easily accessed by teachers and students. Mariterm can be constantly enriched and updated with new information in the different sectors of maritime English. Finally, a grammar illustrating the most important items of the English language will be made available to the students, who can copy it on a file and expand it with contextualized examples extracted from the texts they will be reading and share the outcomes with their peers. English has been internationally accepted as the language for communication, and is therefore particularly important for exchanges among those who wish to work in the tourism industry.}, KEYWORDS = {maritime English, tourism, CLIL, lexical semantic databases, blended learning}, PAGES = {6552-6559}, URL = {https://publications.cnr.it/doc/281790}, VOLUME = {1}, PUBLISHER = {International Association of Technology, Education and Development (IATED) (Valencia, ESP)}, ISBN = {978-84-616-8412-0}, CONFERENCE_NAME = {INTED2014. 
8th International Technology, Education and Development Conference}, CONFERENCE_PLACE = {Valencia (Spain)}, CONFERENCE_DATE = {10th-12th of March, 2014}, BOOKTITLE = {INTED2014. 8th International Technology, Education and Development Conference. Valencia (Spain), 10th-12th of March, 2014. Proceedings}, EDITOR = {Chova, L. G. and Martínez, A. L. and Torres, I. C.}, } @INPROCEEDINGS{CIMINO_2014_INPROCEEDINGS_CCDT_294105, AUTHOR = {Cimino, A. and Cresci, S. and Dell'Orletta, F. and Tesconi, M.}, TITLE = {Linguistically-motivated and Lexicon Features for Sentiment Analysis of Italian Tweets}, YEAR = {2014}, ABSTRACT = {In this paper we describe our approach to EVALITA 2014 SENTIment POLarity Classification (SENTIPOLC) task. We participated only in the Polarity Classification sub-task. By resorting to a wide set of general-purpose features qualifying the lexical and grammatical structure of a text, automatically created ad-hoc lexicons and existing free available resources, we achieved the second best accuracy.}, KEYWORDS = {Lexicons resources}, URL = {https://publications.cnr.it/doc/294105}, CONFERENCE_NAME = {The 4th Conference for Evaluation of NLP and Speech Tools for Italian (EVALITA)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {2014}, } @INPROCEEDINGS{DEFELICE_2014_INPROCEEDINGS_DBRQM_291282, AUTHOR = {De Felice, I. and Bartolini, R. and Russo, I. and Quochi, V. and Monachini, M.}, TITLE = {Evaluating ImagAct-WordNet mapping for English and Italian through videos}, YEAR = {2014}, ABSTRACT = {In this paper we present the results of the evaluation of an automatic mapping between two lexical resources, WordNet/ItalWordNet and ImagAct, a conceptual ontology of action types instantiated by video scenes. Results are compared with those obtained from a previous experiment performed only on Italian data. 
Differences between the two evaluation strategies, as well as between the quality of the mappings for the two languages considered in this paper, are discussed.}, KEYWORDS = {Language Resources (LRs)}, PAGES = {128-131}, URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-CLICit-2014.pdf}, DOI = {10.12871/CLICIT2014126}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-88-67-41472-7}, CONFERENCE_NAME = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 \& the Fourth International Workshop EVALITA 2014. Pisa University Press srl: Pisa (Italia)}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 December 2014, Pisa}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{DEFELICE_2014_INPROCEEDINGS_DDM_292073, AUTHOR = {De Felice, I. and Donati, M. and Marotta, G.}, TITLE = {CLaSSES: a new digital resource for Latin epigraphy}, YEAR = {2014}, ABSTRACT = {CLaSSES (Corpus for Latin Sociolinguistic Studies on Epigraphic textS) is an annotated corpus for quantitative and qualitative sociolinguistic analyses on Latin inscriptions. It allows specific researches on phonological and morphophonological phenomena of non-standard Latin forms with crucial reference to the typology of the text, its origin and chronological collocation. This paper presents the first macrosection of CLaSSES, focused on the inscriptions from the archaic-early period.}, PAGES = {132-137}, URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-EVALITA-2014.pdf}, PUBLISHER = {Pisa University Press (Pisa, ITA)}, CONFERENCE_NAME = {CLiC-it. La Prima Conferenza Italiana di Linguistica Computazionale}, CONFERENCE_PLACE = {Pisa, Italy}, CONFERENCE_DATE = {9-10/12/2014}, BOOKTITLE = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 and the Fourth International Workshop EVALITA 2014}, EDITOR = {Basili, R. and Lenci, A. 
and Magnini, B.}, } @INPROCEEDINGS{DELGRATTA_2014_INPROCEEDINGS_DFKMS_285395, AUTHOR = {Del Gratta, R. and Frontini, F. and Khan, F. and Mariani, J. and Soria, C.}, TITLE = {The LREMap for Under-Resourced Languages}, YEAR = {2014}, ABSTRACT = {A complete picture of currently available language resources and technologies for the under-resourced languages of Europe is still lacking. Yet this would help policy makers, researchers and developers enormously in planning a roadmap for providing all languages with the necessary instruments to act as fully equipped languages in the digital era. In this paper we introduce the LRE Map and show its utility for documenting available language resources and technologies for under-resourced languages. The importance of the serialization of the LREMap into (L)LOD along with the possibility of its connection to a wider world is also introduced.}, KEYWORDS = {language resources, less-resourced languages, linguistic linked open data}, PAGES = {78-83}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, CONFERENCE_NAME = {Workshop on Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)}, CONFERENCE_PLACE = {Reykjavik}, CONFERENCE_DATE = {26/05/2014}, BOOKTITLE = {Proceedings of the Workshop on Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)}, EDITOR = {Pretorius, L. and Soria, C. and Baroni, P.}, } @INPROCEEDINGS{DELGRATTA_2014_INPROCEEDINGS_DN_318313, AUTHOR = {Del Gratta, R. and Nahli, O.}, TITLE = {Enhancing Arabic WordNet with the use of Princeton WordNet and a bilingual dictionary}, YEAR = {2014}, ABSTRACT = {This paper describes an heuristic-based approach to enhance existing WordNets with freely available bilingual resources. The approach has been applied to the Arabic WordNet using the AraMorph bilingual dictionary as bilingual resource, but its guidelines are quite general to be effectively applied to other languages. 
The English words extracted from the bilingual resource are checked against Princeton WordNet in order to quantify their coverage and to select only those words which share the same set of synsets. This strongly reduces the number of Arabic words of the pairs. These latter are then checked against the Arabic WordNet to make new words emerge and -possibly- add new synonyms.}, KEYWORDS = {WordNet, Arabic, English, Bilingual Resource, Enhancement}, PAGES = {278-284}, URL = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=\&arnumber=7016632}, DOI = {10.1109/CIST.2014.7016632}, PUBLISHER = {IEEE Communications Society (Piscataway, USA)}, ISBN = {978-1-4799-5978-5}, CONFERENCE_NAME = {ANLP IEEE CIST14}, CONFERENCE_PLACE = {Tetuan, Morocco}, CONFERENCE_DATE = {20-22/10/ 2014}, BOOKTITLE = {3rd International IEEE Colloquium on Information Science and Technology; From 20th to 22nd of October 2014 Tetuan-Chefchaouen Morocco}, } @INPROCEEDINGS{DELGRATTA_2014_INPROCEEDINGS_DPS_281039, AUTHOR = {Del Gratta, R. and Pardelli, G. and Sara, G.}, TITLE = {The LRE Map disclosed}, YEAR = {2014}, ABSTRACT = {This paper describes a serialization of the LRE Map database according to the RDF model. Due to the peculiar nature of the LRE Map, many ontologies are necessary to model the map in RDF, including newly created and reused ontologies. The importance of having the LRE Map in RDF and its connections to other open resources is also addressed.}, KEYWORDS = {Language Resource, LOD, Metadata}, PAGES = {3534-3541}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {EUROPEAN LANGUAGE RESOURCES ASSOC-ELRA FRANCE (Parigi, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {Ninth International Conference on Language Resources and Evaluation (LREC'14)}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may 2014}, BOOKTITLE = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)}, EDITOR = {Calzolari, N. 
and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{DELGROSSO_2014_INPROCEEDINGS_DMMP_288069, AUTHOR = {Del Grosso, A. M. and Marchi, S. and Murano, F. and Pesini, L.}, TITLE = {A collaborative tool for philological research: experiments on Ferdinand de Saussure's manuscripts}, YEAR = {2014}, ABSTRACT = {The present paper describes a philological-computational tool developed by the Istituto di Linguistica Computazionale (ilc - cnr) of Pisa, aimed at creating a digital edition of Ferdinand de Saussure's unpublished manuscripts. Since the use of a digital edition and of the most modern computer technology allows a more in-depth research, the ilc is developing a set of digital tools in order to take advantage of both the documents and the related information added by the scientific community. The integration exploits the Java enterprise platform by organizing the different features in modules. Thus, the tool meets the following requirements: (i) converting legacy digital resources into valid Xml documents (tei compliant); (ii) parallel visualization among imported texts and related images; (iii) search and indexing; (iv) handling of variant readings; and (v) collaborative annotation.}, KEYWORDS = {Computational and collaborative philology}, PAGES = {163-175}, URL = {https://publications.cnr.it/doc/288069}, PUBLISHER = {CLEUP (Padova, ITA)}, ISBN = {978-88-6787-260-2}, CONFERENCE_NAME = {Aiucd}, CONFERENCE_PLACE = {Padova}, CONFERENCE_DATE = {11-12 december 2013}, BOOKTITLE = {Collaborative Research Practices and Shared Infrastructures for Humanities Computing}, EDITOR = {Agosti, M. and Tomasi, F.}, } @INPROCEEDINGS{DELGROSSO_2014_INPROCEEDINGS_DN_295187, AUTHOR = {Del Grosso, A. M. 
and Nahli, O.}, TITLE = {Towards a flexible open-source software library for multi-layered scholarly textual studies: An Arabic case study dealing with semi-automatic language processing}, YEAR = {2014}, ABSTRACT = {This paper presents both the general model and a case study of the Computational and Collaborative Philology Library (CoPhiLib), an ongoing initiative underway at the Institute for Computational Linguistics (ILC) of the National Research Council (CNR), Pisa, Italy. The library, designed and organized as a reusable, abstract and open-source software component, aims at solving the needs of multi-lingual and cross-lingual analysis by exposing common Application Programming Interfaces (APIs). The core modules, coded by the Java programming language, constitute the groundwork of a Web platform designed to deal with textual scholarly needs. The Web application, implemented according to the Java Enterprise specifications, focuses on multi-layered analysis for the study of literary documents and related multimedia sources. This ambitious challenge seeks to obtain the management of textual resources, on the one hand by abstracting from current language, on the other hand by decoupling from the specific requirements of single projects. This goal is achieved thanks to methodologies declared by the "agile process", and by putting into effect suitable use case modeling, design patterns, and component-based architectures. The reusability and flexibility of the system have been tested on an Arabic case study: the system allows users to choose the morphological engine (such as AraMorph or Al-Khalil), along with linguistic granularity (i.e. with or without declension). 
Finally, the application enables the construction of annotated resources for further statistical engines (training set).}, KEYWORDS = {Design, Information Engineering, Design Patterns, Text Processing, Arabic Natural Language Processing}, PAGES = {285-290}, URL = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?tp=\&arnumber=7016633\&queryText%3Ddel+grosso+philology}, DOI = {10.1109/CIST.2014.7016633}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-4799-5978-5}, CONFERENCE_NAME = {Third IEEE International Colloquium in Information Science and Technology (CIST)}, CONFERENCE_PLACE = {Tetuan, Morocco}, CONFERENCE_DATE = {20-22/10/2014}, BOOKTITLE = {IEEE Conference Publications-Catalog Number: CFP1467R-ART}, EDITOR = {El Mohajir, M. and Al Achhab, M. and Chahhou, M. and Mounir, A. and El Mohajir, B. and Pirrelli, V. and Zarghili, A. and Elfar, M.}, } @INPROCEEDINGS{DELLORLETTA_2014_INPROCEEDINGS_DVCM_285670, AUTHOR = {Dell'Orletta, F. and Venturi, G. and Cimino, A. and Montemagni, S.}, TITLE = {T2K: a System for Automatically Extracting and Organizing Knowledge from Texts}, YEAR = {2014}, ABSTRACT = {In this paper, we present T2K, a suite of tools for automatically extracting domain-specific knowledge from collections of Italian and English texts. T2K (Text-To-Knowledge v2) relies on a battery of tools for Natural Language Processing (NLP), statistical text analysis and machine learning which are dynamically integrated to provide an accurate and incremental representation of the content of vast repositories of unstructured documents. Extracted knowledge ranges from domain-specific entities and named entities to the relations connecting them and can be used for indexing document collections with respect to different information types. T2K also includes "linguistic profiling" functionalities aimed at supporting the user in constructing the acquisition corpus, e.g. 
in selecting texts belonging to the same genre or characterized by the same degree of specialization or in monitoring the "added value" of newly inserted documents. T2K is a web application which can be accessed from any browser through a personal account which has been tested in a wide range of domains.}, KEYWORDS = {Natural Language Processing, Information Extraction, Knowledge Management}, PAGES = {2062-2070}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/590_Paper.pdf}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {International Conference on Language Resources and Evaluation (LREC)}, CONFERENCE_PLACE = {Reykjavik}, CONFERENCE_DATE = {26-31 maggio 2014}, } @INPROCEEDINGS{DELLORLETTA_2014_INPROCEEDINGS_DWCVM_294084, AUTHOR = {Dell'Orletta, F. and Wieling, M. and Cimino, A. and Venturi, G. and Montemagni, S.}, TITLE = {Assessing the readability of sentences: which corpora and features?}, YEAR = {2014}, ABSTRACT = {The paper investigates the problem of sentence readability assessment, which is modelled as a classification task, with a specific view to text simplification. In particular, it addresses two open issues connected with it, i.e. the corpora to be used for training, and the identification of the most effective features to determine sentence readability. An existing readability assessment tool developed for Italian was specialized at the level of training corpus and learning algorithm. 
A maximum entropy-based feature selection and ranking algorithm (grafting) was used to identify the most relevant features: it turned out that assessing the readability of sentences is a complex task, requiring a high number of features, mainly syntactic ones.}, PAGES = {163-173}, URL = {http://acl2014.org/acl2014/W14-18/pdf/W14-1820.pdf}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, ISBN = {978-1-941643-03-7}, CONFERENCE_NAME = {9th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2014)}, CONFERENCE_PLACE = {Baltimore, Maryland, USA}, CONFERENCE_DATE = {26 giugno 2014}, BOOKTITLE = {Proceedings of 9th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2014)}, } @INPROCEEDINGS{FERRARI_2014_INPROCEEDINGS_FDSG_294419, AUTHOR = {Ferrari, A. and Dell'Orletta, F. and Spagnolo, G. O. and Gnesi, S.}, TITLE = {Measuring and improving the completeness of natural language requirements}, YEAR = {2014}, ABSTRACT = {[Context and motivation] System requirements specifications are normally written in natural language. These documents are required to be complete with respect to the input documents of the requirements definition phase, such as preliminary specifications, transcripts of meetings with the customers, etc. In other terms, they shall include all the relevant concepts and all the relevant interactions among concepts expressed in the input documents. [Question/Problem] Means are required to measure and improve the completeness of the requirements with respect to the input documents. [Principal idea/results] To measure this completeness, we propose two metrics that take into account the relevant terms of the input documents, and the relevant relationships among terms. 
Furthermore, to improve the completeness, we present a natural language processing tool named Completeness Assistant for Requirements (CAR), which supports the definition of the requirements: the tool helps the requirements engineer in discovering relevant concepts and interactions. [Contribution] We have performed a pilot test with CAR, which shows that the tool can help improving the completeness of the requirements with respect to the input documents. The study has also shown that CAR is actually useful in the identification of specific/alternative system behaviours that might be overseen without the tool. © 2014 Springer International Publishing Switzerland.}, KEYWORDS = {natural language processing, relation extraction, Requirements analysis}, PAGES = {23-38}, URL = {https://link.springer.com/chapter/10.1007%2F978-3-319-05843-6_3#citeas}, VOLUME = {8396}, DOI = {10.1007/978-3-319-05843-6_3}, ISBN = {978-3-319-05843-6}, CONFERENCE_NAME = {REFSQ 2014, Requirements Engineering: Foundation for Software Quality. 20th International Working Conference}, CONFERENCE_PLACE = {Essen, Germany}, CONFERENCE_DATE = {7-10 April 2014}, BOOKTITLE = {Requirements Engineering: Foundation for Software Quality 20th International Working Conference, REFSQ 2014, Essen, Germany, April 7-10, 2014. Proceedings}, EDITOR = {Salinesi, C. and Van De Weerd, I.}, } @INPROCEEDINGS{FRONTINI_2014_INPROCEEDINGS_FQM_291452, AUTHOR = {Frontini, F. and Quochi, V. 
and Monachini, M.}, TITLE = {Polysemy alternations extraction using the PAROLE SIMPLE CLIPS Italian lexicon}, YEAR = {2014}, ABSTRACT = {This paper presents the results of an experiment of polysemy alternations induction from a lexicon (Utt and Padó, 2011; Frontini et al., 2014), discussing the results and proposing an amendment in the original algorithm.}, KEYWORDS = {Language Resources and Technologies}, PAGES = {175-179}, URL = {http://clic.humnet.unipi.it/proceedings/Proceedings-CLICit-2014.pdf}, DOI = {10.12871/CLICIT2014134}, PUBLISHER = {Pisa University Press srl (Pisa, ITA)}, ISBN = {978-88-67-41472-7}, CONFERENCE_NAME = {Proceedings of the First Italian Conference on Computational Linguistics CLiC-it 2014 \& the Fourth International Workshop EVALITA 2014}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {9-11 December 2014, Pisa}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{FRONTINI_2014_INPROCEEDINGS_FQPUM_286984, AUTHOR = {Frontini, F. and Quochi, V. and Padó, S. and Utt, J. and Monachini, M.}, TITLE = {Polysemy Index for Nouns: an Experiment on Italian using the PAROLE SIMPLE CLIPS Lexical Database}, YEAR = {2014}, ABSTRACT = {An experiment is presented to induce a set of polysemous basic type alternations (such as ANIMAL-FOOD, or BUILDING-INSTITUTION) by deriving them from the sense alternations found in an existing lexical resource. The paper builds on previous work and applies those results to the Italian lexicon PAROLE SIMPLE CLIPS. The new results show how the set of frequent type alternations that can be induced from the lexicon is partly different from the set of polysemy relations selected and explicitly applied by lexicographers when building it. 
The analysis of mismatches shows that frequent type alternations do not always correspond to prototypical polysemy relations, nevertheless the proposed methodology represents a useful tool offered to lexicographers to systematically check for possible gaps in their resource.}, KEYWORDS = {Polysemy, lexical resources, semantics}, PAGES = {2955-2963}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, BOOKTITLE = {LREC 2014 Ninth International Conference on Language Resources and Evaluation Proceedings}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{GAGGIOLI_2014_INPROCEEDINGS_GCSPTBCFCTDGTRR_283374, AUTHOR = {Gaggioli, A. and Cipresso, P. and Serino, S. and Pioggia, G. and Tartarisco, G. and Baldus, G. and Corda, D. and Ferro, M. and Carbonaro, N. and Tognetti, A. and De Rossi, D. and Giakoumis, D. and Tzovaras, D. and Riera, A. and Riva, G.}, TITLE = {A decision support system for real-time stress detection during virtual reality exposure}, YEAR = {2014}, ABSTRACT = {Virtual Reality (VR) is increasingly being used in combination with psycho-physiological measures to improve assessment of distress in mental health research and therapy. However, the analysis and interpretation of multiple physiological measures is time consuming and requires specific skills, which are not available to most clinicians. To address this issue, we designed and developed a Decision Support System (DSS) for automatic classification of stress levels during exposure to VR environments. 
The DSS integrates different biosensor data (ECG, breathing rate, EEG) and behavioral data (body gestures correlated with stress), following a training process in which self-rated and clinical-rated stress levels are used as ground truth. Detected stress events for each VR session are reported to the therapist as an aggregated value (ranging from 0 to 1) and graphically displayed on a diagram accessible by the therapist through a web-based interface.}, KEYWORDS = {Psychological Stress, Psychophysiology, Virtual Reality, Decision Support System, Biosensors}, PAGES = {114-120}, URL = {https://publications.cnr.it/doc/283374}, VOLUME = {196}, DOI = {10.3233/978-1-61499-375-9-114}, PUBLISHER = {IOS Press (Tokyo, Paesi Bassi)}, ISSN = {0926-9630}, CONFERENCE_NAME = {Medicine Meets Virtual Reality (MMVR21)}, BOOKTITLE = {Medicine Meets Virtual Reality}, EDITOR = {Westwood, J. D.}, } @INPROCEEDINGS{GOGGI_2014_INPROCEEDINGS_GPGB_280394, AUTHOR = {Goggi, S. and Pardelli, G. and Giannini, S. and Biagioni, S.}, TITLE = {Grey Literature in European Commission Projects}, YEAR = {2014}, ABSTRACT = {The survey is focused on the documentation produced by the European Commission (EC) projects involved in the Framework Programme for Research and Technological Development (hereafter FP7) and managed by the Italian National Research Council (hereafter CNR). In particular, the Grey Literature (GL) available on CORDIS and European Projects websites was analysed. In order to verify how it is managed and whether it is compliant with EC recommendations, some categories were introduced to identify, measure and evaluate the usability and availability of projects production. Data was obtained from a sample of European projects websites.}, KEYWORDS = {Grey Literature. European Commission Projects, A. 
1 INTRODUCTORY AND SURVEY}, PAGES = {98-109}, URL = {https://publications.cnr.it/doc/280394}, VOLUME = {15}, ISBN = {978-90-77484-22-7}, CONFERENCE_NAME = {GL15-Fifteenth International Conference on Grey Literature. The Grey Audit: a Field Assessment in Grey Literature}, CONFERENCE_PLACE = {Bratislava, Slovakia}, CONFERENCE_DATE = {2-3 December 2013}, BOOKTITLE = {The Grey Audit: a Field Assessment in Grey Literature}, EDITOR = {Farace, D. J. and Frantzen, J. and Service, G. I. L. N.}, } @INPROCEEDINGS{KHAN_2014_INPROCEEDINGS_KBF_286824, AUTHOR = {Khan, F. and Boschetti, F. and Frontini, F.}, TITLE = {Using lemon to Model Lexical Semantic  Shift in Diachronic Lexical Resources}, YEAR = {2014}, ABSTRACT = {In this paper we propose a model, called lemonDIA, for representing lexical semantic change using the lemon framework and based on the ontological notion of the perdurant. Namely we extend the notion of sense in lemon by adding a temporal dimension and then define a class of perdurant entities that represents a shift in meaning of a word and which contains different related senses. We start by discussing the general problem of semantic shift and the utility of being able to easily access and represent such information in diachronic lexical resources. We then describe our model and illustrate it with examples.}, KEYWORDS = {lemon, linked data, OWL, ontologies, perdurants, semantic shift}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/workshops/LREC2014Workshop-LDL2014%20Proceedings.pdf}, CONFERENCE_NAME = {3rd Workshop on Linked Data in Linguistics: Multilingual Knowledge Resources and Natural Language Processing (LDL2014)}, CONFERENCE_PLACE = {Reykjavik}, CONFERENCE_DATE = {May 27th, 2014}, BOOKTITLE = {Proceedings of the 3rd Workshop on Linked Data in Linguistics (LDL-2014)}, EDITOR = {Chiarcos, C. and McCrae, J. P. and Osenova, P. and Vertan, C.}, } @INPROCEEDINGS{LAM_2014_INPROCEEDINGS_LT_319595, AUTHOR = {Lamé, M. 
and Tanca, C.}, TITLE = {Hi-storytelling: Street Museum \& Speaking Stones! A Study Case}, YEAR = {2014}, ABSTRACT = {How could museums go down the streets, taking advantage of the historical primary sources, scattered everywhere, impossible to bring back inside the collections, such as inscriptions in situ or reused? We explore the inscription as a dispositive of information and communication and we apply its message to a fictional story telling on contemporary social networks.}, KEYWORDS = {epigrafia, epigrafica digitale, dispositivo epigrafico, social network, musei, storytelling}, URL = {http://mwf2014.museumsandtheweb.com/paper/hi-storytelling-street-museum-speaking-stones-a-study-case/}, CONFERENCE_NAME = {MWF2014: Museums and the Web Florence 2014}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {18-21 febbraio 2014}, EDITOR = {Cherry, R. and Proctor, N.}, } @INPROCEEDINGS{LYDING_2014_INPROCEEDINGS_LSBBCDDLP_289308, AUTHOR = {Lyding, V. and Stemle, E. and Borghetti, C. and Brunello, M. and Castagnoli, S. and Dell'Orletta, F. and Dittmann, H. and Lenci, A. and Pirrelli, V.}, TITLE = {The PAISÀ Corpus of Italian Web Texts}, YEAR = {2014}, ABSTRACT = {PAISÀ is a Creative Commons licensed, large web corpus of contemporary Italian. We describe the design, harvesting, and processing steps involved in its creation.}, PAGES = {36-43}, URL = {http://aclweb.org/anthology/W14-04}, PUBLISHER = {Association for Computational Linguistics (Stroudsburg, USA)}, CONFERENCE_NAME = {Corpus annotation, Tree-bank, Corpus design, Corpus harvesting}, CONFERENCE_PLACE = {Gothenburg, Sweden}, CONFERENCE_DATE = {April 26, 2014}, BOOKTITLE = {Proceedings of the 9th Web as Corpus Workshop (WaC-9)}, EDITOR = {Bildhauer, F. 
and Schäfer, R.}, } @INPROCEEDINGS{MARCONI_2014_INPROCEEDINGS_M_286627, AUTHOR = {Marconi, L.}, TITLE = {La tecnología como auxilio en la creación de un diccionario de una lengua oral de Africa}, YEAR = {2014}, ABSTRACT = {Questo studio, riferito alla lingua fang della Guinea Equatoriale, intende mostrare come la tecnologia possa fornire un valido aiuto nello sviluppo di strumenti per la salvaguardia di lingue orali. Il fang, parlato da più di 1000000 di persone, è parlato soprattutto in Gabon, Camerun, Guinea Equatoriale e nell'estremo nordest del Congo e a Sao Tomé. Si considera suddiviso in sei dialetti principali: ntumu, okak, achí, meké, mveñ, nzaman. In Guinea Equatoriale risulta la più parlata tra le lingue nazionali e quella più omogenea, ciò nonostante si possono individuare due varietà: l'oka e l'ntumu. Il fang, lingua quasi esclusivamente orale, nonostante i numerosi parlanti è in pericolo di estinzione poiché non sta realizzando una evoluzione autonoma e usa lo spagnolo per descrivere elementi della società attuale; il fang è la lingua della comunicazione quotidiana, usata nelle comunicazioni private, in famiglia e nelle situazioni di lavoro non intellettuale. Il fattore più potente nella salvaguardia di una lingua è quello di utilizzarla, anche se in modo non perfetto, la sua introduzione in ogni occasione della vita fino a che il suo uso sia percepito come qualcosa di naturale e non percepito come artificiale; è essenziale quindi dirigere gli sforzi della trasmissione della lingua e della cultura alle generazioni più giovani realizzando strumenti (dizionari, file audio, ecc.) per proteggere la lingua e la cultura. Nell'articolo vengono fatte considerazioni sui dizionari cartacei ed elettronici, descritti gli scopi per la realizzazione di un dizionario elettronico, individuati i possibili destinatari del dizionario bilingue e descritte le linee guida per la progettazione, l'implementazione e la consultazione. 
Viene infine descritto sommariamente il dizionario realizzato fornendo informazioni sull'alfabeto adottato, sulla categorizzazione delle parole, sul tono, sulle varietà considerate e su elementi inclusi nella parte elettronica quali il suono e infine una descrizione degli ingressi sia per la parte fang-spagnolo che per la parte spagnolo-fang.}, KEYWORDS = {diccionarios electrónicos, lengua fang, español}, URL = {https://publications.cnr.it/doc/286627}, PUBLISHER = {Ediciones Catedra 2014 (Santiago de Cuba, CUB)}, ISBN = {9789592840195}, CONFERENCE_NAME = {XIII° Conferencia Internacional de Cultura Africana y Afroamericana}, CONFERENCE_PLACE = {Santiago de Cuba}, CONFERENCE_DATE = {12-16 Aprile 2014}, } @INPROCEEDINGS{MARZI_2014_INPROCEEDINGS_MNF_295178, AUTHOR = {Marzi, C. and Nahli, O. and Ferro, M.}, TITLE = {Word Processing for Arabic Language: A reappraisal of morphology induction through adaptive memory self-organisation strategies}, YEAR = {2014}, ABSTRACT = {Modelling the mental lexicon focuses on processing and storage dynamics, since lexical organisation relies on the process of input recoding and adaptive strategies for long-term memory organisation. A fundamental issue in word processing is represented by the emergence of the morphological organisation level in the lexicon, based on paradigmatic relations between fully-stored word forms. Morphology induction can be defined as the task of identifying morphological formatives within morphologically complex word forms. In the computational framework we propose here (TSOMs), based on Self-Organising Maps with Hebbian connections defined over a temporal layer, the identification/perception of surface morphological relations involves the alignment of recoded representations of morphologically-related input words. 
Facing a non-concatenative morphology such as the Arabic inflectional system prompts a reappraisal of morphology induction through adaptive organisation strategies, which affect both lexical representations and long-term storage. We will show how a strongly adaptive self-organisation during training is conducive to emergent relations between stored word forms, and to high accuracy rates in generalising knowledge of stored words to unknown forms.}, KEYWORDS = {Non-concatenative morphological structure, lexical storage and access, SOMs, word recoding and processing, adaptive strategies, morphology}, PAGES = {241-247}, URL = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=7016626\&punumber%3D6996097}, DOI = {10.1109/CIST.2014.7016626}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-4799-5979-2}, CONFERENCE_NAME = {Third IEEE International Colloquium in Information Science and Technology (CIST)}, CONFERENCE_PLACE = {Tetuan (Morocco)}, CONFERENCE_DATE = {20-22/10/2014}, BOOKTITLE = {IEEE Conference Publications-Catalog Number: CFP1467R-ART}, EDITOR = {El Mohajir, M. and Al Achhab, M. and Chahhou, M. and Mounir, A. and El Mohajir, B. and Pirrelli, V. and Zarghili, A. and Elfar, M.}, } @INPROCEEDINGS{MONEGLIA_2014_INPROCEEDINGS_MBFGKMP_286990, AUTHOR = {Moneglia, M. and Brown, S. and Frontini, F. and Gagliardi, G. and Khan, F. and Monachini, M. and Panunzi, A.}, TITLE = {The IMAGACT Visual Ontology. An Extendable Multilingual Infrastructure for the Representation of Lexical Encoding of Action}, YEAR = {2014}, ABSTRACT = {Action verbs have many meanings, covering actions in different ontological types. Moreover, each language categorizes action in its own way. One verb can refer to many different actions and one action can be identified by more than one verb. The range of variations within and across languages is largely unknown, causing trouble for natural language processing tasks. 
IMAGACT is a corpus-based ontology of action concepts, derived from English and Italian spontaneous speech corpora, which makes use of the universal language of images to identify the different action types extended by verbs referring to action in English, Italian, Chinese and Spanish. This paper presents the infrastructure and the various linguistic information the user can derive from it. IMAGACT makes explicit the variation of meaning of action verbs within one language and allows comparisons of verb variations within and across languages. Because the action concepts are represented with videos, extension into new languages beyond those presently implemented in IMAGACT is done using competence-based judgments by mother-tongue informants without intense lexicographic work involving underdetermined semantic description}, KEYWORDS = {Lexicon, Lexical Database, Ontologies}, PAGES = {3425-3432}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. 
and Piperidis, S.}, } @INPROCEEDINGS{MONTEMAGNI_2014_INPROCEEDINGS_M_330111, AUTHOR = {Montemagni, S.}, TITLE = {DH@ILC: linee di attività e ricerca}, YEAR = {2014}, ABSTRACT = {Le principali linee di ricerca e sviluppo dell'ILC nel settore delle DH possono essere ricondotte ai seguenti filoni: acquisizione e conservazione di testi; progettazione e sviluppo di risorse e strumenti per il trattamento automatico di lingue classiche e varietà storiche della lingua; progettazione e sviluppo di strumenti per l'analisi del testo; costruzione di un'infrastruttura italiana per la ricerca nell'ambito delle scienze umane e sociali.}, KEYWORDS = {Digital Humanities, Trattamento Automatico del Linguaggio, Risorse Linguistiche}, PAGES = {101-111}, URL = {https://publications.cnr.it/doc/330111}, PUBLISHER = {CLEUP (Padova, ITA)}, ISBN = {9788867872602}, CONFERENCE_NAME = {2nd AIUCD Annual Conference}, CONFERENCE_PLACE = {Padova, Italy}, CONFERENCE_DATE = {11-12 December 2013}, BOOKTITLE = {Collaborative Research Practices and Shared Infrastructures for Humanities Computing}, EDITOR = {Agosti, M. and Tomasi, F.}, } @INPROCEEDINGS{MORGAVI_2014_INPROCEEDINGS_MNMCFCM_312489, AUTHOR = {Morgavi, G. and Nerino, R. and Marconi, L. and Cutugno, P. and Ferraris, C. and Cinini, A. and Morando, M.}, TITLE = {NINFA iNtelligent Integrated Network For Aged people}, YEAR = {2014}, ABSTRACT = {In this paper we present the NINFA project outline and its preliminary developments. The project is based on a service platform suited for elder people called the Virtual Village Network, whose user interface allows to deliver at home different services, i.e.: user supervision, communication and interaction among users for social inclusion, exergame delivering, monitoring of the wellness status. The preliminary work done on ICT technologies acceptability issues and on the implementation of the User Interface (UI) and of the Human Computer Interface (HCI) is presented. 
The HCI we developed is particularly suited for elderly people and motor impaired patients because the interaction is managed only by finger/hand gestures and vocal control. Furthermore, the relationship between a set of motor, linguistic and cognitive parameters evaluated during exergame execution and the wellness status of the user is investigated.}, KEYWORDS = {service platform, elder people, at home services, exergame delivering, tele-monitoring, wellness status, human computer interface}, URL = {https://publications.cnr.it/doc/312489}, VOLUME = {11}, DOI = {10.1007/978-3-319-18374-9_25}, PUBLISHER = {Springer (London, GBR)}, ISBN = {978-3-319-18374-9}, CONFERENCE_NAME = {5° Forum Italiano per l'Ambient Assisted Living-ForitAAL}, CONFERENCE_PLACE = {Catania, Italy}, CONFERENCE_DATE = {2-5 Settembre 2014}, BOOKTITLE = {Ambient assisted living, Italian Forum 2014}, EDITOR = {Andò, P. B. and Siciliano, P. P.}, } @INPROCEEDINGS{PALLOTTI_2014_INPROCEEDINGS_PFAMF_287029, AUTHOR = {Pallotti, G. and Frontini, F. and Affè, F. and Monachini, M. and Ferrari, S.}, TITLE = {Presenting a System of Human-Machine Interaction for Performing Map Tasks}, YEAR = {2014}, ABSTRACT = {A system for human machine interaction is presented, that offers second language learners of Italian the possibility of assessing their competence by performing a map task, namely by guiding a virtual follower through a map with written instructions in natural language. 
The underlying natural language processing algorithm is described, and the map authoring infrastructure is presented.}, KEYWORDS = {Language learning, human machine interaction, map tasks}, PAGES = {3963-3966}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{PANUNZI_2014_INPROCEEDINGS_PDGJMMQR_285381, AUTHOR = {Panunzi, A. and De Felice, I. and Gregori, L. and Jacoviello, S. and Monachini, M. and Moneglia, M. and Quochi, V. and Russo, I.}, TITLE = {Translating action verbs using a dictionary of images: the IMAGACT ontology}, YEAR = {2014}, ABSTRACT = {Action verbs have many meanings, covering actions in different ontological types. Moreover, each language categorizes action in its own way. One verb can refer to many different actions and one action can be identified by more than one verb. The range of variations within and across languages is largely unknown, causing trouble in all translation tasks. IMAGACT is a corpus-based ontology of action concepts, derived from English and Italian spontaneous speech corpora, which makes use of the universal language of images to identify the different action types extended by verbs referring to action in English, Italian, Chinese and Spanish. This paper presents the IMAGACT search interface and the various kinds of linguistic information the user can derive from it. IMAGACT makes explicit the variation of meaning of action verbs within one language and allows comparisons of verb variations within and across languages. 
Because the action concepts are represented with videos, extension into new languages beyond those presently implemented in IMAGACT is done using competence-based judgments by mother-tongue informants, without intense lexicographic work involving underdetermined semantic descriptions.}, KEYWORDS = {Action verbs, Image ontology, Multilingual dictionary, Computer-aided translation}, PAGES = {1163-1170}, URL = {http://euralex2014.eurac.edu/en/callforpapers/Documents/EURALEX%202014_gesamt.pdf}, DOI = {10.13140/2.1.3719.2320}, PUBLISHER = {EURAC (Bolzano, ITA)}, ISBN = {978-88-88906-97-3}, CONFERENCE_NAME = {XVI EURALEX International Congress: The User in Focus}, CONFERENCE_PLACE = {Bolzano}, CONFERENCE_DATE = {15-19/07/2014}, BOOKTITLE = {Proceedings of the XVI EURALEX International Congress: The User in Focus}, EDITOR = {Abel, A. and Vettori, C. and Ralli, N.}, } @INPROCEEDINGS{PEDRETTI_2014_INPROCEEDINGS_PDGMPALM_340200, AUTHOR = {Pedretti, I. and Del Grosso, A. and Giovannetti, E. and Mancini, L. and Piccini, S. and Abrate, M. and Lo Duca, A. and Marchetti, A.}, TITLE = {The Clavius on the Web Project: Digitization, Annotation and Visualization of Early Modern Manuscripts}, YEAR = {2014}, ABSTRACT = {This paper describes the full procedure adopted in the context of the Clavius on the Web project, which aims to help Web users to appraise the importance of specific manuscripts by going beyond their digital reproduction. The proposed approach is based on the multilayered explication of linguistic, lexical and semantic data representing the innermost nature of the analyzed manuscripts. The final purpose of the project is to gather and display the results of the three layers of analysis through interactive visualization techniques and export them as Linked Data. All the analyses rely on the XML/TEI encoding of the text, followed by a CTS-based tokenization. 
As a working example for this paper, the analysis of a portion of a manuscript provided by Historical Archives of the Pontifical Gregorian University will be illustrated. The text is a letter written in Latin and sent by Botvitus Nericius to Christophorus Clavius in 1598 from Madrid.}, KEYWORDS = {Clavius, Data Visualization, Early Modern Manuscripts, Lexica and Ontologies, Linked Open Data, NLP for Latin, Promotion of Cultural Heritage}, PAGES = {7}, URL = {http://dl.acm.org/citation.cfm?id=2802636}, DOI = {10.1145/2802612.2802636}, PUBLISHER = {ACM, Association for computing machinery (New York, USA)}, ISBN = {978-1-4503-3295-8}, CONFERENCE_NAME = {Third AIUCD Annual Conference on Humanities and Their Methods in the Digital Ecosystem (AIUCD '14)}, CONFERENCE_PLACE = {Bologna}, CONFERENCE_DATE = {18-19/09/2014}, BOOKTITLE = {Proceedings of the Third AIUCD Annual Conference on Humanities and Their Methods in the Digital Ecosystem (AIUCD '14)}, EDITOR = {Tomasi, F. and Del Turco, R. R. and Tammaro, A. M.}, } @INPROCEEDINGS{PICCINI_2014_INPROCEEDINGS_PRG_282583, AUTHOR = {Piccini, S. and Ruimy, N. and Giovannetti, E.}, TITLE = {Il primo lessico elettronico della terminologia saussuriana}, YEAR = {2014}, ABSTRACT = {Il lavoro che intendiamo presentare si iscrive all'interno di un Progetto di Ricerca Nazionale finanziato dal governo italiano, intitolato "Per una edizione digitale dei manoscritti di Ferdinand de Saussure", e volto a creare un prototipo di edizione digitale degli scritti autografi del grande linguista ginevrino. A tal fine, ogni manoscritto è stato digitalizzato, classificato e dotato di un link ipertestuale che rimanda alla sua trascrizione. Un sistema di gestione dei testi permette di consultare e annotare il singolo manoscritto e di effettuare studi filologici e critici sul corpus digitalizzato. Le concordanze per forma e per lemma prodotte per l'insieme dei testi forniscono un insieme di termini caratteristici dei quali viene descritta la semantica. 
Accanto alla realizzazione di tale piattaforma filologica digitale, uno degli aspetti innovativi del progetto consiste nella creazione del primo thesaurus-lessico elettronico della terminologia linguistica saussuriana. Quest'ultima riceve per la prima volta una rappresentazione strutturata, con una definizione del contenuto semantico di ciascuno dei termini chiave del pensiero del maestro ginevrino ed un quadro esplicito della natura e dell'importanza dei legami che li uniscono. Tale fascio di informazioni dovrebbe contribuire in modo significativo a meglio conoscere ed interpretare il pensiero del padre della linguistica moderna. L'architettura del lessico è ispirata al modello lessicale SIMPLE. Tra i modelli lessicali di maggior rilievo (WordNet, EuroWordNet, ItalWordNet, FrameNet, Pattern Dictionary, SIMPLE e Brandeis Semantic Ontology) esso è apparso essere il più adeguato, in quanto si distingue per alcuni importanti ed innovati aspetti. Il modello SIMPLE, infatti, ha permesso la realizzazione di lessici elettronici multilivello armonizzati per dodici lingue europee, imponendosi così come standard de facto nell'ambito della Lessicografia Computazionale. Successivamente ha fortemente ispirato lo standard ISO per i lessici del TAL Lexical Markup Framework. La strutturazione del lessico ha necessitato anzitutto della creazione di una ontologia lessicale di dominio. A tal fine è stato adottato un approccio centrifugo: in un primo momento sono stati identificati i concetti centrali del dominio di interesse, i quali sono stati poi generalizzati o specificati. L'ontologia così modellizzata è attualmente costituita da 43 tipi semantici ed ha una profondità di 4 livelli. Alcune classi semantiche sono state poi ulteriormente "specificate" in termini di tratti e/o relazioni semantiche obbligatorie (definitorie). 
Un insieme di cinquantotto relazioni semantiche permette di collegare le istanze delle differenti classi ontologiche, mentre trentadue tratti semantici codificano informazioni tipiche di una classe semantica nella sua interezza o di una specifica istanza. La semantica lessicale di ciascuna delle istanze di una classe ontologica è rappresentata in una entrata lessicale, nella quale una ed una sola accezione di un termine, semplice o complesso, viene riccamente definita attraverso un vasto insieme di informazioni formalizzate ed altamente strutturate, che coprono un ampio ventaglio di aspetti semantici. Il senso analizzato è anzitutto associato alla definizione data da Saussure stesso, e laddove non disponibile, a quelle di R. Godel e R. Engler. Per ogni lemma vengono specificati anche il periodo di attestazione, le fonti nelle quali occorre, la sua frequenza di occorrenza e le collocazioni nelle quali appare. La classificazione ontologica del termine rappresenta un dato essenziale al quale si aggiungono l'informazione concernente il dominio d'uso, il tipo di evento denotato (qualora si tratti di un evento), dei tratti semantici distintivi e una vasta rete di relazioni semantico-lessicali. Il modello lessicale adottato, infatti, conferisce una attenzione particolare ai legami che esistono tra i differenti termini. La multidimensionalità intrinseca al senso di ogni lemma è colta e formalizzata attraverso un insieme di relazioni semantiche specifiche del modello SIMPLE e ispirate alla Struttura Qualia della teoria del Lessico Generativo. Esse offrono un quadro preciso della natura dei legami (intra ed extra categoriali) che sussistono tra le unità lessicali contenute nella base di dati sia sull'asse paradigmatico (iperonimia, iponimia, meronimia ed olonimia) sia sull'asse sintagmatico (fornendo in particolare informazioni sull'origine e la funzione dell'entità denotata). 
Inoltre, accanto alle classiche relazioni di sinonimia, antonimia e di derivazione morfologica, sono state create delle relazioni specifiche, al fine di formalizzare nel modo più preciso possibile i legami particolari esistenti tra i termini del dominio della conoscenza e più generalmente la sua organizzazione concettuale. Dei termini predicativi contenuti nel lessico viene descritta anche la struttura argomentale con indicazioni sul ruolo semantico e sulle restrizioni semantiche degli argomenti introdotti. Allo stato attuale, la popolazione del lessico è costituita da 500 entrate lessicali (379 nomi, 113 aggettivi e 8 verbi): si tratta principalmente dei termini proposti da Godel e Engler e di alcune parole-chiave estratte dagli Écrits de linguistique générale. In una fase successiva verrà integrata la nuova terminologia dei manoscritti attualmente studiati. In una prima fase i dati lessicali sono stati gestiti in una piattaforma MS ACCESS. Più recentemente, per esigenze di standardizzazione ed interoperabilità, è stata effettuata una migrazione sulla piattaforma Protégé-OWL. Owl è, infatti, il linguaggio standard del W3C per la rappresentazione e la condivisione di ontologie sul Web. Il sistema di gestione Access, tuttavia, non è stato abbandonato: il lessico, attualmente, è ospitato in entrambe le piattaforme grazie ad un software che consente la perfetta sincronizzazione dei dati. Questa provvisoria "doppia ubicazione" permette, intanto, di sfruttare in modo ottimale le potenzialità dei due sistemi di gestione. Una tale strutturazione informatizzata dei dati lessicali offre numerosi vantaggi. In fase di creazione del lessico, essa permette uno sviluppo collaborativo rigoroso, anche a distanza, e lo stoccaggio di una grande quantità di dati; favorisce una rappresentazione sistematica dei fenomeni linguistici ed assicura la coerenza e la completezza dell'informazione codificata. 
Inoltre, consente di operare costanti controlli di coerenza formale dell'informazione e di effettuare delle modifiche o delle correzioni sull'insieme dei dati. La strutturazione multidimensionale dei concetti del dominio di conoscenza e la rappresentazione semantica altamente strutturata, ricca, diversificata ed espressiva ne fanno uno strumento di ricerca lessicale particolarmente performante. Varie tipologie di ricerca possono essere effettuate molto semplicemente e rapidamente su qualsiasi dato memorizzato, sia esso una relazione, un tratto o una unità semantica e questi dati possono essere interrogati sia singolarmente sia in combinazione. Si ha quindi la possibilità di estrarre gruppi di unità lessicali che condividono una data proprietà o una combinazione di proprietà, secondo dei criteri stabiliti dallo studioso in relazione alle esigenze della sua ricerca. A titolo di esempio, verranno presentate alcune delle numerose ricerche che possono essere effettuate sui dati lessicali all'interno della base di dati. A nostro parere, il thesaurus-lessico semantico della terminologia saussuriana costituisce, per i motivi esposti, uno strumento di grande utilità per gli esperti del settore. Inoltre, l'organizzazione strutturata della conoscenza lessicale, favorendo la ricerca semantica, dovrebbe contribuire in modo significativo ad una più approfondita conoscenza del vocabolario del maestro e pertanto degli aspetti più complessi ed originali del suo pensiero.}, KEYWORDS = {Saussure, SIMPLE, lessici computazionali}, URL = {https://publications.cnr.it/doc/282583}, CONFERENCE_NAME = {XXXVII Convegno della Società Italiana di Glottologia, "Il lessico nella teoria e nella storia linguistica"}, CONFERENCE_PLACE = {Firenze}, CONFERENCE_DATE = {25-27/10/2012}, } @INPROCEEDINGS{PIPERIDIS_2014_INPROCEEDINGS_PPSRCHCDMG_288462, AUTHOR = {Piperidis, S. and Papageorgiou, H. and Spurk, C. and Rehm, G. and Choukri, K. and Hamon, O. and Calzolari, N. and Del Gratta, R. and Magnini, B. 
and Girardi, C.}, TITLE = {META-SHARE: One year after}, YEAR = {2014}, ABSTRACT = {This paper presents META-SHARE (www.meta-share.eu), an open language resource infrastructure, and its usage since its Europe-wide deployment in early 2013. META-SHARE is a network of repositories that store language resources (data, tools and processing services) documented with high-quality metadata, aggregated in central inventories allowing for uniform search and access. META-SHARE was developed by META-NET (www.meta-net.eu) and aims to serve as an important component of a language technology marketplace for researchers, developers, professionals and industrial players, catering for the full development cycle of language technology, from research through to innovative products and services. The observed usage in its initial steps, the steadily increasing number of network nodes, resources, users, queries, views and downloads are all encouraging and considered as supportive of the choices made so far. In tandem, take-up activities like direct linking and processing of datasets by language processing services as well as metadata transformation to RDF are expected to open new avenues for data and resources linking and boost the organic growth of the infrastructure while facilitating language technology deployment by much wider research communities and industrial sectors.}, KEYWORDS = {Infrastructures, language resources identification, language resources documentation, metadata, language resources sharing, language resources licensing}, PAGES = {1532-1538}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {LREC'14}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {May, 26-31}, BOOKTITLE = {Proceedings of the Ninth International Conference on Language Resources and Evaluation}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. 
and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{PIRRELLI_2014_INPROCEEDINGS_PMF_290601, AUTHOR = {Pirrelli, V. and Marzi, C. and Ferro, M.}, TITLE = {Two-dimensional Wordlikeness Effects in Lexical Organisation}, YEAR = {2014}, ABSTRACT = {The main focus of research on wordlikeness has been on how serial processing strategies affect perception of similarity and, ultimately, the global network of associative relations among words in the mental lexicon. Comparatively little effort has been put so far, however, into an analysis of the reverse relationship: namely, how global organisation effects influence the speakers' perception of word similarity and of words' internal structure. In this paper, we explore the relationship between the two dimensions of wordlikeness (the "syntagmatic" and the "paradigmatic" one), to suggest that the same set of principles of memory organisation can account for both dimensions.}, KEYWORDS = {wordlikeness, lexical access, word processing, frequency, memory}, PAGES = {301-305}, URL = {http://clic.humnet.unipi.it/it/atti.html}, VOLUME = {1}, DOI = {10.12871/CLICIT2014158}, ISBN = {978-8-86741-472-7}, CONFERENCE_NAME = {First Italian Conference on Computational Linguistics CLiC-it 2014 \& Fourth International Workshop EVALITA 2014}, CONFERENCE_PLACE = {Pisa, Italy}, CONFERENCE_DATE = {9-11/12/2014}, BOOKTITLE = {The First Italian Conference on Computational Linguistics-Proceedings}, EDITOR = {Basili, R. and Lenci, A. and Magnini, B.}, } @INPROCEEDINGS{RHEM_2014_INPROCEEDINGS_RUCM_287035, AUTHOR = {Rehm, G. and Uszkoreit, H. and Calzolari, N. 
and Monachini, M.}, TITLE = {The Strategic Impact of META-NET on the Regional, National and International Level}, YEAR = {2014}, ABSTRACT = {This article provides an overview of the dissemination work carried out in META-NET from 2010 until early 2014; we describe its impact on the regional, national and international level, mainly with regard to politics and the situation of funding for LT topics. This paper documents the initiative's work throughout Europe in order to boost progress and innovation in our field.}, KEYWORDS = {LR National/International Projects, Infrastructural/Policy Issues, Multilinguality, Machine Translation}, PAGES = {1517-1524}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/index.html}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {9th International Conference on Language Resources and Evaluation, LREC 2014}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 may}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{SIMI_2014_INPROCEEDINGS_SBM_329779, AUTHOR = {Simi, M. and Bosco, C. and Montemagni, S.}, TITLE = {Less is More? Towards a Reduced Inventory of Categories for Training a Parser for the Italian Stanford Dependencies}, YEAR = {2014}, ABSTRACT = {Stanford Dependencies (SD) represent nowadays a de facto standard as far as dependency annotation is concerned. The goal of this paper is to explore pros and cons of different strategies for generating SD annotated Italian texts to enrich the existing Italian Stanford Dependency Treebank (ISDT). This is done by comparing the performance of a statistical parser (DeSR) trained on a simpler resource (the augmented version of the Merged Italian Dependency Treebank or MIDT+) and whose output was automatically converted to SD, with the results of the parser directly trained on ISDT. 
Experiments carried out to test reliability and effectiveness of the two strategies show that the performance of a parser trained on the reduced dependencies repertoire, whose output can be easily converted to SD, is slightly higher than the performance of a parser directly trained on ISDT. A non-negligible advantage of the first strategy for generating SD annotated texts is that semi-automatic extensions of the training resource are more easily and consistently carried out with respect to a reduced dependency tagset. Preliminary experiments carried out for generating the collapsed and propagated SD representation are also reported.}, KEYWORDS = {Italian Treebank, Harmonization and Merging of Resources, Stanford Dependencies}, URL = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/818_Paper.pdf}, PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, ISBN = {978-2-9517408-8-4}, CONFERENCE_NAME = {Ninth International Conference on Language Resources and Evaluation (LREC'14)}, CONFERENCE_PLACE = {Reykjavik, Iceland}, CONFERENCE_DATE = {26-31 May 2014}, BOOKTITLE = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)}, EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Loftsson, H. and Maegaard, B. and Mariani, J. and Moreno, A. and Odijk, J. and Piperidis, S.}, } @INPROCEEDINGS{ABRATE_2014_INPROCEEDINGS_ADGLMMPP_282564, AUTHOR = {Abrate, M. and Del Grosso, A. M. and Giovannetti, E. and Lo Duca, A. and Marchetti, A. and Mancini, L. and Pedretti, I. and Piccini, S.}, TITLE = {Il Progetto Clavius on the Web: tecnologie linguistico-semantiche al servizio del patrimonio documentale e degli archivi storici}, YEAR = {2014}, ABSTRACT = {L'obiettivo del progetto Clavius on the Web è quello di valorizzare una parte dei manoscritti conservati dall'Archivio storico della Pontificia Università Gregoriana (APUG). 
Il progetto prende in esame alcuni manoscritti relativi a Christophorus Clavius (1538-1612), matematico e astronomo gesuita. I manoscritti sono digitalizzati, trascritti, tradotti e analizzati dal punto di vista linguistico, lessicale e semantico. La terminologia e le entità di dominio individuate nel testo sono strutturate in un lessico e una ontologia, e collegate a risorse già disponibili sulla Rete secondo i principi dei Linked Data. La loro visualizzazione sul Web è implementata mediante tecniche di Data Visualization: la prima si basa sulla corrispondenza tra testo, traduzione e immagine mentre la seconda ha lo scopo di mostrare i contenuti delle analisi, con particolare enfasi alla navigazione delle risorse lessicali e ontologiche prodotte durante le varie fasi.}, URL = {https://publications.cnr.it/doc/282564}, CONFERENCE_NAME = {AIUCD 2014-Terzo convegno annuale: La metodologia della ricerca umanistica nell'ecosistema digitale}, CONFERENCE_PLACE = {Bologna}, CONFERENCE_DATE = {18-19 Settembre 2014}, BOOKTITLE = {AIUCD2014-La metodologia della ricerca umanistica nell'ecosistema digitale-Abstracts 3rd annual conference, 18-19 settembre 2014}, EDITOR = {Rossi, F. and Tomasi, F.}, } @INPROCEEDINGS{BELEFFI_2014_INPROCEEDINGS_BS_319421, AUTHOR = {Beleffi, E. and Sassi, M.}, TITLE = {La sicurezza del paziente sui quotidiani in Italia: indagine preliminare sui termini e l'andamento degli eventi}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/319421}, CONFERENCE_NAME = {FORUM RISK MANAGEMENT IN SANITÀ 2014}, CONFERENCE_PLACE = {Arezzo}, CONFERENCE_DATE = {25-28 novembre 2014}, } @INPROCEEDINGS{BIAGIONI_2014_INPROCEEDINGS_BDGP_280391, AUTHOR = {Biagioni, S. and Deluca, R. and Giannini, S. 
and Pardelli, G.}, TITLE = {I sistemi informativi della Biblioteca dell'Area della Ricerca di Pisa}, YEAR = {2014}, ABSTRACT = {Description of the CNR Library, (Pisa, Italy) and its services.}, KEYWORDS = {Sistemi informativi per biblioteche, Servizi bibliotecari}, URL = {https://publications.cnr.it/doc/280391}, CONFERENCE_NAME = {Seminario rivolto agli alunni dell'Istituto Tecnico Economico "F. Carrara" di Lucca, organizzato dall'Istituto di Linguistica Computazionale "A. Zampolli" del CNR di Pisa}, CONFERENCE_PLACE = {Pisa, Area della Ricerca CNR}, CONFERENCE_DATE = {31 marzo 2014}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_B_288065, AUTHOR = {Boschetti, F.}, TITLE = {OCR: instruments linguistiques pour améliorer la précision de la reconnaissance optique des caractères dans le cas du grec ancien et de l'arabe}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/288065}, CONFERENCE_NAME = {Workshop International en Traitement Automatique de la Langue Arabe}, CONFERENCE_PLACE = {Fes, Marocco}, CONFERENCE_DATE = {8 maggio 2014}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_B_288071, AUTHOR = {Boschetti, F.}, TITLE = {L'edizione scientifica digitale del testo letterario e del testo epigrafico: convergenze e divergenze-Parte I}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/288071}, CONFERENCE_NAME = {Risorse digitali e strumenti collaborativi per le Scienze dell'Antichità}, CONFERENCE_PLACE = {Venezia}, CONFERENCE_DATE = {2-3 ottobre 2014}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_B_288073, AUTHOR = {Boschetti, F.}, TITLE = {Extracting Information Related To Writings From Traditional Paper Corpora}, YEAR = {2014}, URL = {http://bit.ly/11ps2tD}, CONFERENCE_NAME = {EAGLE 2014 International Conference}, CONFERENCE_PLACE = {Paris}, CONFERENCE_DATE = {29-30 settembre 2014}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BCD_288061, AUTHOR = {Boschetti, F. and Caruso, L. and Del Grosso, A. 
M.}, TITLE = {Euporia: Un'esperienza di Filologia Collaborativa a scuola, per promuovere il circolo virtuoso fra ricerca e didattica}, YEAR = {2014}, URL = {https://publications.cnr.it/doc/288061}, CONFERENCE_NAME = {Internet Festival}, CONFERENCE_PLACE = {Pisa}, CONFERENCE_DATE = {11 ottobre 2014}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BDMNP_288372, AUTHOR = {Boschetti, F. and Del Gratta, R. and M