@ARTICLE{ALZETTA_2023_ARTICLE_ADMPV_488202, 
 AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Miaschi, A. and Prat, E. and Venturi, G.}, 
 TITLE = {Tell me how you write and I'll tell you what you read: a study on the writing style of book reviews}, 
 YEAR = {2023}, 
 ABSTRACT = {Purpose: The authors' goal is to investigate variations in the writing style of book reviews published on different social reading platforms and referring to books of different genres, which enables acquiring insights into communication strategies adopted by readers to share their reading experiences. Design/methodology/approach: The authors propose a corpus-based study focused on the analysis of A Good Review, a novel corpus of online book reviews written in Italian, posted on Amazon and Goodreads, and covering six literary fiction genres. The authors rely on stylometric analysis to explore the linguistic properties and lexicon of reviews and the authors conducted automatic classification experiments using multiple approaches and feature configurations to predict either the review's platform or the literary genre. Findings: The analysis of user-generated reviews demonstrates that language is a quite variable dimension across reading platforms, but not as much across book genres. The classification experiments revealed that features modelling the syntactic structure of the sentence are reliable proxies for discerning Amazon and Goodreads reviews, whereas lexical information showed a higher predictive role for automatically discriminating the genre. Originality/value: The high availability of cultural products makes information services necessary to help users navigate these resources and acquire information from unstructured data. This study contributes to a better understanding of the linguistic characteristics of user-generated book reviews, which can support the development of linguistically-informed recommendation services. Additionally, the authors release a novel corpus of online book reviews meant to support the reproducibility and advancements of the research.}, 
 KEYWORDS = {Stylometric analysis, Genre detection, Natural language processing, Book reviews}, 
 PAGES = {23}, 
 URL = {https://www.emerald.com/insight/content/doi/10.1108/JD-04-2023-0073/full/html}, 
 VOLUME = {79}, 
 DOI = {10.1108/JD-04-2023-0073}, 
 PUBLISHER = {Emerald (Bingley, Regno Unito)}, 
 ISSN = {0022-0418}, 
 JOURNAL = {Journal of documentation}, 
}

@ARTICLE{MIASCHI_2023_ARTICLE_MABDV_488203, 
 AUTHOR = {Miaschi, A. and Alzetta, C. and Brunato, D. and Dell'Orletta, F. and Venturi, G.}, 
 TITLE = {Testing the Effectiveness of the Diagnostic Probing Paradigm on Italian Treebanks}, 
 YEAR = {2023}, 
 ABSTRACT = {The outstanding performance recently reached by neural language models (NLMs) across many natural language processing (NLP) tasks has steered the debate towards understanding whether NLMs implicitly learn linguistic competence. Probes, i.e., supervised models trained using NLM representations to predict linguistic properties, are frequently adopted to investigate this issue. However, it is still questioned if probing classification tasks really enable such investigation or if they simply hint at surface patterns in the data. This work contributes to this debate by presenting an approach to assessing the effectiveness of a suite of probing tasks aimed at testing the linguistic knowledge implicitly encoded by one of the most prominent NLMs, BERT. To this aim, we compared the performance of probes when predicting gold and automatically altered values of a set of linguistic features. Our experiments were performed on Italian and were evaluated across BERT's layers and for sentences with different lengths. As a general result, we observed higher performance in the prediction of gold values, thus suggesting that the probing model is sensitive to the distortion of feature values. However, our experiments also showed that the length of a sentence is a highly influential factor that is able to confound the probing model's predictions.}, 
 KEYWORDS = {Neural language model, Probing tasks, Treebanks}, 
 PAGES = {19}, 
 URL = {https://www.mdpi.com/2078-2489/14/3/144}, 
 VOLUME = {14}, 
 DOI = {10.3390/info14030144}, 
 PUBLISHER = {MDPI (Basel, Svizzera)}, 
 ISSN = {2078-2489}, 
 JOURNAL = {Information (Basel)}, 
}

@ARTICLE{SALES_2023_ARTICLE_SATD_488204, 
 AUTHOR = {Sales, S. S. and Alzetta, C. and Tatay, C. M. and Dell'Orletta, F.}, 
 TITLE = {Analysing Deception in Witness Memory Though Linguistic Styles in Spontaneous Language}, 
 YEAR = {2023}, 
 ABSTRACT = {The act of lying and its detection have raised interest in many fields, from the legal system to our daily lives. Considering that testimonies are commonly based on linguistic parameters, natural language processing, a research field concerned with programming computers to process and analyse natural language texts or speech, is a topic of interest on this front. This study aimed to examine the linguistic styles of simulated deception and true testimonies collected with the aim of studying witness memory. Study participants were asked to act as a witness of a crime by retelling the story they had just read. Cognitive interviewing techniques were used to collect testimony under two conditions: truth and simulated deception. A sample of 48 participants volunteered to participate in the study. Analyses of the linguistic indicators and content were carried out. Specifically, we performed a comparison of testimonies of the same participant by condition to analyse the variation between (i) lexical and (ii) linguistic features and (iii) content and speech characteristics (disfluencies) depending on the narrative condition. Concerning lexical properties, adjectives were the most-varying grammatical category between truthful and deceptive testimonies. Furthermore, in the linguistic analysis, we observed that truthful testimonies were generally longer than deceptive ones in terms of the number of words and sentences and also characterised by more articulated sentence structures, and these differences were also statistically significant. Regarding the analysis of the content, cognitive criteria (details) and admitting lack of memory were more present in truthful statements. By providing an objective measure, these results are of interest in developing NLP tools for assessing the credibility of testimonies in forensics.}, 
 KEYWORDS = {Natural language processing, Simulated deception, Stylometric analysis}, 
 PAGES = {26}, 
 URL = {https://www.mdpi.com/2076-3425/13/2/317}, 
 VOLUME = {13}, 
 DOI = {10.3390/brainsci13020317}, 
 PUBLISHER = {Molecular Diversity Preservation International (Basel)}, 
 ISSN = {2076-3425}, 
 JOURNAL = {Brain sciences}, 
}

@INPROCEEDINGS{MIASCHI_2021_INPROCEEDINGS_MABDV_463833, 
 AUTHOR = {Miaschi, A. and Alzetta, C. and Brunato, D. and Dell'Orletta, F. and Venturi, G.}, 
 TITLE = {Probing tasks under pressure}, 
 YEAR = {2021}, 
 ABSTRACT = {Probing tasks are frequently used to evaluate whether the representations of Neural Language Models (NLMs) encode linguistic information. However, it is still questioned if probing classification tasks really enable such investigation or they simply hint for surface patterns in the data. We present a method to investigate this question by comparing the accuracies of a set of probing tasks on gold and automatically generated control datasets. Our results suggest that probing tasks can be used as reliable diagnostic methods to investigate the linguistic information encoded in NLMs representations.}, 
 KEYWORDS = {Neural Language Models, Linguistic probing, Treebanks}, 
 PAGES = {1-7}, 
 URL = {http://ceur-ws.org/Vol-3033/paper29.pdf}, 
 VOLUME = {3033}, 
 PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, 
 ISSN = {1613-0073}, 
 CONFERENCE_NAME = {8th Italian Conference on Computational Linguistics (CLIC-it 2021)}, 
 CONFERENCE_PLACE = {Milano}, 
 CONFERENCE_DATE = {29/06-01/07/2022}, 
 BOOKTITLE = {CEUR workshop proceedings}, 
}

@ARTICLE{ALZETTA_2020_ARTICLE_ADMV_463828, 
 AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, 
 TITLE = {Linguistically-driven Selection of Difficult-to-Parse Dependency Structures}, 
 YEAR = {2020}, 
 ABSTRACT = {The paper illustrates a novel methodology meeting a twofold goal, namely quantifying the reliability of automatically generated dependency relations without using gold data on the one hand, and identifying which are the linguistic constructions negatively affecting the parser performance on the other hand. These represent objectives typically investigated in different lines of research, with different methods and techniques. Our methodology, at the crossroads of these perspectives, allows not only to quantify the parsing reliability of individual dependency types but also to identify and weight the contextual properties making relation instances more or less difficult to parse. The proposed methodology was tested in two different and complementary experiments, aimed at assessing the degree of parsing difficulty across (a) different dependency relation types, and (b) different instances of the same relation. The results show that the proposed methodology is able to identify difficult-to-parse dependency relations without relying on gold data and by taking into account a variety of intertwined linguistic factors. These findings pave the way to novel applications of the methodology, both in the direction of defining new evaluation metrics based purely on automatically parsed data and towards the automatic creation of challenge sets.}, 
 KEYWORDS = {Linguistic Complexity, Syntactic Parsing, Evaluation metrics}, 
 PAGES = {37-60}, 
 URL = {https://journals.openedition.org/ijcol/719}, 
 VOLUME = {6}, 
 DOI = {10.4000/ijcol.719}, 
 PUBLISHER = {aAccademia University Press, Torino (Italia)}, 
 ISSN = {2499-4553}, 
 JOURNAL = {Italian Journal of Computational Linguistics}, 
}

@INPROCEEDINGS{ALZETTA_2020_INPROCEEDINGS_ADMOSV_444113, 
 AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Osenova, P. and Simov, K. and Venturi, G.}, 
 TITLE = {Quantitative linguistic investigations across universal dependencies treebanks}, 
 YEAR = {2020}, 
 ABSTRACT = {The paper illustrates a case study aimed at identifying cross-lingual quantitative trends in the distribution of dependency relations in treebanks for typologically different languages. Preliminary results show interesting differences rooted either in language-specific peculiarities or cross-lingual annotation inconsistencies, with a potential impact on different application scenarios.}, 
 KEYWORDS = {Universal Dependencies Treebanks, Cross-linguistic analysis, Typology}, 
 PAGES = {1-7}, 
 URL = {http://ceur-ws.org/Vol-2769/paper_59.pdf}, 
 VOLUME = {2769}, 
 PUBLISHER = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen (Aachen, Germania)}, 
 ISSN = {1613-0073}, 
 ISBN = {979-12-80136-28-2}, 
 CONFERENCE_NAME = {7th Italian Conference on Computational Linguistics (CLiC-it)}, 
 CONFERENCE_PLACE = {Online}, 
 CONFERENCE_DATE = {1-3/03/2021}, 
 BOOKTITLE = {CEUR workshop proceedings}, 
}

@INPROCEEDINGS{ALZETTA_2020_INPROCEEDINGS_AMDKT_442044, 
 AUTHOR = {Alzetta, C. and Miaschi, A. and Dell'Orletta, F. and Koceva, F. and Torre, I.}, 
 TITLE = {PRELEARN @ EVALITA 2020: Overview of the Prerequisite Relation Learning Task for Italian}, 
 YEAR = {2020}, 
 ABSTRACT = {The Prerequisite Relation Learning (PRELEARN) task is the EVALITA 2020 shared task on concept prerequisite learning, which consists of classifying prerequisite relations between pairs of concepts distinguishing between prerequisite pairs and non-prerequisite pairs. Four sub-tasks were defined: two of them define different types of features that participants are allowed to use when training their model, while the other two define the classification scenarios where the proposed models would be tested. In total, 14 runs were submitted by 3 teams comprising 9 total individual participants.}, 
 KEYWORDS = {nlp, prerequisite learning, shared task}, 
 URL = {http://ceur-ws.org/Vol-2765/paper164.pdf}, 
 CONFERENCE_NAME = {Seventh Evaluation Campaign of Natural Language Processing and Speech Tools for Italian (EVALITA)}, 
 CONFERENCE_DATE = {17/12/2020}, 
}

@INPROCEEDINGS{MIASCHI_2020_INPROCEEDINGS_MABDV_442040, 
 AUTHOR = {Miaschi, A. and Alzetta, C. and Brunato, D. and Dell'Orletta, F. and Venturi, G.}, 
 TITLE = {Is Neural Language Model Perplexity Related to Readability?}, 
 YEAR = {2020}, 
 ABSTRACT = {This paper explores the relationship between Neural Language Model (NLM) perplexity and sentence readability. Starting from the evidence that NLMs implicitly acquire sophisticated linguistic knowledge from a huge amount of training data, our goal is to investigate whether perplexity is affected by linguistic features used to automatically assess sentence readability and if there is a correlation between the two metrics. Our findings suggest that this correlation is actually quite weak and the two metrics are affected by different linguistic phenomena.}, 
 KEYWORDS = {nlp, neural language models, readability}, 
 URL = {http://ceur-ws.org/Vol-2769/paper_57.pdf}, 
 ISBN = {979-12-80136-28-2}, 
 CONFERENCE_NAME = {Seventh Italian Conference on Computational Linguistics}, 
 CONFERENCE_DATE = {01-03/03/2021}, 
}

@ARTICLE{ALZETTA_2019_ARTICLE_ADMV_423880, 
 AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, 
 TITLE = {INFERRING QUANTITATIVE TYPOLOGICAL TRENDS FROM MULTILINGUAL TREEBANKS. A CASE STUDY}, 
 YEAR = {2019}, 
 ABSTRACT = {In the past decades, linguistic typology went through a renewing phase that involved a significant change in the research questions and methods of the discipline, which is now interested in fine-grained features underlying language diversity. In this paper, we propose a novel approach to address the newly defined needs of linguistic typology by extracting qualitative and quantitative information about a wide range of features from multilingual annotated corpora based on Natural Language Processing methods and techniques. We tested our method in a case study focusing on word order variation in two widely investigated constructions, VERB-SUBJ(ect) and NOUN-ADJ(ective), with a specific view to structural and functional factors underlying the preference for one or the other order, both intra- and cross-linguistically, and their interaction. Preliminary experiments have been carried out aimed at acquiring typological evidence from a selection of linguistically annotated treebanks for three different languages, namely Italian, Spanish and English. Our results show the effectiveness of the method in letting similarities and differences also emerge from typologically close languages.}, 
 KEYWORDS = {language typology, multilingual annotated corpora, linguistic knowledge extraction and modelling, word order variation}, 
 PAGES = {209-242}, 
 URL = {https://www.rivisteweb.it/doi/10.1418/95391}, 
 VOLUME = {18}, 
 DOI = {10.1418/95391}, 
 PUBLISHER = {Il Mulino, Bologna (Italia)}, 
 ISSN = {1720-9331}, 
 JOURNAL = {Lingue e linguaggio}, 
}

@INPROCEEDINGS{ALZETTA_2019_INPROCEEDINGS_ADMV_423881, 
 AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, 
 TITLE = {Dissecting Treebanks to Uncover Typological Trends. A Multilingual Comparative Approach}, 
 YEAR = {2019}, 
 ABSTRACT = {Over the last years, linguistic typology started attracting the interest of the community working on cross- and multi-lingual NLP as a way to tackle the bottleneck deriving from the lack of annotated data for many languages. Typological information is mostly acquired from publicly accessible typological databases, manually constructed by linguists. As reported in Ponti et al. (2018), despite the abundant information contained in them for many languages, these resources suffer from two main shortcomings, i.e. their limited coverage and the discrete nature of features (only "the majority value rather than the full range of possible values and their corresponding frequencies" is reported). Corpus-based studies can help to automatically acquire quantitative typological evidence which might be exploited for polyglot NLP. Recently, the availability of corpora annotated following a cross-linguistically consistent annotation scheme such as the one developed in the Universal Dependencies project is prompting new comparative linguistic studies aimed to identify similarities as well as idiosyncrasies among typologically different languages (Nivre, 2015). The line of research described here is aimed at acquiring quantitative typological evidence from UD treebanks through a multilingual contrastive approach.}, 
 KEYWORDS = {Natural Language Processing, Linguistic Typology}, 
 PAGES = {1-3}, 
 URL = {https://typology-and-nlp.github.io/2019/assets/2019/papers/5.pdf}, 
 ISBN = {978-1-950737-29-1}, 
 CONFERENCE_NAME = {1st TyP-NLP: The Workshop on Typology for Polyglot NLP, ACL workshop}, 
 CONFERENCE_PLACE = {Firenze}, 
 CONFERENCE_DATE = {01/08/2019}, 
}

@INPROCEEDINGS{ALZETTA_2018_INPROCEEDINGS_ADMSV_391617, 
 AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Simi, M. and Venturi, G.}, 
 TITLE = {Assessing the Impact of Iterative Error Detection and Correction. A Case Study on the Italian Universal Dependency Treebank}, 
 YEAR = {2018}, 
 ABSTRACT = {Detection and correction of errors and inconsistencies in "gold treebanks" are becoming more and more central topics of corpus annotation. The paper illustrates a new incremental method for enhancing treebanks, with particular emphasis on the extension of error patterns across different textual genres and registers. Impact and role of corrections have been assessed in a dependency parsing experiment carried out with four different parsers, whose results are promising. For both evaluation datasets, the performance of parsers increases, in terms of the standard LAS and UAS measures and of a more focused measure taking into account only relations involved in error patterns, and at the level of individual dependencies.}, 
 KEYWORDS = {Error Detection, Universal Dependency Treebanks, Syntactic parsing}, 
 PAGES = {1-7}, 
 URL = {http://universaldependencies.org/udw18/PDFs/39_Paper.pdf}, 
 ISBN = {978-1-948087-84-1}, 
 CONFERENCE_NAME = {Universal Dependencies Workshop 2018 (UDW 2018)}, 
 CONFERENCE_PLACE = {Brussels}, 
 CONFERENCE_DATE = {01/11/2018}, 
}

@INPROCEEDINGS{ALZETTA_2018_INPROCEEDINGS_ADMV_382333, 
 AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, 
 TITLE = {Dangerous Relations in Dependency Treebanks}, 
 YEAR = {2018}, 
 ABSTRACT = {The paper illustrates an effective and innovative method for detecting erroneously annotated arcs in gold dependency treebanks based on an algorithm originally developed to measure the reliability of automatically produced dependency relations. The method permits to significantly restrict the error search space and, more importantly, to reliably identify patterns of systematic recurrent errors which represent dangerous evidence to a parser which tendentially will replicate them. Achieved results demonstrate effectiveness and reliability of the method.}, 
 KEYWORDS = {Dependency treebanks, Error Detection, Linguistic Annotation}, 
 PAGES = {201-210}, 
 URL = {http://aclweb.org/anthology/W/W17/W17-7624.pdf}, 
 ISBN = {978-80-88132-04-2}, 
 CONFERENCE_NAME = {16th International Workshop on Treebanks and Linguistic Theories}, 
 CONFERENCE_PLACE = {Praga}, 
 CONFERENCE_DATE = {23-24 gennaio 2018}, 
}

@INPROCEEDINGS{ALZETTA_2018_INPROCEEDINGS_ADMV_385342, 
 AUTHOR = {Alzetta, C. and Dell'Orletta, F. and Montemagni, S. and Venturi, G.}, 
 TITLE = {Universal Dependencies and Quantitative Typological Trends. A Case Study on Word Order}, 
 YEAR = {2018}, 
 ABSTRACT = {The paper presents a new methodology aimed at acquiring typological evidence from "gold" treebanks for different languages. In particular, it investigates whether and to what extent algorithms developed for assessing the plausibility of automatically produced syntactic annotations could contribute to shed light on key issues of the linguistic typological literature. It reports the first and promising results of a case study focusing on word order patterns carried out on three different languages (English, Italian and Spanish).}, 
 KEYWORDS = {Linguistic Knowledge Extraction, Dependency Treebanks, Linguistic Typology}, 
 PAGES = {4540-4549}, 
 URL = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/1109.pdf}, 
 PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)}, 
 ISBN = {979-10-95546-00-9}, 
 CONFERENCE_NAME = {Proceedings of the 11th Edition of the Language Resources and Evaluation Conference (LREC 2018)}, 
 CONFERENCE_PLACE = {Miyazaki (Japan)}, 
 CONFERENCE_DATE = {7-12 maggio 2018}, 
}