@comment{
  Journal article in Knowledge-Based Systems, volume 342.
  NOTE(review): the DOI value in this entry appears truncated (it stops after
  "10.1016/j"). It is kept verbatim here; restore the full DOI from the
  publisher record before relying on it.
}
@ARTICLE{DINI_2026_ARTICLE_DDBD_580564,
  AUTHOR   = {Dini, L. and Domenichelli, L. and Brunato, D. and Dell'Orletta, F.},
  TITLE    = {On the impact of pretraining data ordering in transformer encoder- and decoder-only language models},
  JOURNAL  = {Knowledge-Based Systems},
  YEAR     = {2026},
  VOLUME   = {342},
  DOI      = {10.1016/j},
  KEYWORDS = {Curriculum learning, Data ordering, Language model pretraining, Linguistic representations, Representation geometry},
  ABSTRACT = {Pretraining large language models typically relies on randomly ordered corpora,
    implicitly assuming that data order has limited impact on learning. However, curriculum
    learning suggests that the sequence of training examples can influence optimization and
    representation dynamics. In this work, we systematically examine pretraining data ordering
    as an independent design variable for transformer-based language models, analyzing how
    curriculum-inspired strategies affect learning trajectories, representations, and transfer
    performance. We pretrain encoder-only and decoder-only models under controlled conditions,
    varying only the ordering of training data according to readability-based complexity
    proxies and their inverted variants, alongside multiple random baselines. Beyond final
    accuracy, we adopt a multi-dimensional evaluation framework combining intrinsic metrics,
    linguistic probing across training stages, downstream tasks, and geometric analyses of
    embedding spaces. Results indicate architecture-dependent tendencies in response to data
    ordering. Encoder models generally exhibit stronger sensitivity to curriculum strategies,
    with noticeable differences in optimization behavior, probing dynamics, and representation
    geometry. Decoder models appear comparatively more stable under forward curricula, with
    more pronounced effects emerging under inverted orderings. Probing analyses suggest that
    early improvements reflect differences in data exposure rather than accelerated linguistic
    acquisition, while later-stage effects selectively mirror properties emphasized by specific
    curricula. Geometric analyses show that data ordering reshapes global variance structure,
    often increasing anisotropy, without substantially altering nonlinear intrinsic
    dimensionality. Overall, data ordering functions as a selective inductive bias during
    pretraining, influencing learning dynamics and representational emphasis rather than
    consistently improving performance. These findings clarify how curriculum design interacts
    with transformer architectures and delineate its practical impact on pretraining outcomes},
}

@comment{
  Journal article in Neurocomputing, volume 650.
}
@ARTICLE{DINI_2025_ARTICLE_DMBD_570447,
  AUTHOR   = {Dini, L. and Moroni, L. and Brunato, D. and Dell'Orletta, F.},
  TITLE    = {In the eyes of a language model: A comprehensive examination through eye-tracking data},
  JOURNAL  = {Neurocomputing},
  YEAR     = {2025},
  VOLUME   = {650},
  DOI      = {10.1016/j.neucom.2025.130617},
  ISSN     = {0925-2312},
  URL      = {https://iris.cnr.it/handle/20.500.14243/570447},
  KEYWORDS = {Cognitive plausibility, Eye-tracking, Interpretability, Neural attention, Neural Language Models},
  ABSTRACT = {Cognitive signals, particularly eye-tracking data, offer a unique lens for
    understanding human sentence processing. Leveraging eye-gaze data from the English and
    Italian section of the Multilingual Eye-Movement Corpus (MECO), we designed a series of
    experiments aiming at exploring whether pre-trained neural language models (NLMs) encode
    patterns representative of human reading behavior and if directly incorporating this
    information through a fine-tuning process influences the cognitive plausibility of the
    model. Additionally, we sought to determine if such an impact persists through a downstream
    task. Our findings reveal that transformers encode eye-gaze-related information during
    pretraining and that explicitly integrating eye-tracking features increases model alignment
    with human attention. When investigating the effect of intermediate fine-tuning on
    eye-tracking data on the model's performance on a downstream task, we observe that this
    intermediate step does not result in catastrophic forgetting, despite the very different
    nature of the considered downstream task. In addition, the attention mechanism of models
    undergoing intermediate fine-tuning remains closely aligned with human attention. In
    conclusion, our comprehensive evaluation of NLMs informed by human attention patterns
    offers great potential for advancing the growing field of eXplainable Artificial
    Intelligence (XAI). Grounding language models in real-world cognitive processes enables the
    creation of systems that not only replicate human language output but also align with the
    cognitive mechanisms behind reading and comprehension. This alignment with human behavior
    enhances model adaptability, interpretability, and effectiveness, fostering more
    human-centric, transparent, and reliable AI applications across various domains.},
}

@comment{
  Conference paper, COLING 2025 (see CONFERENCE_NAME).
}
@INPROCEEDINGS{DINI_2025_INPROCEEDINGS_DBDC_570521,
  AUTHOR          = {Dini, L. and Brunato, D. and Dell'Orletta, F. and Caselli, T.},
  TITLE           = {{TEXT-CAKE}: Challenging Language Models on Local Text Coherence},
  BOOKTITLE       = {Proceedings-International Conference on Computational Linguistics, {COLING}},
  YEAR            = {2025},
  PAGES           = {4384--4398},
  PUBLISHER       = {Association for Computational Linguistics},
  URL             = {https://iris.cnr.it/handle/20.500.14243/570521},
  CONFERENCE_NAME = {31st International Conference on Computational Linguistics, COLING 2025},
  KEYWORDS        = {Large Language Models (LLMs), Text Coherence},
  ABSTRACT        = {We present a deep investigation of encoder-based Language Models (LMs) on
    their abilities to detect text coherence across four languages and four text genres using a
    new evaluation benchmark, TEXT-CAKE. We analyze both multilingual and monolingual LMs with
    varying architectures and parameters in different finetuning settings. Our findings
    demonstrate that identifying subtle perturbations that disrupt local coherence is still a
    challenging task. Furthermore, our results underline the importance of using diverse text
    genres during pre-training and of an optimal pre-training objective and large vocabulary
    size. When controlling for other parameters, deep LMs (i.e., higher number of layers) have
    an advantage over shallow ones, even when the total number of parameters is smaller},
}

@comment{
  Conference paper, ACL 2025 (see CONFERENCE_NAME).
}
@INPROCEEDINGS{DINI_2025_INPROCEEDINGS_DDBD_570446,
  AUTHOR          = {Dini, L. and Domenichelli, L. and Brunato, D. and Dell'Orletta, F.},
  TITLE           = {From Human Reading to {NLM} Understanding: Evaluating the Role of Eye-Tracking Data in Encoder-Based Models},
  BOOKTITLE       = {Proceedings of the Annual Meeting of the Association for Computational Linguistics},
  YEAR            = {2025},
  VOLUME          = {1},
  PAGES           = {17796--17813},
  PUBLISHER       = {Association for Computational Linguistics},
  DOI             = {10.18653/v1/2025.acl-long.870},
  URL             = {https://iris.cnr.it/handle/20.500.14243/570446},
  CONFERENCE_NAME = {63rd Annual Meeting of the Association for Computational Linguistics, ACL 2025},
  KEYWORDS        = {Large Language Models (LLMs), Eye-tracking, Interpretability},
  ABSTRACT        = {Cognitive signals, particularly eye-tracking data, offer valuable insights
    into human language processing. Leveraging eye-gaze data from the Ghent Eye-Tracking
    Corpus, we conducted a series of experiments to examine how integrating knowledge of human
    reading behavior impacts Neural Language Models (NLMs) across multiple dimensions: task
    performance, attention mechanisms, and the geometry of their embedding space. We explored
    several fine-tuning methodologies to inject eye-tracking features into the models. Our
    results reveal that incorporating these features does not degrade downstream task
    performance, enhances alignment between model attention and human attention patterns, and
    compresses the geometry of the embedding space},
}

@comment{
  Conference paper, CLiC-it 2025 (see BOOKTITLE).
}
@INPROCEEDINGS{DOMENICHELLI_2025_INPROCEEDINGS_DDBD_570463,
  AUTHOR    = {Domenichelli, L. and Dini, L. and Brunato, D. and Dell'Orletta, F.},
  TITLE     = {The Role of Eye-Tracking Data in Encoder-Based Models: an In-depth Linguistic Analysis},
  BOOKTITLE = {Proceedings of the Eleventh Italian Conference on Computational Linguistics ({CLiC-it} 2025), 24-26 September 2025, Cagliari, Italy},
  YEAR      = {2025},
  URL       = {https://iris.cnr.it/handle/20.500.14243/570463},
  KEYWORDS  = {Eye-tracking, Neural Attention, Multilingual models, Embedding space, Interpretability},
  ABSTRACT  = {This paper falls within ongoing research aimed at enhancing the human
    interpretability of neural language models by incorporating physiological data.
    Specifically, we leverage eye-tracking data collected during reading to explore how such
    information can guide model behavior. We train a multilingual encoder model to predict
    eye-tracking features from the Multilingual Eye-tracking Corpus (MECO) and analyze the
    resulting shifts in model attention patterns, focusing on how attention redistributes
    across linguistically informed categories such as part of speech, word position, word
    length, and distance from the syntactic head after fine-tuning. Moreover, we test how this
    attention shift impacts the representation of the interested words in the embedding space.
    The study covers both Italian and English, enabling a cross-linguistic perspective on
    attention and representation shifts in multilingual encoders grounded in human reading
    behavior},
}

@comment{
  Conference paper, Findings of ACL 2023 (see BOOKTITLE).
  The publisher city, previously embedded in PUBLISHER, is carried in ADDRESS.
}
@INPROCEEDINGS{BRUNATO_2023_INPROCEEDINGS_BDDR_455142,
  AUTHOR           = {Brunato, D. and Dell'Orletta, F. and Dini, I. and Ravelli, A. A.},
  TITLE            = {Coherent or Not? Stressing a Neural Language Model for Discourse Coherence in Multiple Languages},
  BOOKTITLE        = {Findings of the Association for Computational Linguistics: {ACL} 2023},
  YEAR             = {2023},
  PAGES            = {10690--10700},
  PUBLISHER        = {Association for Computational Linguistics},
  ADDRESS          = {Stroudsburg, USA},
  DOI              = {10.18653/v1/2023.findings-acl.680},
  ISBN             = {978-1-959429-62-3},
  URL              = {https://aclanthology.org/2023.findings-acl.680},
  CONFERENCE_NAME  = {61st Annual Meeting of the Association for Computational Linguistics (ACL 2023)},
  CONFERENCE_PLACE = {Stroudsburg},
  KEYWORDS         = {text coherence, neural language models, multilingual corpora},
  ABSTRACT         = {In this study, we investigate the capability of a Neural Language Model
    (NLM) to distinguish between coherent and incoherent text, where the latter has been
    artificially created to gradually undermine local coherence within text. While previous
    research on coherence assessment using NLMs has primarily focused on English, we extend our
    investigation to multiple languages. We employ a consistent evaluation framework to compare
    the performance of monolingual and multilingual models in both in-domain and out-domain
    settings. Additionally, we explore the model's performance in a cross-language scenario},
}