@INPROCEEDINGS{BOSCHETTI_2022_INPROCEEDINGS_BBDDGNZ_472289,
  AUTHOR = {Boschetti, F. and Burgassi, C. and Del Gratta, R. and Del Grosso, A. M. and Guadagnini, E. and Nahli, O. and Zenzaro, S.},
  TITLE = {Il {Laboratorio di Filologia Collaborativa e Cooperativa} ({CoPhiLab}) del {CNR-ILC}: dati, strumenti, servizi e infrastrutture},
  YEAR = {2022},
  ABSTRACT = {Questo contributo illustra le attività e le risorse del Laboratorio di Filologia Collaborativa e Cooperativa (CoPhiLab) dell'Istituto di Linguistica Computazionale "A. Zampolli" del Consiglio Nazionale delle Ricerche (CNR-ILC), con particolare attenzione all'uso delle infrastrutture di ricerca nazionali e internazionali.},
  KEYWORDS = {Filologia Computazionale, Modelli Formali, Lingua Araba, Domain-Specific Languages, Ingegneria del Software},
  PAGES = {45--50},
  URL = {https://www.eventi.garr.it/it/conf22},
  DOI = {10.26314/GARR-Conf22-proceedings},
  PUBLISHER = {Associazione Consortium GARR (Roma, ITA)},
  ISBN = {978-88-946629-1-7},
  CONFERENCE_NAME = {CondiVisioni. La rete come strumento per costruire il futuro},
  CONFERENCE_PLACE = {Palermo},
  CONFERENCE_DATE = {18/05/2022-20/05/2022},
  BOOKTITLE = {CONDIVISIONI La rete come strumento per costruire il futuro},
  EDITOR = {Mieli, M. and Volpe, C.},
}

@ARTICLE{DELGROSSO_2021_ARTICLE_DFMTN_458287,
  AUTHOR = {Del Grosso, A. M. and Fihri, D. F. and Mohajir, M. E. and Tonazzini, A. and Nahli, O.},
  TITLE = {Challenges in the digital analysis of historical laminated manuscripts},
  YEAR = {2021},
  ABSTRACT = {In this paper, we analyze and discuss the characteristics of a system for the effective digital preservation and fruition of historical manuscripts degraded by the process of lamination. The most significant degradation caused by lamination is that the parchment or paper support loses its flatness, and usually presents ripples and warnings.
This, together with the affixed translucent varnish, dramatically impair the digital acquisition process, so that light reflections in the more disparate directions affect the digital images. A digital system to contrast this irreversible and progressive degradation and to enable an effective access to the fragile asset should provide a number of functionalities: specialized digitization, able to avoid reflections as much as possible; image enhancement, devised to correct the residual degradations and enhance the text for an easier legibility; semi-automatic transcription of the virtually restored pages; and, finally, scholarly encoding and linguistic analysis, which should adapt existing tools to the specificity of the primary source (writing system and language). As a case study, we will make reference to the "Poem in Rajaz on medicine", written by Abubacer in the XII century, and conserved in the Al Quaraouiyine Library located in Fez, Morocco. The feasibility study for the realization of such a system is of general utility, in that it can provide guidelines for the digitization, the enhancement and the text encoding of the many laminated manuscripts conserved in other historical archives. On the other hand, from the cultural heritage point of view, the experimentation on the "Poem in Rajaz on medicine" could foster the systematic philological and ontological study of a unique piece of our documental heritage: the longest poem of medieval Islamic medical literature.},
  KEYWORDS = {Cultural Heritage Digital Safeguard, Historical Manuscript Digitization, Document Image Processing, Linguistic Analysis, Ontological Analysis},
  PAGES = {34--43},
  URL = {https://innove.org/ijist/index.php/ijist/article/view/190},
  VOLUME = {5},
  NUMBER = {1},
  DOI = {10.57675/IMIST.PRSM/ijist-v5i1.190},
  PUBLISHER = {[El Mohajir Mohammed] ([S. l.
], Marocco)},
  ISSN = {2550-5114},
  JOURNAL = {International Journal of Information Science and Technology},
}

@ARTICLE{NAHLI_2021_ARTICLE_ND_463930,
  AUTHOR = {Nahli, O. and Del Grosso, A. M.},
  TITLE = {Structuring {Arabic} lexical and morphological resources using {TEI}: theory and practice},
  YEAR = {2021},
  ABSTRACT = {An Arabic word can be described according to its lexical and morphological information. The lexical information, conveyed by the root, consists of both semantic meaning and syntactic properties (e.g. parts of speech). The morphological information, encoded by patterns, is useful to group the words having similar syntactic, inflectional and semantic behaviour. Lexical analysis and morphological analysis have been separately described since the very first studies of the Arabic language. Although several scholarly works have illustrated Arabic lexicon models that encode semantic meanings, a systematic description of word patterns is still strongly lacking. In this work, we have implemented an exhaustive resource consisting of two levels: lexical and morphological. The lexical level collects information extracted from the dictionary al=qāmūs al=muḥīṭ. The morphological level describes pattern formalization, which allows to enrich word descriptions with additional semantic, morphosyntactic and inflectional information. To build our digital resource, taking into account primary source, lexical requirements, and reusability, we followed the guidelines provided by the Text Encoding Initiative (abbreviated as TEI). In particular, we adopted the TEI module for the encoding of digital dictionaries and lexicons to formally represent the medieval al=qāmūs al=muḥīṭ dictionary. Given the complexity of describing the morphological information present in the patterns, we also used the TEI module devoted to encoding feature structures. Consequently, we are building an exhaustive resource formed by the lexical and the morphological blocks.
These two components are distinct but complementary resources where the lexical data are connected to morphological information. In addition, the morphological resource can be used as a stand-alone tool that allows the morphological analyzers to capture aspects of meaning that cannot be identified by current systems.},
  KEYWORDS = {classical Arabic dictionary, digital lexicography, al=qāmūs al=muḥīṭ},
  PAGES = {3--14},
  URL = {https://innove.org/ijist/index.php/ijist/article/view/191},
  VOLUME = {5},
  PUBLISHER = {[El Mohajir Mohammed] ([S. l. ], Marocco)},
  ISSN = {2550-5114},
  JOURNAL = {International Journal of Information Science and Technology},
}

@ARTICLE{NAHLI_2021_ARTICLE_NSBB_463923,
  AUTHOR = {Nahli, O. and Sanna, A. and Bandini, M. and Boschetti, F.},
  TITLE = {{Commerce Numérique}: traffic signals for the crossroads between cultures},
  YEAR = {2021},
  ABSTRACT = {Commerce is a literary French journal founded by Princess Margherita Caetani, involving three prestigious collaborators: Paul Valéry, Léon-Paul Fargue, Valéry Larbaud. It is composed by 29 volumes published between 1924 and 1932. Each volume collects different literary material of various well-known and unknown writers as poems or novels, translating some of the most important authors like Joyce, T.S. Eliot, Pirandello, Ungaretti, Saint-John Perse, Rilke, Hofmannsthal. Considering the historical, literary, and cultural importance of the Commerce journal, our project "Commerce numérique" aims to digitize and to make the journal contents freely available online to both the general public and the research community. This article presents how the journal was encoded. Also, we give importance to the coding of poems present in Commerce. Indeed, some poems are original in another language and they are accompanied by their French translation. Other poems are a French-translated form without original text.
In order to fully and accurately express the phenomena and their structures, we have adopted some aspects of the TEI framework, which we will explain in detail. Particular attention was paid to the French translation of a Moroccan Arabic poem from the 13th century. On the one hand, the original Arabic poetry is interesting because it presents some aspects of the Moroccan dialect and some aspects of the oral text. On the other hand, the study and the encoding of Arabic poetry in parallel to its translation highlight some important structural differences between Arabic poetry and Western poetry.},
  KEYWORDS = {Commerce Journal, OCR, TEI encoding, literary journal, digital resources, Arabic poetry},
  PAGES = {36--45},
  URL = {https://innove.org/ijist/index.php/ijist/article/view/193},
  VOLUME = {5},
  PUBLISHER = {[El Mohajir Mohammed] ([S. l. ], Marocco)},
  ISSN = {2550-5114},
  JOURNAL = {International Journal of Information Science and Technology},
}

@ARTICLE{KHALFI_2020_ARTICLE_KZN_438041,
  AUTHOR = {Khalfi, M. and Zarghili, A. and Nahli, O.},
  TITLE = {A New Rich Lexical Resource For {Classical Arabic}},
  YEAR = {2020},
  ABSTRACT = {Currently, large lexical resources are getting a high potential relevance for information systems and need of Lexical resources in Natural Language Processing (NLP) fields is paramount. To contribute meet these needs, we build a lexical resource from the famous dictionary al=qāmūs al=muḥīṭ (AQAM). Using a rule based approach, we have designed a system that allows extracting morpho-syntactical, semantics and lexical information from the famous dictionary. So, we obtained a digitized and structured version of AQAM, enriched by morpho-syntactical and lexical explicit information. In addition, the obtained resource is enriched by English translations of lemma and accompanying senses using a bilingual English-Arabic dictionary. Then we present an overview of an experiment alignment of the section of the letter bāʾ
on Princeton's WordNet (PWN) and Suggested Upper Merged Ontology (SUMO). This experience turned out to be interesting because it revealed that mapping an Arabic lexical resource on an English resource shows commonality between the two languages, but it allows especially to emphasize the non-equivalences between them. All obtained resources are represented in XML format and distributed under free license},
  KEYWORDS = {Information Extraction, Arabic Lexicon, Al Qamus Al Muhit, Machine-readable dictionary, Arabic Lexical Resource},
  PAGES = {3863--3884},
  URL = {https://www.ijact.in/index.php/ijact/article/view/1196},
  VOLUME = {9},
  NUMBER = {10},
  PUBLISHER = {Research India Publications (New Delhi, India)},
  ISSN = {2249-3123},
  JOURNAL = {International journal of advanced computer science and technology},
}

@INPROCEEDINGS{DELGROSSO_2020_INPROCEEDINGS_DFENT_439862,
  AUTHOR = {Del Grosso, A. M. and Fassi, F. D. and El Mohajir, M. and Nahli, O. and Tonazzini, A.},
  TITLE = {Digital safeguard of laminated historical manuscripts: the treatise {``Poem in Rajaz on medicine''} as a case study},
  YEAR = {2020},
  ABSTRACT = {In this paper, we analyze and discuss the characteristics of a system for the effective digital preservation and fruition of historical manuscripts degraded by the process of lamination. As a case study, we will make reference to the "Poem in Rajaz on medicine", written by Abubacer in the XII century, and conserved in the Al Quaraouiyine Library located in Fez, Morocco. The conceived system should have at least four main functionalities: image acquisition (i.e. digitization), image enhancement, text encoding, and linguistic analysis. Based on the evaluation of the manuscript damages, the acquisition set up should be designed in such a way to be able to avoid reflections as much as possible. Suitable digital image processing techniques should also be devised to correct the residual degradations and enhance the text for an easier legibility.
Finally, semi-automatic transcription, scholarly encoding and linguistic analysis, to be performed on the virtually restored pages, should adapt existing tools to the specificity of the primary source writing system and language. The feasibility study for the realization of such a system is of general utility, in that it can provide guidelines for the digitization, the enhancement and the text encoding of the many laminated manuscripts conserved in other historical archives. On the other hand, from the cultural heritage point of view, the experimentation on the "Poem in Rajaz on medicine" could foster the systematic philological and ontological study of a unique piece of our documental heritage: the longest poem of medieval Islamic medical literature.},
  KEYWORDS = {Cultural Heritage, Digital Safeguard, Historical Manuscript Digitization, Document Image Processing, Linguistic Analysis, Ontological Analysis},
  PAGES = {192--197},
  URL = {https://ieeexplore.ieee.org/document/9357192},
  DOI = {10.1109/CiSt49399.2021.9357192},
  PUBLISHER = {IEEE (New York, USA)},
  ISBN = {978-1-7281-6646-9},
  CONFERENCE_NAME = {CiSt'2020-6th IEEE Congress on Information Science \& Technology},
  CONFERENCE_PLACE = {Agadir-Essaouira, Morocco},
  CONFERENCE_DATE = {June 5-12, 2021},
}

@INPROCEEDINGS{NAHLI_2020_INPROCEEDINGS_ND_439789,
  AUTHOR = {Nahli, O. and Del Grosso, A. M.},
  TITLE = {Creating {Arabic} Lexical Resources in {TEI}; A Schema for Discontinuous Morphology Encoding},
  YEAR = {2020},
  ABSTRACT = {This article aims at formally grouping lexical and morphological information in order to obtain an electronic resource with respect to the Arabic language starting from the classical dictionary al=qāmūs al=muḥīṭ. This contribution examines practical aspects about the adoption of the guidelines provided by the Text Encoding Initiative (TEI) to encode the Arabic dictionary as a primary source.
Moreover, the contribution points out a possible way to integrate semantic, morphological and syntactic information characterizing word patterns within the same TEI document. Specifically, the formalization of word patterns allows us to emphasize additional morphosyntactic regularities mainly concerning word distribution within sentences. Consequently, the obtained digital object represents both the medieval Arabic dictionary and a suitable resource that can be exploited for a number of Natural Language Processing tasks.},
  KEYWORDS = {classical Arabic dictionary, digital lexicography, al=qāmūs al=muḥīṭ, word patterns, TEI},
  PAGES = {9},
  URL = {https://publications.cnr.it/doc/439789},
  DOI = {10.1109/CiSt49399.2021.9357273},
  PUBLISHER = {IEEE (New York, USA)},
  ISBN = {978-1-7281-6646-9},
  CONFERENCE_NAME = {IEEE-CIST2020 DPWH},
  CONFERENCE_PLACE = {Agadir-Essaouira, Morocco},
  CONFERENCE_DATE = {5/06/2021-12/06/2021},
}

@INPROCEEDINGS{SANNA_2020_INPROCEEDINGS_SCBN_439796,
  AUTHOR = {Sanna, A. and Cinerari, R. and Boschetti, F. and Nahli, O.},
  TITLE = {Digitizing and Encoding a Multilingual Literary Review: {Commerce Numerique}},
  YEAR = {2020},
  ABSTRACT = {Commerce was an important literary review founded in Paris by Princess Margherita Caetani, Prince Roffredo Caetani's wife. Born in America, she was polyglot and maecenas. Between 1924 and 1932 she surrounded herself with three prestigious collaborators: Paul Valéry, Léon-Paul Fargue, Valéry Larbaud. The review promoted the translation of World and European literature in French, translating some of the most important authors like Joyce, T.S. Eliot, Pirandello, Ungaretti, Saint-John Perse, Rilke, Hofmannsthal. The aim of this project is to promote by digitizing the dissemination of the review, to develop studies and research concerning the Caetani family's cultural activities in Europe. All the volumes of the literary review Commerce have been scanned, acquired by OCR and encoded in TEI-XML.
The cultural value of the operation is discussed and the work-flow to create the digital textual corpus is described in detail.},
  KEYWORDS = {Review Commerce, OCR, TEI encoding, literary review, digital resources},
  PAGES = {4},
  URL = {https://publications.cnr.it/doc/439796},
  PUBLISHER = {IEEE (New York, USA)},
  ISBN = {978-1-7281-6646-9},
  CONFERENCE_NAME = {IEEE-CIST2020 DPWH},
  CONFERENCE_PLACE = {Agadir-Essaouira, Morocco},
  CONFERENCE_DATE = {5/06/2021-12/06/2021},
}

@INPROCEEDINGS{DELGROSSO_2018_INPROCEEDINGS_DBGMN_390296,
  AUTHOR = {Del Grosso, A. M. and Bellandi, A. and Giovannetti, E. and Marchi, S. and Nahli, O.},
  TITLE = {Scanning is Just the Beginning: Exploiting Text and Language Technologies to Enhance the Value of Historical Manuscripts},
  YEAR = {2018},
  ABSTRACT = {In this paper we present a digital process for the explicitation of the textual, linguistic and semantic content of historical manuscripts. The proposed workflow is composed of a sequence of incremental steps, each of which is described both on a methodological and practical perspective. The steps are: 1) visualization and structuring of metadata, 2) transcription, 3) structural encoding, 4) annotation, 5) lexical and conceptual structuring.},
  KEYWORDS = {Computational Lexica, Digital Scholarly Editing, Digital Humanities, al-Qamus al-Muhit},
  PAGES = {214--219},
  URL = {https://publications.cnr.it/doc/390296},
  DOI = {10.1109/CIST.2018.8596373},
  PUBLISHER = {IEEE (New York, USA)},
  ISBN = {978-1-5386-4385-3},
  CONFERENCE_NAME = {CIST 2018 WH-MNLP},
  CONFERENCE_PLACE = {Marrakech, Morocco},
  CONFERENCE_DATE = {21-27/10/2018},
  BOOKTITLE = {Colloquium in Information Science and Technology, CIST},
  EDITOR = {Al Achhab, M. and El Mohajir, M. and Jellouli, I. and El Mohajir, B. E.},
}

@INPROCEEDINGS{FERRO_2018_INPROCEEDINGS_FCGMNCP_390504,
  AUTHOR = {Ferro, M. and Cappa, C. and Giulivi, S. and Marzi, C. and Nahli, O. and Cardillo, F. A.
and Pirrelli, V.},
  TITLE = {{ReadLet}: Reading for Understanding},
  YEAR = {2018},
  ABSTRACT = {This paper focuses on motivation, objectives, design issues and preliminary results of ReadLet, an ICT platform for assessing reading efficiency in primary school children. Test data are discussed on a sample of 200 early graders, reading French, Italian and Standard Modern Arabic (SMA).},
  KEYWORDS = {Reading, text comprehension, Specific Learning Disorders, multimodal signal processing, cloud computing, portable assistive technology},
  PAGES = {404--409},
  URL = {https://publications.cnr.it/doc/390504},
  PUBLISHER = {IEEE (New York, USA)},
  ISBN = {978-1-5386-4385-3},
  CONFERENCE_NAME = {IEEE-CIST2018 LED-ICT},
  CONFERENCE_PLACE = {Marrakech, Morocco},
  CONFERENCE_DATE = {21-27/10/2018},
}

@INPROCEEDINGS{MARZI_2018_INPROCEEDINGS_MFNBBP_388016,
  AUTHOR = {Marzi, C. and Ferro, M. and Nahli, O. and Belik, P. and Bompolas, S. and Pirrelli, V.},
  TITLE = {Evaluating Inflectional Complexity Crosslinguistically: a Processing Perspective},
  YEAR = {2018},
  ABSTRACT = {The paper provides a cognitively motivated method for evaluating the inflectional complexity of a language, based on a sample of "raw" inflected word forms processed and learned by a recurrent self-organising neural network with fixed parameter setting. Training items contain no information about either morphological content or structure. This makes the proposed method independent of both meta-linguistic issues (e.g. format and expressive power of descriptive rules, manual or automated segmentation of input forms, number of inflectional classes etc.) and language-specific typological aspects (e.g. word-based, stem-based or template-based morphology).
Results are illustrated by contrasting Arabic, English, German, Greek, Italian and Spanish.},
  KEYWORDS = {paradigm-based morphology, inflectional complexity, prediction-based processing, recurrent self-organising networks, Statistical And Machine Learning Methods, Language Modelling},
  PAGES = {3860--3866},
  URL = {http://www.lrec-conf.org/proceedings/lrec2018/summaries/745.html},
  VOLUME = {2018},
  PUBLISHER = {European language resources association (ELRA) (Paris, FRA)},
  ISBN = {979-10-95546-00-9},
  CONFERENCE_NAME = {Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  CONFERENCE_PLACE = {Miyazaki, Japan},
  CONFERENCE_DATE = {7-12/05/2018},
  BOOKTITLE = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  EDITOR = {Calzolari, N. and Choukri, K. and Cieri, C. and Declerck, T. and Goggi, S. and Hasida, K. and Isahara, H. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S. and Tokunaga, T.},
}

@INPROCEEDINGS{NAHLI_2018_INPROCEEDINGS_N_390405,
  AUTHOR = {Nahli, O.},
  TITLE = {{Arabic} Language Alignment with {English} Ontologies-Some Ontological Reflections},
  YEAR = {2018},
  ABSTRACT = {There have been several attempts to build lexico-conceptual resources by extension of the English WordNet, i.e. by means of translation of English synsets. However, the extension approach is arguable because it assumes that the target resource is isomorphic to English WordNet. Yet, some languages, such as English and Arabic, can be very different. The problem would be to know, first, whether they conceptualize reality in the same way; and if not, to identify different concepts types. The mapping of a lexical resource of a different language onto Princeton WordNet of English (PWN) answers these questions.
The experiment, in this article, describes results obtained from mapping the Arabic dictionary, al=qāmūs al=muḥīṭ, onto English WordNet and SUMO (Standard Upper Merged Ontology), also developed for the English language.},
  KEYWORDS = {Ontology, concept, Arabic, PWN, SUMO, al=qāmūs al=muḥīṭ},
  PAGES = {7},
  URL = {https://publications.cnr.it/doc/390405},
  PUBLISHER = {IEEE (New York, USA)},
  ISBN = {978-1-5386-4385-3},
  CONFERENCE_NAME = {CIST 2018 WH-MNLP},
  CONFERENCE_PLACE = {Marrakech, Morocco},
  CONFERENCE_DATE = {21-27/10/2018},
}

@INPROCEEDINGS{CAPPA_2018_INPROCEEDINGS_CFGMNCP_396593,
  AUTHOR = {Cappa, C. and Ferro, M. and Giulivi, S. and Marzi, C. and Nahli, O. and Cardillo, F. A. and Pirrelli, V.},
  TITLE = {{ReadLet}: piattaforma {ICT} per valutare l'efficienza di lettura},
  YEAR = {2018},
  ABSTRACT = {ReadLet è una piattaforma ICT pensata per valutare accuratamente l'efficienza di lettura nei bambini della scuola primaria. Combina tecnologia ICT portatile e cloud-computing con una serie di moduli software, specifici per modalità di somministrazione. Questi, implementati come servizi web, includono: i) valutazione dell'elaborazione del testo e della leggibilità; ii) valutazione della velocità di lettura (ad alta voce e silente) e delle sue fluttuazioni); iii) valutazione della correttezza della decodifica ad alta voce; iv) valutazione della comprensione del testo (in lettura silente e da ascolto). Un prototipo della tecnologia ReadLet è stato sperimentato su circa 200 alunni (8-11 anni), che variano per stato socio-economico, lingua (italiana, francese, araba) e area geografica (Italia, Svizzera, Marocco). L'utilizzo del tablet per la lettura è stato percepito dai bambini come un'esperienza coinvolgente e piacevole.
Gli insegnanti hanno trovato lo strumento facile da utilizzare e in grado di fornire maggiori informazioni rispetto agli strumenti tradizionali.},
  KEYWORDS = {leggere per capire, disturbi del linguaggio, screening},
  URL = {https://www.airipa.it/congresso/pluginfile.php/2781/mod_resource/content/1/Programma%20Congresso%20AIRIPA_Arezzo_dettagliato-3.pdf},
  CONFERENCE_NAME = {XXVII Congresso Nazionale AIRIPA},
  CONFERENCE_PLACE = {Arezzo (Italy)},
  CONFERENCE_DATE = {28-29/09/2018},
}

@THESIS{NAHLI_2018_THESIS_N_390506,
  AUTHOR = {Nahli, O.},
  TITLE = {Vers une ontologie de la culture arabo-musulmane},
  YEAR = {2018},
  ABSTRACT = {Le projet vise à décrire les méthodologies permettant de développer un réseau de connaissance pour la culture arabo-islamique sur la base d'un processus d'extractions automatiques de données à partir du lexique arabe al=qamus al=muHiT (qamus). Le choix de qamus est justifié par le fait qu'il a un statut d'autorité dans le monde arabe, au point que la parole qamus [océan] a supplanté la parole mungid 'dictionnaire'. Le projet prévoit divers étapes de travail et, avant tout, l'acquisition d'une version numérique de qamus. La mise au point d'algorithmes pour la codification partielle et automatique de la macrostructure lexicale et la conversion du lexique en format XML. D'autres algorithmes permettent l'identification de la microstructure lexicale et, l'annotation de chaque partie constituante de l'entrée lexicale, entre autres, le lemme, sa nature morphologique, ses définitions, etc. En utilisant deux dictionnaires bilingues arabe-anglais, un système de recherche permet de trouver, de manière automatique et quand c'est possible, la traduction de chaque lemme, ce qui permet de le lier au synset correspondant dans PWN et au concept de SUMO à qui il pourrait faire référence.
Une autre étape serait l'analyse de divers échantillons de lemmes pour détecter la validité des résultats.},
  KEYWORDS = {al qamus al muHiyT, ontologie, langue arabe, Wordnet, PWN, SUMO (The Suggested Upper Merged Ontology)},
  PAGES = {317},
  URL = {https://publications.cnr.it/doc/390506},
}

@ARTICLE{MARZI_2017_ARTICLE_MFN_363116,
  AUTHOR = {Marzi, C. and Ferro, M. and Nahli, O.},
  TITLE = {Arabic word processing and morphology induction through adaptive memory self-organisation strategies},
  YEAR = {2017},
  ABSTRACT = {Aim of the present study is to model the human mental lexicon, by focussing on storage and processing dynamics, as lexical organisation relies on the process of input recoding and adaptive strategies for long-term memory organisation. A fundamental issue in word processing is represented by the emergence of the morphological organisation level in the lexicon, based on paradigmatic relations between fully-stored word forms. Morphology induction can be defined as the task of perceiving and identifying morphological formatives within morphologically complex word forms, as a function of the dynamic interaction between lexical representations and distribution and degrees of regularity in lexical data. In the computational framework we propose here (TSOMs), based on Self-Organising Maps with Hebbian connections defined over a temporal layer, the identification/perception of surface morphological relations involves the alignment of recoded representations of morphologically-related input words. Facing a non-concatenative morphology such as the Arabic inflectional system prompts a reappraisal of morphology induction through adaptive organisation strategies, which affect both lexical representations and long-term storage.
We will show how a strongly adaptive self-organisation during training is conducive to emergent relations between word forms, which are concurrently, redundantly and competitively stored in human mental lexicon, and to generalising knowledge of stored words to unknown forms.},
  KEYWORDS = {Non-concatenative morphological structure, Lexical storage and access, Topological alignment, Synchronisation, Self-Organising Maps},
  PAGES = {179--188},
  URL = {http://www.sciencedirect.com/science/article/pii/S1319157816301148},
  VOLUME = {29},
  DOI = {10.1016/j.jksuci.2016.11.006},
  PUBLISHER = {Elsevier (Amsterdam, Paesi Bassi)},
  ISSN = {2213-1248},
  JOURNAL = {Journal of King Saud University. Computer and information sciences (Online)},
}

@INPROCEEDINGS{BENOTTO_2016_INPROCEEDINGS_BGN_364353,
  AUTHOR = {Benotto, G. and Giovannetti, E. and Nahli, O.},
  TITLE = {An application of distributional semantics for the analysis of the {Holy Quran}},
  YEAR = {2016},
  ABSTRACT = {In this contribution we illustrate the methodology and the results of an experiment we conducted by applying Distributional Semantics Models to the analysis of the Holy Quran. Our aim was to gather information on the potential differences in meanings that the same words might take on when used in Modern Standard Arabic w.r.t. their usage in the Quran. To do so we used the Penn Arabic Treebank as a contrastive corpus.},
  KEYWORDS = {Distributional Semantics, the Holy Quran, Classical Arabic, Modern Standard Arabic, Contrastive Linguistics},
  PAGES = {374--379},
  URL = {http://ieeexplore.ieee.org/document/7805074/},
  DOI = {10.1109/CIST.2016.7805074},
  ISBN = {978-1-5090-0751-6},
  CONFERENCE_NAME = {4th (IEEE) International Colloquium on Information Science and Technology, CiSt 2016},
  CONFERENCE_PLACE = {Tangier, Morocco},
  CONFERENCE_DATE = {24-26/10/2016},
  BOOKTITLE = {2016 4th IEEE International Colloquium on Information Science and Technology (CiSt)},
  EDITOR = {El Mohajir, M. and Chahhou, M. and Al Achhab, M. and El Mohajir, B.
E.},
}

@INPROCEEDINGS{NAHLI_2016_INPROCEEDINGS_NFMKZK_355436,
  AUTHOR = {Nahli, O. and Frontini, F. and Monachini, M. and Khan, F. and Zarghili, A. and Khalfi, M.},
  TITLE = {{Al Qamus al Muhit}, a Medieval {Arabic} Lexicon in {LMF}},
  YEAR = {2016},
  ABSTRACT = {This paper describes the conversion into LMF, a standard lexicographic digital format of 'al-qāmūs al-muḥīṭ, a Medieval Arabic lexicon. The lexicon is first described, then all the steps required for the conversion are illustrated. The work is will produce a useful lexicographic resource for Arabic NLP, but is also interesting per se, to study the implications of adapting the LMF model to the Arabic language. Some reflections are offered as to the status of roots with respect to previously suggested representations. In particular, roots are, in our opinion are to be not treated as lexical entries, but modeled as lexical metadata for classifying and identifying lexical entries. In this manner, each root connects all entries that are derived from it.},
  KEYWORDS = {Arabic Lexicon, LMF, Al Qamus al Muhit},
  PAGES = {943--950},
  URL = {http://www.lrec-conf.org/proceedings/lrec2016/index.html},
  PUBLISHER = {European Language Resources Association ELRA (Paris, FRA)},
  ISBN = {978-2-9517408-9-1},
  CONFERENCE_NAME = {Tenth International Conference on Language Resources and Evaluation (LREC 2016)},
  CONFERENCE_PLACE = {Portoroz, Slovenia},
  CONFERENCE_DATE = {23-28 may},
  EDITOR = {Calzolari, N. and Choukri, K. and Declerck, T. and Goggi, S. and Grobelnik, M. and Maegaard, B. and Mariani, J. and Mazo, H. and Moreno, A. and Odijk, J. and Piperidis, S.},
}

@INPROCEEDINGS{NAHLI_2016_INPROCEEDINGS_NBAT_363709,
  AUTHOR = {Nahli, O. and Boschetti, F. and Arrigoni, S. and Tessarolo, L.},
  TITLE = {Il corpus di testi arabi in {Memorata Poetis}},
  YEAR = {2016},
  KEYWORDS = {letteratura araba, temi e motivi},
  PAGES = {157--162},
  URL = {http://www.himeros.eu/aiucd2016/c03.pdf},
  CONFERENCE_NAME = {Quinto Convegno Annuale AIUCD.
Edizioni digitali: rappresentazione, interoperabilità, analisi del testo e infrastrutture},
  CONFERENCE_PLACE = {Ca' Dolfin, Venezia, Italia},
  CONFERENCE_DATE = {7-9/9/2016},
  BOOKTITLE = {AIUCD 2016-Book of Abstracts},
  EDITOR = {Boschetti, F.},
}

@MISC{NAHLI_2016_MISC_N_390724,
  AUTHOR = {Nahli, O.},
  TITLE = {Corpus dei testi arabi in {``Memorata Poetis''}},
  YEAR = {2016},
  ABSTRACT = {Corpus epigrafico arabo, per motivi didattici e scientifici, i testi sono stati vocalizzati e tradotti in italiano.},
  KEYWORDS = {Poesia, Epigrafi, Memorata Poetis, lingua araba},
  URL = {http://www.memoratapoetis.it/public/},
}

@INPROCEEDINGS{NAHLI_2015_INPROCEEDINGS_NM_342436,
  AUTHOR = {Nahli, O. and Marchi, S.},
  TITLE = {Improved Written {Arabic} Word Parsing through Orthographic, Syntactic and Semantic constraints},
  YEAR = {2015},
  ABSTRACT = {The script-based and morphological characteristics of the Arabic language increase considerably the number of alternative analyses output by any morphological parser that does not use orthographic, syntactic and semantic constraints. In order to reduce time-wasting and error-prone proliferation of multiple outputs to be filtered in a post-processing phase, we have tried to optimize word processing by providing the morphological parser with multiple levels of information. We have operated at three such levels: orthography, morpho-syntax and semantics.},
  KEYWORDS = {Arabic Language, Arabic NLP, Orthography, Morpho-syntax, Semantics},
  PAGES = {210--214},
  URL = {http://www.aaccademia.it/elenco-libri?aaref=CLIC_2015},
  PUBLISHER = {Accademia University Press (Torino, ITA)},
  ISBN = {9788899200626},
  CONFERENCE_NAME = {Second Italian Conference on Computational Linguistics CLiC-it 2015},
  CONFERENCE_PLACE = {Trento},
  CONFERENCE_DATE = {3-4 Dicembre 2015},
}

@INPROCEEDINGS{PIRRELLI_2015_INPROCEEDINGS_PNBDM_333414,
  AUTHOR = {Pirrelli, V. and Nahli, O. and Boschetti, F. and Del Gratta, R.
and Marzi, C.}, TITLE = {Computational Linguistics and Language Physiology: Insights from Arabic NLP and Cooperative Editing}, YEAR = {2015}, ABSTRACT = {Computer processing of written Arabic raises a number of challenges to traditional parsing architectures on many levels of linguistic analysis. In this contribution, we review some of these core issues and the demands they make, to suggest different strategies to successfully tackle them. In the end, we assess these issues in connection with the behaviour of neuro-biologically inspired lexical architectures known as Temporal Self-Organising Maps. We show that, far from being language-specific problems, issues in Arabic processing can shed light on some fundamental characteristics of the human language processor, such as structure-based lexical recoding, concurrent, competitive activation of output candidates and dynamic selection of optimal solutions.}, KEYWORDS = {Non-concatenative morphology, Optical Character Recognition, WordNet, Temporal Self-organising Maps, Mental Lexicon, Language neuro-physiology}, PAGES = {1-8}, URL = {http://dl.acm.org/citation.cfm?id=2802612}, DOI = {10.1145/2802612.2802637}, ISBN = {978-1-4503-3295-8}, CONFERENCE_NAME = {Third AIUCD Annual Conference-Humanities and Their Methods in the Digital Ecosystem}, CONFERENCE_PLACE = {Bologna (IT)}, CONFERENCE_DATE = {18-19/09/2014}, BOOKTITLE = {Third AIUCD Annual Conference-Humanities and Their Methods in the Digital Ecosystem}, EDITOR = {Tomasi, F. and Del Turco, R. R. and Tammaro, A. M.}, } @INPROCEEDINGS{BOSCHETTI_2015_INPROCEEDINGS_BDDMDN_295474, AUTHOR = {Boschetti, F. and Del Gratta, R. and Del Grosso, A. and Monachini, M. and Diakoff, H. 
and Nahli, O.}, TITLE = {Collaborative Philology on the way to Web Services: the case of CoPhiWordnet}, YEAR = {2015}, ABSTRACT = {Starting from previous initiatives of the CoPhiLab, we show how they can be reinterpreted as Web Services, especially when they become part of a wider scenario: Web Services are used to make connections between lexicons, semantic resources and a fine grained text management. Linked Open Data is chosen to be the paradigm used to link the dierent resources, but also as the modality of data presentation.}, KEYWORDS = {Collaborative Philology, Web Services, Linked Open Data, Text Services, Text Interpretation}, URL = {http://langrid.org/wlsi2015/program.html}, CONFERENCE_NAME = {The Second International Workshop on Worldwide Language Service Infrastructure, WLSI 2015}, CONFERENCE_PLACE = {Kyoto}, CONFERENCE_DATE = {22-23rd January 2015}, } @MISC{DANCONA_2015_MISC_DBNFCBDM_390659, AUTHOR = {D'Ancona, C. and Bozzi, A. and Nahli, O. and Farina, M. and Coda, E. and Boschetti, F. and Del Grosso, A. M. and Marchi, S.}, TITLE = {Banca dati testuale Greek into Arabic}, YEAR = {2015}, ABSTRACT = {Banca dati testuale con la codifica XML della pericopatura dei testi Greco-Arabo di alcuni trattati delle Enneadi di Plotino.}, KEYWORDS = {Digital Humanities, Computational Philology, Greek into Arabic, http: //g2a. ilc. cnr. it}, URL = {http://g2a.ilc.cnr.it/}, } @MISC{NAHLI_2015_MISC_N_390722, AUTHOR = {Nahli, O.}, TITLE = {Banca dati dell'analisi morfo-sintattica del testo "Aflūṭīn ʻinda al-ʻArab", ʻAbd al-Raḥmān Badawī, Cairo 1955, 1966}, YEAR = {2015}, ABSTRACT = {Banca dati testuali con l'analisi morfo-sintattica del testo "Afl???n ?inda l-?Arab"; editore ?A. 
Badaw?, D?r al-Nah?at al-?arabiyya, Cairo 1966}, KEYWORDS = {analisi morfo-sintattica, Lingua araba, Greek Into Arabic}, URL = {http://g2a.ilc.cnr.it:8080/Teologia_Wapp/Home.xhtml?centerPage=teologia}, } @MISC{NAHLI_2015_MISC_N_390727, AUTHOR = {Nahli, O.}, TITLE = {Aggiornamenti banca dati del Motore morfologico Aramorph}, YEAR = {2015}, ABSTRACT = {AraMorph's components are essentially two: the rule engine for morphological analysis and a repository of linguistic resources mainly composed of three lexicons: i) the dictStems lexicon, which contains 38.600 lemmas; ii) the dictPrefixes lexicon, which consists of sequences of proclitics and inflectional prefixes; iii) the dictSuffixes lexicon, which consists of sequences of inflectional suffixes and enclitics. These lexica are accompanied by three compatibility tables used for checking combinations of A (proclitics+prefixes), B (stems) and C (suffixes+enclitics). To cut down on arabic parse overgeneration, one has to enforce further restrictions in compatibility tables, e.g. the verb's ability to accept nominative and accusative pronouns, and to select a rational subject. We then augmented verb entries with subcategorization information such as case assignment and the restriction on rational subjects. At the same time, it was necessary to update compatibility tables.}, KEYWORDS = {analisi morfo-sintattica, Lingua araba, Aramorph}, URL = {http://hdl.handle.net/20.500.11752/ILC-94}, } @INPROCEEDINGS{DELGRATTA_2014_INPROCEEDINGS_DN_318313, AUTHOR = {Del Gratta, R. and Nahli, O.}, TITLE = {Enhancing Arabic WordNet with the use on Princeton WordNet and a bilingual dictionary}, YEAR = {2014}, ABSTRACT = {This paper describes an heuristic-based approach to enhance existing WordNets with freely available bilingual resources. The approach has been applied to the Arabic WordNet using the AraMorph bilingual dictionary as bilingual resource, but its guidelines are quite general to be effectively applied to other languages. 
The English words extracted from the bilingual resource are checked against Princeton WordNet in order to quantify their coverage and to select only those words which share the same set of synsets. This strongly reduces the number of Arabic words of the pairs. These latter are then checked against the Arabic WordNet to make new words emerge and -possibly- add new synonyms.}, KEYWORDS = {WordNet, Arabic, English, Bilingual Resource, Enhancement}, PAGES = {278-284}, URL = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=\&arnumber=7016632}, DOI = {10.1109/CIST.2014.7016632}, PUBLISHER = {IEEE Communications Society (Piscataway, USA)}, ISBN = {978-1-4799-5978-5}, CONFERENCE_NAME = {ANLP IEEE CIST14}, CONFERENCE_PLACE = {Tetuan, Morocco}, CONFERENCE_DATE = {20-22/10/ 2014}, BOOKTITLE = {3rd International IEEE Colloquium on Information Science and Technology; From 20th to 22nd of October 2014 Tetuan-Chefchaouen Morocco}, } @INPROCEEDINGS{DELGROSSO_2014_INPROCEEDINGS_DN_295187, AUTHOR = {Del Grosso, A. M. and Nahli, O.}, TITLE = {Towards a flexible open-source software library for multi-layered scholarly textual studies: An Arabic case study dealing with semi-automatic language processing}, YEAR = {2014}, ABSTRACT = {This paper presents both the general model and a case study of the Computational and Collaborative Philology Library (CoPhiLib), an ongoing initiative underway at the Institute for Computational Linguistics (ILC) of the National Research Council (CNR), Pisa, Italy. The library, designed and organized as a reusable, abstract and open-source software component, aims at solving the needs of multi-lingual and cross-lingual analysis by exposing common Application Programming Interfaces (APIs). The core modules, coded by the Java programming language, constitute the groundwork of a Web platform designed to deal with textual scholarly needs. 
The Web application, implemented according to the Java Enterprise specifications, focuses on multi-layered analysis for the study of literary documents and related multimedia sources. This ambitious challenge seeks to obtain the management of textual resources, on the one hand by abstracting from current language, on the other hand by decoupling from the specific requirements of single projects. This goal is achieved thanks to methodologies declared by the "agile process", and by putting into effect suitable use case modeling, design patterns, and component-based architectures. The reusability and flexibility of the system have been tested on an Arabic case study: the system allows users to choose the morphological engine (such as AraMorph or Al-Khalil), along with linguistic granularity (i.e. with or without declension). Finally, the application enables the construction of annotated resources for further statistical engines (training set).}, KEYWORDS = {Design, Information Engineering, Design Patterns, Text Processing, Arabic Natural Language Processing}, PAGES = {285-290}, URL = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?tp=\&arnumber=7016633\&queryText%3Ddel+grosso+philology}, DOI = {10.1109/CIST.2014.7016633}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-4799-5978-5}, CONFERENCE_NAME = {Third IEEE International Colloquium in Information Science and Technology (CIST)}, CONFERENCE_PLACE = {Tetuan, Morocco}, CONFERENCE_DATE = {20-22/10/2014}, BOOKTITLE = {IEEE Cinference Publications-Catalog Number: CFP1467R-ART}, EDITOR = {El Mohajir, M. and Al Achhab, M. and Chahhou, M. and Mounir, A. and El Mohajir, B. and Pirrelli, V. and Zarghili, A. and Elfar, M.}, } @INPROCEEDINGS{MARZI_2014_INPROCEEDINGS_MNF_295178, AUTHOR = {Marzi, C. and Nahli, O. 
and Ferro, M.}, TITLE = {Word Processing for Arabic Language: A reappraisal of morphology induction through adaptive memory self-organisation strategies}, YEAR = {2014}, ABSTRACT = {Modelling the mental lexicon focuses on processing and storage dynamics, since lexical organisation relies on the process of input recoding and adaptive strategies for long-term memory organisation. A fundamental issue in word processing is represented by the emergence of the morphological organisation level in the lexicon, based on paradigmatic relations between fully-stored word forms. Morphology induction can be defined as the task of identifying morphological formatives within morphologically complex word forms. In the computational framework we propose here (TSOMs), based on Self-Organising Maps with Hebbian connections defined over a temporal layer, the identification/perception of surface morphological relations involves the alignment of recoded representations of morphologically-related input words. Facing a non-concatenative morphology such as the Arabic inflectional system prompts a reappraisal of morphology induction through adaptive organisation strategies, which affect both lexical representations and long-term storage. 
We will show how a strongly adaptive self-organisation during training is conducive to emergent relations between stored word forms, and to high accuracy rates in generalising knowledge of stored words to unknown forms.}, KEYWORDS = {Non-concatenative morphological structure, lexical storage and access, SOMs, word recoding and processing, adaptive strategies, morphology}, PAGES = {241-247}, URL = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=7016626\&punumber%3D6996097}, DOI = {10.1109/CIST.2014.7016626}, PUBLISHER = {IEEE (New York, USA)}, ISBN = {978-1-4799-5979-2}, CONFERENCE_NAME = {Third IEEE International Colloquium in Information Science and Technology (CIST)}, CONFERENCE_PLACE = {Tetuan (Morocco)}, CONFERENCE_DATE = {20-22/10/2014}, BOOKTITLE = {IEEE Conference Publications-Catalog Number: CFP1467R-ART}, EDITOR = {El Mohajir, M. and Al Achhab, M. and Chahhou, M. and Mounir, A. and El Mohajir, B. and Pirrelli, V. and Zarghili, A. and Elfar, M.}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BDMNP_288372, AUTHOR = {Boschetti, F. and Del Gratta, R. and Marzi, C. and Nahli, O. and Pirrelli, V.}, TITLE = {Modelli, metodi e strumenti per il trattamento automatico della lingua araba e per l'editing in ambienti collaborativi}, YEAR = {2014}, ABSTRACT = {La linguistica computazionale ha portato negli ultimi vent'anni a un profondo mutamento nello studio delle lingue e delle loro testimonianze scritte, spostando l'accento della ricerca da aspetti linguistico-formali all'uso linguistico in contesti comunicativi reali. Il presente contributo illustra l'impatto di questo cambio di prospettiva sullo studio della lingua araba, attraverso una rassegna di alcune attività di ricerca in corso presso l'Istituto di Linguistica Computazionale del CNR di Pisa: I. acquisizione dei testi arabi tramite Optical Character Recognition (OCR) e sviluppo di strumenti per la correzione manuale del testo in ambienti collaborativi; II. 
sviluppo di algoritmi e strumenti per l'analisi morfologica della lingua araba; III. analisi delle dinamiche di acquisizione del lessico arabo mediante architetture bio-computazionali; IV. sviluppo della WordNet dell'Arabo collegata a Princeton WordNet, ItalWordNet, LatinWordNet e alla nascente AncientGreek WordNet. Queste attivit( sono rivolte sia all'analisi delle caratteristiche linguistiche dell'arabo che allo studio della produzione letteraria araba e dei suoi rapporti storico-culturali con altre lingue. In particolare, il contributo intende illustrare la fertilità di un approccio metodologico che metta in relazione le dinamiche di acquisizione del lessico arabo, con la messa a punto di procedure di analisi ed edizione critica del testo e con i principi di organizzazione ontologica di una lingua ad alta produttività derivazionale.}, URL = {http://aiucd2014.unibo.it/book-of-abstracts.pdf}, CONFERENCE_NAME = {AIUCD 3rd annual conference}, CONFERENCE_PLACE = {Bologna}, CONFERENCE_DATE = {18-19 settembre 2014}, BOOKTITLE = {La metodologia della ricerca umanistica nell'ecosistema digitale-AIUCD 2014 Terzo convegno annuale}, EDITOR = {Rossi, F. and Tomasi, F.}, } @INPROCEEDINGS{BOSCHETTI_2014_INPROCEEDINGS_BDKLN_288058, AUTHOR = {Boschetti, F. and Del Grosso, A. M. and Khan, A. F. and Lamé, M. and Nahli, O.}, TITLE = {A top-down approach to the design of components for the philological domain}, YEAR = {2014}, ABSTRACT = {This paper focuses on the methodology applied to the development of components in the domain of collaborative philology in the Memorata Poetis Project. This initiative, led by the University of Venice, coordinates eight units sharing the same cyber-infrastructure and is co-funded by the Italian Ministry of Instruction, University and Research (PRIN 2010/11). The project aims to study the multilingual intertextuality between epigraphic texts and literary epigrams, the transmission of themes, motives, etc. 
between different communicative situations (epigraphic versus literary) and different civilisations (Greek, Latin and Italian). As a control group, we analyse a corpus of epigraphic and literary texts in Arabic which do not belong to the same tradition as the others. The study of intertextuality affects both the reconstruction of the text (constitutio textus), by providing variants from the indirect tradition, and its interpretation (interpretatio), by widening the contexts in which the text has been reused.}, URL = {https://publications.cnr.it/doc/288058}, CONFERENCE_NAME = {DH2014}, CONFERENCE_PLACE = {Lausanne}, CONFERENCE_DATE = {8-12 july 2014}, BOOKTITLE = {Digital Humanities 2014-Book of Abstracts}, } @INPROCEEDINGS{DELGROSSO_2014_INPROCEEDINGS_DN_288053, AUTHOR = {Del Grosso, A. M. and Nahli, O.}, TITLE = {Towards a flexible open-source software library for multi-layered scholarly textual studies-An Arabic use-case dealing with semi-automatic language processing}, YEAR = {2014}, ABSTRACT = {This paper presents both the general model and a case study of the Computational and Collaborative Philology Library (CoPhiLib), an ongoing initiative underway at the Institute for Computational Linguistics (ILC) of the National Research Council (CNR), Pisa, Italy. The library, designed and organized as a reusable, abstract and open-source software component, aims at solving the needs of multi-lingual and cross- lingual analysis by exposing common Application Programming Interfaces (APIs). The core modules, coded by the Java programming language, constitute the groundwork of a Web platform designed to deal with textual scholarly needs. The Web application, implemented according to the Java Enterprise specifications, focuses on multi-layered analysis for the study of literary documents and related multimedia sources. 
This ambitious challenge seeks to obtain the management of textual resources, on the one hand by abstracting from current language, on the other hand by decoupling from the specific requirements of single projects. This goal is achieved thanks to methodologies declared by the "agile process", and by putting into effect suitable use case modeling, design patterns, and component- based architectures. The reusability and flexibility of the system have been tested on an Arabic case study: the system allows users to choose the morphological engine (such as AraMorph or Al- Khalil), along with linguistic granularity (i.e. with or without declension). Finally, the application enables the construction of annotated resources for further statistical engines (training set).}, KEYWORDS = {Computational and collaborative philology, API}, URL = {https://publications.cnr.it/doc/288053}, CONFERENCE_NAME = {ANLP IEEE CIST14}, CONFERENCE_PLACE = {Tetuan, Morocco}, CONFERENCE_DATE = {20-22/10/ 2014}, } @ARTICLE{NAHLI_2013_ARTICLE_N_288546, AUTHOR = {Nahli, O.}, TITLE = {Computational contributions for Arabic language processing Part I. The automatic morphologic analysis of Arabic texts}, YEAR = {2013}, ABSTRACT = {problems of ambiguity inherent to the Arabic language. Difficulties arose in the various stages of automatic processing of the Arabic version of Plotinus, the text which lies at the core of our project. Part I highlights the needs that led us to update the morphological engine AraMorph in order to optimize its morpho-syntactic analysis. Even if the engine has been optimized, a digital lexical source for better use of the system is still lacking. Part II presents a methodology exploiting the internal structure of the Arabic lexicographic encyclopaedia Lisan al-"arab, which allows automatic extraction of the roots and derived lemmas. 
The outcome of this work is a useful resource for morphological analysis of Arabic, either in its own right, or to enrich already existing resources}, KEYWORDS = {Morphological engine AraMorph, Morpho-syntactic analysis, Arabic language}, PAGES = {195-206}, URL = {http://www.greekintoarabic.eu/uploads/media/NAHLI_SGA_3-2013.pdf}, VOLUME = {3}, PUBLISHER = {Pacini Editore (Pisa, Italia)}, ISSN = {2281-2687}, JOURNAL = {Studia graeco-arabica}, } @ARTICLE{NAHLI_2013_ARTICLE_NG_282561, AUTHOR = {Nahli, O. and Giovannetti, E.}, TITLE = {Computational contributions for Arabic language processing-Part II. Lisan al-'arab as a source of lexical and morphological knowledge}, YEAR = {2013}, ABSTRACT = {The following sections illustrate a part of the study on the morphology of the Arabic language which is carried on within the framework of the ERC project Greek into Arabic. Philosophical Concepts and Linguistic Bridges ADG 249431. We used the Arabic lexicographic encyclopaedia Lisan al-'arab and, thanks to the regularity of its structure, we developed a system for the extraction of morphologically labelled word sequences, to be exploited for morphological analysis purposes.}, KEYWORDS = {Arabic morphology, Arabic morphological analysis, Arabic lexicography, Natural Language Processing}, PAGES = {207-210}, URL = {https://publications.cnr.it/doc/282561}, VOLUME = {3}, PUBLISHER = {CNR, Istituto di Linguistica Computazionale (Pisa, Italia)}, ISSN = {2239-012X}, JOURNAL = {Studia graeco-arabica}, } @TECHREPORT{BOZZI_2012_TECHREPORT_BGBNMPRD_390781, AUTHOR = {Bozzi, A. and Giovannetti, E. and Boschetti, F. and Nahli, O. and Marchi, S. and Piccini, S. and Ruimy, N. and Del Grosso, A. M.}, TITLE = {Greek into Arabic: contents, technologies and (humanistic and scientific) applications of a new software}, YEAR = {2012}, ABSTRACT = {This contribution aims to describe the methodological approach to Digital Philology by means of the G\&A Web Application. 
It also shows running examples for the: 1) Visualization and ordering of parallel texts subdivided in pericopes; 2) Linguistic annotations; 3) Scholarly comments; and 4)Search functions}, KEYWORDS = {digital philology, computational philology, software engineering, Greek into Arabic, Computational linguistics}, URL = {https://publications.cnr.it/doc/390781}, } @ARTICLE{NAHLI_2011_ARTICLE_N_288551, AUTHOR = {Nahli, O.}, TITLE = {Yaḥyā ibn ‘Adī sulla differenza fra la logica greca e la grammatica araba}, YEAR = {2011}, ABSTRACT = {?is paper examines the Treatise on the Difference between the two fields of philosophical logic and Arabic grammar (Maq?la f? taby?n al-fa?l bayna ?ina'atay al-man?iq al-falsaf? wa-l-na?w al-'arab?) by Ab? Zakariy?' Ya?y? ibn 'Ad?, providing also its Italian translation. It will appear that Ya?y? ibn 'Ad?'s approach is based on F?r?b?'s ideas about the relationship between logic and the sciences of language. Even more important is the fact that the difference established by Ya?y? ibn 'Ad?'s between logic and grammar both as for the subject (maw??') and as for scope (?arad) counts as the source for Avicenna's distinction between subject (maw??') and scope (?arad) of the metaphysics}, PAGES = {47-67}, URL = {http://www.greekintoarabic.eu/uploads/media/3wafae_utlimo.pdf}, VOLUME = {1}, PUBLISHER = {Pacini Editore (Pisa, Italia)}, ISSN = {2281-2687}, JOURNAL = {Studia graeco-arabica}, } @BOOK{NAHLI_2010_BOOK_N_288556, AUTHOR = {Nahli, O.}, TITLE = {lingua araba, il sistema verbale}, YEAR = {2010}, ABSTRACT = {Questo volume offre una descrizione del verbo arabo, in una prospettiva prevalentemente didattica seguendo la struttura di pensiero e le categorie linguistiche proprie della tradizione grammaticale araba. Emerge e si chiarisce così la logica che domina l'articolato sistema verbale arabo e che funge da cardine nell'organizzazione della sintassi e di buona parte della morfologia nominale. 
Per di più, al termine del libro, è presente un capitolo dedicato alla "frase araba", ovvero alla "frase verbale" e alla "frase nominale" ed ai loro rispettivi elementi costitutivi.}, PAGES = {140}, URL = {https://publications.cnr.it/doc/288556}, ISBN = {9788867413003}, } @ARTICLE{PICCHI_2003_ARTICLE_PSNC_64493, AUTHOR = {Picchi, E. and Sassolini, E. and Nahli, O. and Cucurullo, S.}, TITLE = {Risorse monolingui e multilingui. Corpus bilingue italiano-arabo}, YEAR = {2003}, ABSTRACT = {Abstract - The objective of the project is twofold: on the one hand, the creation and elaboration of software procedures for the Arabic language and, on the other hand, the creation of linguistic resources for the management of large Arabic corpora. The linguistic resources are substantially the following: a) Morphological engine for the Arabic language. The engine is constituted by a number of modules: the algorithms and modules for generation and analysis, an appropriate encoding system for the representation of lexical data and of morphological characteristics of Arabic, the so-called “lemmario”, i.e. the archive of lemmas; b) The automatic alignment of parallel texts in Italian and Arabic language; c) Automatic tagging of Arabic texts, performed by using the above morphological engine; d) Systems for accessing and querying (raw and/or tagged) Arabic texts and parallel Italian-Arabic corpora.}, KEYWORDS = {Morfologia araba, Corpora bilingui, Analisi testuale, Aligner, Tagger}, PAGES = {629-678}, URL = {https://publications.cnr.it/doc/64493}, VOLUME = {18-19}, PUBLISHER = {Istituti Editoriali e Poligrafici Internazionali (Ghezzano La Fontina, Italia)}, ISSN = {1824-1573}, JOURNAL = {Linguistica computazionale (Online)}, } @INPROCEEDINGS{PICCHI_2002_INPROCEEDINGS_PSNCV_288585, AUTHOR = {Picchi, E. and Sassolini, E. and Nahli, O. and Cucurullo, S. and Vargas, I. 
M.}, TITLE = {Italian Arabic Linguistic Tools}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/288585}, VOLUME = {Volume II}, CONFERENCE_NAME = {LREC 2002}, CONFERENCE_PLACE = {Las Palmas de Gran Canaria, Spain}, CONFERENCE_DATE = {30th \& 31 May 2002}, BOOKTITLE = {Third International Conference on Language Resources and Evaluation}, } @TECHREPORT{SASSOLINI_2002_TECHREPORT_SN_288591, AUTHOR = {Sassolini, E. and Nahli, O.}, TITLE = {Motore morfologico della lingua araba}, YEAR = {2002}, URL = {https://publications.cnr.it/doc/288591}, }