[{"id":1330,"id_source":481366,"institutes":["ILC"],"type":"conference_article","type_order":7,"title":"Challenges and Progress in Constructing Arabic Dialect Corpora and Linguistic tools: A Focus on Moroccan and Tunisian Dialects","year":2023,"authors":["Nahli, O.","Gugliotta, E.","Khlif, N.","Benotto, G."],"authors_source":"Nahli, Ouafae; Gugliotta, Elisa; Khlif, Nadia; Benotto, Giulia","authors_cnr_name":["NAHLI, OUAFAE","GUGLIOTTA, ELISA","Khlif, Nadia","BENOTTO, GIULIA"],"authors_cnr_id":["rp03551","rp16372","rp16691","rp06562"],"authors_cnr_institute":[],"abstract":"Given the lack of resources for Arabic dialects, the construction of corpora, lexical resources, and tools is a non-trivial challenge. The focus of the article is to describe our in-progress work to address these deficiencies. We start with Moroccan and Tunisian dialects to provide annotated corpora and corpus-based lexical resources. We also aim to extend an existing morphological engine with linguistic resources built ad hoc for each dialect. In addition, we develop an integrated component in the morphological engine to better address linguistic and sociolinguistic characteristics while preserving the integrity of dialectal texts.","keywords":["Arabic dialects","Moroccan dialect","Tunisian dialect","corpora","lexical resources","Aramorph"],"pages":"293-298","url":"https:\/\/ieeexplore.ieee.org\/stamp\/stamp.jsp?tp=&arnumber=10410009","volume":"","doi":"10.1109\/cist56084.2023.10410009","editors":[],"editors_source":"","published":"2023 7th IEEE Congress on Information Science and Technology (CiSt)","publisher":"IEEE (USA)","issn":"","isbn":"978-1-6654-6133-7","conference_name":"7th IEEE Congress on Information Science and Technology (CiSt)","conference_place":"USA","conference_date":"","last_updated_cnr":"2025-02-25 17:01:49","last_updated_oai":"2025-02-25 17:01:49","last_updated_www":"0000-00-00 00:00:00"}]