% Nahli, Gugliotta, Khlif, Benotto (2023): conference paper in the CiSt 2023 proceedings.
% CONFERENCE_NAME and CONFERENCE_PLACE are non-standard fields; standard styles ignore them.
@INPROCEEDINGS{NAHLI_2023_INPROCEEDINGS_NGKB_481366,
  AUTHOR           = {Nahli, O. and Gugliotta, E. and Khlif, N. and Benotto, G.},
  TITLE            = {Challenges and Progress in Constructing {Arabic} Dialect Corpora and Linguistic Tools: A Focus on {Moroccan} and {Tunisian} Dialects},
  BOOKTITLE        = {2023 7th {IEEE} Congress on Information Science and Technology ({CiSt})},
  YEAR             = {2023},
  PAGES            = {293--298},
  PUBLISHER        = {IEEE (USA)},
  ISBN             = {978-1-6654-6133-7},
  DOI              = {10.1109/cist56084.2023.10410009},
  URL              = {https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10410009},
  ABSTRACT         = {Given the lack of resources for Arabic dialects, the construction of corpora, lexical resources, and tools is a non-trivial challenge. The focus of the article is to describe our in-progress work to address these deficiencies. We start with Moroccan and Tunisian dialects to provide annotated corpora and corpus-based lexical resources. We also aim to extend an existing morphological engine with linguistic resources built \emph{ad hoc} for each dialect. In addition, we develop an integrated component in the morphological engine to better address linguistic and sociolinguistic characteristics while preserving the integrity of dialectal texts.},
  KEYWORDS         = {Arabic dialects, Moroccan dialect, Tunisian dialect, corpora, lexical resources, Aramorph},
  CONFERENCE_NAME  = {7th IEEE Congress on Information Science and Technology (CiSt)},
  CONFERENCE_PLACE = {USA},
}