-- Module:ar-translit
-- Documentation for this module may be created at Module:ar-translit/doc
-- Module table; export.tr is attached to it and it is returned at the end of the file.
local export = {}
-- Per-character lookup used by export.tr in its final character-by-character pass:
-- each key is a single Arabic-script character (written literally or as UTF-8 byte
-- escapes) and each value is its Latin transliteration. Characters without an entry
-- are left unchanged by that pass.
local tt = {
-- consonants
["ب"]="b", ["ت"]="t", ["ث"]="ṯ", ["ج"]="j", ["ح"]="ḥ", ["خ"]="ḵ",
["د"]="d", ["ذ"]="ḏ", ["ر"]="r", ["ز"]="z", ["س"]="s", ["ش"]="š",
["ص"]="ṣ", ["ض"]="ḍ", ["ط"]="ṭ", ["ظ"]="ẓ", ["ع"]="ʿ", ["غ"]="ḡ",
["ف"]="f", ["ق"]="q", ["ك"]="k", ["ل"]="l", ["م"]="m", ["ن"]="n",
["ه"]="h",
-- control characters
["\226\128\140"]="-", -- ZWNJ (zero-width non-joiner)
-- ["\226\128\141"]="", -- ZWJ (zero-width joiner)
-- rare letters
["پ"]="p", ["چ"]="č", ["ڤ"]="v", ["گ"]="g", ["ڨ"]="g", ["ڧ"]="q",
-- semivowels or long vowels, alif, hamza, special letters
["\216\167"]="ā", -- ʾálif (U+0627)
-- hamzated letters
["\216\163"]="ʾ", ["إ"]="ʾ", ["ؤ"]="ʾ", ["ئ"]="ʾ", ["ء"]="ʾ",
["و"]="ū", -- default reading; export.tr rewrites it to "w" beforehand when a diacritic follows
["ي"]="ī", -- default reading; export.tr rewrites it to "y" beforehand when a diacritic follows
["ى"]="ā",
["\216\162"]="ʾā", -- ʾálif with mádda (U+0622)
["ٱ"]= "", -- hámzat-al-wáṣl
["\217\176"] = "ā", -- ʾálif xanjaríyya
-- short vowels, šádda and sukūn
["\217\142"]="a", -- fátḥa
["\217\144"]="i", -- kásra
["\217\143"]="u", -- ḍámma
["\217\146"]="", -- sukūn - no vowel
-- ligatures
["ﻻ"]="lā",
["ﷲ"]="llāh",
-- taṭwīl
["ـ"]="", -- taṭwīl, no sound
-- numerals
["١"]="1", ["٢"]="2", ["٣"]="3", ["٤"]="4", ["٥"]="5",
["٦"]="6", ["٧"]="7", ["٨"]="8", ["٩"]="9", ["٠"]="0",
-- punctuation (leave on separate lines)
["؟"]="?", -- question mark
["\216\140"]=",", -- comma
["؛"]=";" -- semicolon
}
-- tāʾ marbūṭa (special) - always after a fátḥa (a), silent at the end of an utterance, "t" in ʾiḍāfa or with pronounced tanwīn
-- tanwin nasb: often used with ʾálif (before ʾálif in formal writing, misspelled after ʾálif)
-- transliterate any word or phrase
--- Transliterate Arabic text (a word or a phrase) into Latin script.
-- The text is rewritten by a fixed sequence of pattern substitutions and then
-- mapped character by character through the `tt` table.
-- @param text string: the Arabic text to transliterate
-- @param lang not used by this function; kept so the signature stays unchanged
-- @param sc not used by this function; kept so the signature stays unchanged
-- @param showI3raab when truthy, show ʾiʿrāb (desinential inflection): tāʾ marbūṭa
--   becomes "t" and tanwīn becomes "an"/"in"/"un"; otherwise tāʾ marbūṭa becomes
--   "(t)", tanwīn is dropped and a short vowel ending a word is dropped
-- @return string: the transliterated text
function export.tr(text, lang, sc, showI3raab)
	local gsub = mw.ustring.gsub

	-- NFC normalisation, which MediaWiki does for all Unicode strings, sorts the
	-- short vowels and tanwīn signs BEFORE shádda (their canonical combining
	-- classes are lower). That makes the transliteration process inconvenient,
	-- because the shádda rule below would double the vowel mark instead of the
	-- consonant, so move shádda back in front of any such mark (not only fátḥa).
	text = gsub(text, "([\217\139\217\140\217\141\217\142\217\143\217\144])\217\145", "\217\145%1")

	-- NOTE(review): the two hamza rules run before the definite-article rules, so
	-- a bare ʾálif carrying an explicit fátḥa is already hamzated when the article
	-- patterns are tried and they no longer match it - confirm this is intended.
	text = gsub(text, "\216\167([\217\142\217\143])", "\216\163%1") -- add hamza
	text = gsub(text, "\216\167\217\144", "\216\165\217\144")
	text = gsub(text, "\217\143\217\136\216\167", "ū") -- ḍámma + wāw + ʾálif: ignore the ʾálif

	-- wāw / yāʾ with shádda are geminated consonants. Handle them before the
	-- generic doubling: otherwise only the second copy is followed by a diacritic
	-- and the first copy would fall through to the long-vowel reading (ū / ī).
	text = gsub(text, "\217\136\217\145", "ww")
	text = gsub(text, "\217\138\217\145", "yy")
	text = gsub(text, "(.)\217\145", "%1%1") -- shadda: double the preceding character

	-- if it has diacritic marks then it's w / y, otherwise ū / ī (via tt below)
	text = gsub(text, "\217\136([\217\139\217\140\217\141\217\142\217\143\217\144\217\145\217\146])", "w%1")
	text = gsub(text, "\217\138([\217\139\217\140\217\141\217\142\217\143\217\144\217\145\217\146])", "y%1")

	-- definite article at the start of the text or after whitespace; the matched
	-- whitespace character is put back unchanged instead of becoming a plain space
	text = gsub(text, "^[\216\167\217\177]\217\142?\217\132", "al-")
	text = gsub(text, "(%s)[\216\167\217\177]\217\142?\217\132", "%1al-")

	if showI3raab then -- show ʾiʿrāb (desinential inflection) in transliteration
		text = gsub(text, ".", {
			["\216\169"] = "t", ["\217\139"] = "an", ["\217\141"] = "in", ["\217\140"] = "un",
			["\217\142"] = "a", ["\217\144"] = "i" , ["\217\143"] = "u"
		})
	else
		text = gsub(text, "\216\169", "(t)") -- tāʾ marbūṭa
		text = gsub(text, "[\217\139\217\140\217\141]", "") -- drop tanwīn
		-- drop a short vowel that ends a word, keeping the following whitespace as is
		text = gsub(text, "[\217\142\217\143\217\144](%s)", "%1")
		text = gsub(text, "[\217\142\217\143\217\144]$", "")
	end

	-- map every remaining character through the lookup table; characters without
	-- an entry are kept as they are
	text = gsub(text, ".", tt)

	-- a short vowel followed by the matching long vowel is written as the long vowel
	text = gsub(text, "aā", "ā")
	text = gsub(text, "iī", "ī")
	text = gsub(text, "uū", "ū")
	return text
end
return export