-- Module:Km-translit
local export = {}
local gsub = mw.ustring.gsub
local len = mw.ustring.len
local match = mw.ustring.match
local sub = mw.ustring.sub
-- Consonant romanization table.
-- cons_conv[letter] = { romanized consonant, vowel class }, where the vowel
-- class ("a" or "o") is the key later used to pick a column of vowel_conv.
-- The empty-string entry lets absent (optional) consonant slots be looked up
-- like any other consonant and contribute nothing to the output.
local cons_conv = {}
for _, entry in ipairs({
	{ "ក", "k", "a" },
	{ "ខ", "kh", "a" },
	{ "គ", "k", "o" },
	{ "ឃ", "kh", "o" },
	{ "ង", "ng", "o" },
	{ "ច", "ch", "a" },
	{ "ឆ", "chh", "a" },
	{ "ជ", "ch", "o" },
	{ "ឈ", "chh", "o" },
	{ "ញ", "nh", "o" },
	{ "ដ", "d", "a" },
	{ "ឋ", "th", "a" },
	{ "ឌ", "d", "o" },
	{ "ឍ", "th", "o" },
	{ "ណ", "n", "a" },
	{ "ត", "t", "a" },
	{ "ថ", "th", "a" },
	{ "ទ", "t", "o" },
	{ "ធ", "th", "o" },
	{ "ន", "n", "o" },
	{ "ប", "b", "a" },
	{ "ផ", "ph", "a" },
	{ "ព", "p", "o" },
	{ "ភ", "ph", "o" },
	{ "ម", "m", "o" },
	{ "យ", "y", "o" },
	{ "រ", "r", "o" },
	{ "ល", "l", "o" },
	{ "វ", "v", "o" },
	{ "ឝ", "sh", "a" },
	{ "ឞ", "ss", "o" },
	{ "ស", "s", "a" },
	{ "ហ", "h", "a" },
	{ "ឡ", "l", "a" },
	{ "អ", "’", "a" },
	{ "", "", "" },
	{ "ប៉", "p", "a" },
}) do
	cons_conv[entry[1]] = { entry[2], entry[3] }
end
-- Consonant stacks that are romanized as a single letter.
-- Every key is the letter "ហ" joined by the combining sign "្" to a second
-- consonant; the value is the Latin letter used for the whole stack.
local digraph = {}
for second, latin in pairs({
	["គ"] = "g",
	["ន"] = "n",
	["ម"] = "m",
	["ល"] = "l",
	["វ"] = "f",
	["ស"] = "z",
}) do
	digraph["ហ្" .. second] = latin
end
-- Independent vowel letters and their complete romanization.
-- Used as a gsub replacement table over every character of the text, so any
-- character without an entry here is left untouched.
local indep_vowel = {
	["ឣ"] = "’â",
	["ឤ"] = "’a",
	["ឥ"] = "’ĕ",
	["ឦ"] = "’ei",
	["ឧ"] = "’ŏ",
	["ឨ"] = "’ŏk",
	["ឩ"] = "’ŭ",
	["ឪ"] = "’ŏu",
	["ឫ"] = "rœ̆",
	["ឬ"] = "rœ",
	["ឭ"] = "lœ̆",
	["ឮ"] = "lœ",
	["ឯ"] = "’é",
	["ឰ"] = "’ai",
	["ឱ"] = "’aô",
	["ឲ"] = "’aô",
	["ឳ"] = "’âu",
}
-- Dependent vowel romanization table.
-- vowel_conv[vowel][class] gives the romanized vowel, where `vowel` is the
-- dependent vowel sequence of a syllable ("" for no written vowel) and `class`
-- is the vowel class ("a" or "o") taken from cons_conv or forced by a
-- consonant-shift sign.
local vowel_conv = {
	[""] = { a = "â", o = "ô" },
	["ា"] = { a = "a", o = "éa" },
	["ិ"] = { a = "ĕ", o = "ĭ" },
	["ី"] = { a = "ei", o = "i" },
	["ឹ"] = { a = "œ̆", o = "œ̆" },
	["ឺ"] = { a = "œ", o = "œ" },
	["ុ"] = { a = "ŏ", o = "ŭ" },
	["ូ"] = { a = "o", o = "u" },
	["ួ"] = { a = "uŏ", o = "uŏ" },
	["ើ"] = { a = "aeu", o = "eu" },
	["ឿ"] = { a = "eua", o = "eua" },
	["ៀ"] = { a = "iĕ", o = "iĕ" },
	["េ"] = { a = "é", o = "é" },
	["ែ"] = { a = "ê", o = "ê" },
	["ៃ"] = { a = "ai", o = "ey" },
	["ោ"] = { a = "aô", o = "oŭ" },
	["ៅ"] = { a = "au", o = "ŏu" },
	["ុំ"] = { a = "om", o = "ŭm" },
	["ំ"] = { a = "âm", o = "um" },
	["ាំ"] = { a = "ăm", o = "ŏâm" },
	["ាំង"] = { a = "ăng", o = "eăng" },
	["ះ"] = { a = "ăh", o = "eăh" },
	["ុះ"] = { a = "ŏh", o = "uh" },
	["េះ"] = { a = "éh", o = "éh" },
	["ោះ"] = { a = "aŏh", o = "uŏh" },
	-- NOTE(review): these values look like the "ិ" row (ĕ/ĭ) plus "h" rather
	-- than the "ឹ" row (œ̆); confirm the intended key.
	["ឹះ"] = { a = "ĕh", o = "ĭh" },
	["ៈ"] = { a = "a’", o = "éa’" },
	-- The vowel under this sign is wrapped in a span meant to render it in
	-- light grey. CSS has no `font-color` property, so `color` is used to make
	-- the styling actually take effect.
	["័"] = { a = '<span style="color:#DCDCDC">â</span>', o = '<span style="color:#DCDCDC">ô</span>' },
}
-- Classification of individual characters, consumed by the syllable splitter
-- in export.tr. Each group below pairs a class name with the characters that
-- belong to it; characters absent from every group have no class (nil).
local char_type = {}
for _, group in ipairs({
	{ "consonant", {
		"ក", "ខ", "គ", "ឃ", "ង",
		"ច", "ឆ", "ជ", "ឈ", "ញ",
		"ដ", "ឋ", "ឌ", "ឍ", "ណ",
		"ត", "ថ", "ទ", "ធ", "ន",
		"ប", "ផ", "ព", "ភ", "ម",
		"យ", "រ", "ល", "វ", "ឝ",
		"ឞ", "ស", "ហ", "ឡ", "អ",
	} },
	{ "indep_vowel", {
		"ឣ", "ឤ", "ឥ", "ឦ", "ឧ",
		"ឨ", "ឩ", "ឪ", "ឫ", "ឬ",
		"ឭ", "ឮ", "ឯ", "ឰ", "ឱ",
		"ឲ", "ឳ",
	} },
	{ "vowel_sign", {
		"ា", "ិ", "ី", "ឹ", "ឺ",
		"ុ", "ូ", "ួ", "ើ", "ឿ",
		"ៀ", "េ", "ែ", "ោ", "ៅ",
	} },
	-- Vowels after which the splitter normally closes the syllable.
	{ "terminating_vowel", { "ៃ", "ំ", "ះ", "ៈ" } },
	{ "consonant_shift", { "៉", "៊" } },
	{ "terminating_sign", { "់" } },
	{ "sign", { "៌", "៍", "៎", "៏", "័", "៑", "៓", "៖", "ៜ", "៝" } },
	{ "combining_sign", { "្" } },
	{ "punctuation", { "។", "៕", "ៗ", "៘", "៙", "៚", "៛" } },
	-- NOTE(review): the class name suggests this key is meant to be a
	-- zero-width space; confirm the character survived copying.
	{ "ZWS", { "" } },
}) do
	local kind, members = group[1], group[2]
	for _, ch in ipairs(members) do
		char_type[ch] = kind
	end
end
-- Maps the two runs of ten digit-like symbols to the ASCII digit strings
-- "0".."9". Used as a gsub replacement table, so symbols without an entry are
-- left as they are.
local sp_symbols = {}
for _, digits in ipairs({
	{ "០", "១", "២", "៣", "៤", "៥", "៦", "៧", "៨", "៩" },
	{ "៰", "៱", "៲", "៳", "៴", "៵", "៶", "៷", "៸", "៹" },
}) do
	-- The position in the row (1-based) minus one is the digit's value.
	for position, glyph in ipairs(digits) do
		sp_symbols[glyph] = tostring(position - 1)
	end
end
-- Character class matching any single consonant letter.
local CONSONANT_CLASS = '[កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]'

-- Pre-processing patterns: a stacked consonant pair followed by a consonant,
-- and a consonant followed by a (possibly stacked) consonant pair.
local STACK_THEN_CONSONANT = '(' .. CONSONANT_CLASS .. '្' .. CONSONANT_CLASS .. ')(' .. CONSONANT_CLASS .. ')'
local CONSONANT_THEN_CLUSTER = '(' .. CONSONANT_CLASS .. ')(' .. CONSONANT_CLASS .. '្?' .. CONSONANT_CLASS .. ')'

-- Anatomy of one syllable: initial consonant(s), consonant shifter, vowel,
-- consonant shifter, coda consonant(s), optional trailing "៖".
local SYLLABLE_PATTERN = '^(' .. CONSONANT_CLASS .. ')្?(' .. CONSONANT_CLASS .. '?)([៉៊]?)([ិីឹឺុូួើឿៀេែៃោៅា័]?[ំះៈ]?)([៉៊]?)(' .. CONSONANT_CLASS .. '?៉?)្?(' .. CONSONANT_CLASS .. '?)(៖?)$'

-- True when the terminating vowel at position i is the middle of "ាំង" and
-- the "ង" either ends the word or is followed by another consonant, in which
-- case the syllable must stay open so that "ង" can join it.
local function keeps_syllable_open(c, chartype, i)
	return c[i-1] .. c[i] .. (c[i+1] or '') == 'ាំង'
		and (i == #c - 1 or chartype[i+2] == 'consonant')
end

-- Decides whether the consonant at position i closes the current syllable
-- (true) or starts a new one (false). Scans forward from i collecting
-- character classes until a vowel-like character is met; hitting the end of
-- the word, punctuation, an independent vowel, a terminating sign or a ZWS
-- first always means "coda".
local function is_coda(c, chartype, i)
	local next_types = {}
	local j = i
	while true do
		local kind = chartype[j]
		if not kind or kind == 'punctuation' or kind == 'indep_vowel' or kind == 'terminating_sign' or kind == 'ZWS' then
			return true
		elseif kind == 'consonant' or kind == 'combining_sign' or (kind == 'sign' and c[j] ~= '័') then
			table.insert(next_types, kind)
		else
			-- Anything else counts as the vowel of a following syllable.
			break
		end
		j = j + 1
	end
	return match(table.concat(next_types, " "), 'consonant s?i?g?n? ?consonant') ~= nil
end

-- Romanizes one syllable. Syllables that do not fit SYLLABLE_PATTERN, or
-- whose parts are missing from the conversion tables, are returned unchanged.
local function romanize_syllable(syllable)
	return (gsub(syllable, SYLLABLE_PATTERN, function(initial_a, initial_b, cons_shifter_a, vowel, cons_shifter_b, coda_a, coda_b, optional_sign)
		-- Two bare consonants with no stacking sign: the second is a coda.
		if cons_shifter_a .. cons_shifter_b .. vowel .. coda_a .. coda_b == '' and initial_b ~= '' and not match(syllable, '្') then
			coda_a = initial_b
			initial_b = ''
		end
		-- The consonant that determines the vowel class: the second initial,
		-- unless it is one of the letters listed in the class below.
		local base = initial_a
		if initial_b ~= '' and not match(initial_b, '[ងញនមយរលវ]') then
			base = initial_b
		end
		if vowel .. coda_a .. coda_b == 'ាំង' then
			vowel, coda_a, coda_b = 'ាំង', '', ''
		end
		optional_sign = gsub(optional_sign, '៖', 'ː')
		local cons_shifter = cons_shifter_a .. cons_shifter_b
		local vowel_class
		if cons_shifter == '' and cons_conv[base] then
			vowel_class = cons_conv[base][2]
		elseif cons_shifter == '៉' then
			vowel_class = 'a'
		elseif cons_shifter == '៊' then
			vowel_class = 'o'
		else
			return initial_a .. initial_b .. cons_shifter .. vowel .. coda_a .. coda_b .. optional_sign
		end
		local onset_digraph = digraph[initial_a .. '្' .. initial_b]
		local coda_digraph = digraph[coda_a .. '្' .. coda_b]
		if onset_digraph and (coda_digraph or (cons_conv[coda_a] and cons_conv[coda_b])) and vowel_conv[vowel] then
			return onset_digraph .. vowel_conv[vowel][vowel_class] .. (coda_digraph or cons_conv[coda_a][1] .. cons_conv[coda_b][1]) .. optional_sign
		elseif cons_conv[initial_a] and cons_conv[initial_b] and vowel_conv[vowel] and cons_conv[coda_a] and cons_conv[coda_b] then
			return cons_conv[initial_a][1] .. cons_conv[initial_b][1] .. vowel_conv[vowel][vowel_class] .. cons_conv[coda_a][1] .. cons_conv[coda_b][1] .. optional_sign
		end
		-- Falling through returns nil, which makes gsub keep the syllable.
	end))
end

--- Transliterates Khmer text into Latin script.
-- Every run of Khmer characters is split into syllables by a small state
-- machine, each syllable is romanized, and the run is substituted back into
-- the text.
-- @param text       string to transliterate
-- @param lang       not used by this function
-- @param sc         not used by this function
-- @param debug_mode when truthy, the text is returned even if Khmer
--                   characters are left over
-- @return the transliterated text, or nil when Khmer characters remain and
--         debug_mode is not set
function export.tr(text, lang, sc, debug_mode)
	text = gsub(text, '[០-៹]', sp_symbols)
	-- Drops the first combining sign of a triple consonant stack.
	text = gsub(text, '(.)្(.្.)', '%1%2')
	-- NOTE(review): as written the next three substitutions put back exactly
	-- what they matched. The replacements may originally have contained an
	-- invisible separator character; confirm against the upstream module.
	text = gsub(text, STACK_THEN_CONSONANT, '%1%2')
	text = gsub(text, CONSONANT_THEN_CLUSTER, '%1%2')
	text = gsub(text, '(.៍)', '%1')

	for word in mw.ustring.gmatch(text, '[ក-៝]+') do
		local original_text = word
		local c, chartype, syl, curr_syl = {}, {}, {}, {}
		local progress = 'none'

		-- Split the run into characters and classify each one.
		for i = 1, len(word) do
			c[i] = sub(word, i, i)
			chartype[i] = char_type[c[i]]
		end

		-- Syllable splitter. `progress` records which part of a syllable was
		-- seen last; the extra iteration at #c + 1 flushes the last syllable.
		for i = 1, #c + 1 do
			local kind = chartype[i]
			if i == #c + 1 or kind == 'ZWS' then
				progress = 'none'
				table.insert(syl, table.concat(curr_syl, ""))
				curr_syl = {}
			elseif progress == 'none' then
				if kind == 'consonant' then
					table.insert(curr_syl, c[i])
					progress = 'initial'
				else
					table.insert(syl, c[i])
				end
			elseif progress == 'initial' then
				if kind == 'combining_sign' then
					table.insert(curr_syl, c[i])
					progress = 'initial_combining'
				elseif kind == 'sign' or kind == 'consonant_shift' then
					table.insert(curr_syl, c[i])
				elseif kind == 'vowel_sign' then
					table.insert(curr_syl, c[i])
					progress = 'vowel'
				elseif kind == 'terminating_vowel' then
					table.insert(curr_syl, c[i])
					if keeps_syllable_open(c, chartype, i) then
						progress = 'vowel'
					else
						table.insert(syl, table.concat(curr_syl, ""))
						curr_syl = {}
						progress = 'none'
					end
				elseif kind == 'consonant' then
					if is_coda(c, chartype, i) then
						table.insert(curr_syl, c[i])
						progress = 'coda'
					else
						table.insert(syl, table.concat(curr_syl, ""))
						curr_syl = {c[i]}
						progress = 'initial'
					end
				else
					-- NOTE(review): curr_syl is not flushed here, so this
					-- character is emitted ahead of the pending syllable;
					-- confirm that this is intended.
					table.insert(syl, c[i])
					progress = 'none'
				end
			elseif progress == 'initial_combining' then
				if kind == 'consonant' then
					table.insert(curr_syl, c[i])
					progress = 'initial'
				else
					-- NOTE(review): same unflushed-curr_syl behaviour as above.
					table.insert(syl, c[i])
					progress = 'none'
				end
			elseif progress == 'vowel' then
				if kind == 'vowel_sign' then
					table.insert(curr_syl, c[i])
				elseif kind == 'terminating_vowel' then
					table.insert(curr_syl, c[i])
					if keeps_syllable_open(c, chartype, i) then
						progress = 'vowel'
					else
						table.insert(syl, table.concat(curr_syl, ""))
						curr_syl = {}
						progress = 'none'
					end
				elseif kind == 'consonant' then
					if is_coda(c, chartype, i) then
						table.insert(curr_syl, c[i])
						progress = 'coda'
					else
						table.insert(syl, table.concat(curr_syl, ""))
						curr_syl = {c[i]}
						progress = 'initial'
					end
				else
					-- NOTE(review): same unflushed-curr_syl behaviour as above.
					table.insert(syl, c[i])
					progress = 'none'
				end
			elseif progress == 'coda' then
				if kind == 'combining_sign' then
					table.insert(curr_syl, c[i])
					progress = 'coda_combining'
				elseif kind == 'sign' or kind == 'terminating_sign' then
					table.insert(curr_syl, c[i])
				else
					table.insert(syl, table.concat(curr_syl, ""))
					curr_syl = {}
					if kind == 'consonant' then
						table.insert(curr_syl, c[i])
						progress = 'initial'
					else
						table.insert(syl, c[i])
						progress = 'none'
					end
				end
			elseif progress == 'coda_combining' then
				if kind == 'consonant' then
					table.insert(curr_syl, c[i])
					progress = 'coda'
				else
					-- NOTE(review): c[i] is not stored anywhere in this
					-- branch, so the character is dropped; confirm intended.
					table.insert(syl, table.concat(curr_syl, ""))
					curr_syl = {}
					progress = 'none'
				end
			end
		end

		-- Romanize syllable by syllable.
		for i = 1, #syl do
			if match(syl[i], '៍') then
				-- Syllable carrying "៍": convert only its consonants and mark
				-- the result as struck out. The remaining syllables of the
				-- word are still processed by later iterations.
				syl[i] = '<small><del>' .. gsub(syl[i], '.', function(consonant)
					if cons_conv[consonant] then
						return cons_conv[consonant][1]
					end
				end) .. '</del></small>'
			else
				syl[i] = gsub(syl[i], '់$', '')
				syl[i] = romanize_syllable(syl[i])
				-- A repetition mark copies the preceding (romanized) syllable.
				if syl[i] == 'ៗ' and i > 1 then
					syl[i] = syl[i-1]
				end
			end
		end

		word = table.concat(syl, "")
		text = gsub(text, original_text, word)
	end

	text = gsub(text, '.', indep_vowel)
	-- A repetition mark standing after a space repeats the preceding token.
	text = gsub(text, '([^ ]*) ៗ', '%1 %1')

	-- Any Khmer character left over means the transliteration failed.
	if match(text, '[ក-៹]') and not debug_mode then
		return nil
	else
		return text
	end
	-- To do: other signs
end
return export
--[[
Content Disclaimer
Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.
The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for republication. We strive to provide accurate and relevant information; however:
- There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
- It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
- Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
- Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.
]]