Module:Km-translit

local export = {}
local gsub = mw.ustring.gsub
local len = mw.ustring.len
local match = mw.ustring.match
local sub = mw.ustring.sub

local cons_conv = {
	["ក"] = { "k", "a" }, 
	["ខ"] = { "kh", "a" }, 
	["គ"] = { "k", "o" }, 
	["ឃ"] = { "kh", "o" }, 
	["ង"] = { "ng", "o" }, 
	["ច"] = { "ch", "a" }, 
	["ឆ"] = { "chh", "a" }, 
	["ជ"] = { "ch", "o" }, 
	["ឈ"] = { "chh", "o" }, 
	["ញ"] = { "nh", "o" }, 
	["ដ"] = { "d", "a" }, 
	["ឋ"] = { "th", "a" }, 
	["ឌ"] = { "d", "o" }, 
	["ឍ"] = { "th", "o" }, 
	["ណ"] = { "n", "a" }, 
	["ត"] = { "t", "a" }, 
	["ថ"] = { "th", "a" }, 
	["ទ"] = { "t", "o" }, 
	["ធ"] = { "th", "o" }, 
	["ន"] = { "n", "o" }, 
	["ប"] = { "b", "a" }, 
	["ផ"] = { "ph", "a" }, 
	["ព"] = { "p", "o" }, 
	["ភ"] = { "ph", "o" }, 
	["ម"] = { "m", "o" }, 
	["យ"] = { "y", "o" }, 
	["រ"] = { "r", "o" }, 
	["ល"] = { "l", "o" }, 
	["វ"] = { "v", "o" }, 
	["ឝ"] = { "sh", "a" }, 
	["ឞ"] = { "ss", "o" }, 
	["ស"] = { "s", "a" }, 
	["ហ"] = { "h", "a" }, 
	["ឡ"] = { "l", "a" }, 
	["អ"] = { "’", "a" }, 
	[""] = { "", "" },
	
	["ប៉"] = { "p", "a" },
}

local digraph = {
	["ហ្គ"] = "g", ["ហ្ន"] = "n", ["ហ្ម"] = "m", ["ហ្ល"] = "l", ["ហ្វ"] = "f", ["ហ្ស"] = "z",
}

local indep_vowel = {
	["ឣ"] = "’â", ["ឤ"] = "’a", ["ឥ"] = "’ĕ", ["ឦ"] = "’ei",
	["ឧ"] = "’ŏ", ["ឨ"] = "’ŏk", ["ឩ"] = "’ŭ", ["ឪ"] = "’ŏu", 
	["ឫ"] = "rœ̆", ["ឬ"] = "rœ",
	["ឭ"] = "lœ̆", ["ឮ"] = "lœ",
	["ឯ"] = "’é", ["ឰ"] = "’ai", ["ឱ"] = "’aô", ["ឲ"] = "’aô", ["ឳ"] = "’âu",
}

local vowel_conv = {
	[""] = { ["a"] = "â", ["o"] = "ô" }, 
	["ា"] = { ["a"] = "a", ["o"] = "éa" }, 
	["ិ"] = { ["a"] = "ĕ", ["o"] = "ĭ" }, 
	["ី"] = { ["a"] = "ei", ["o"] = "i" }, 
	["ឹ"] = { ["a"] = "œ̆", ["o"] = "œ̆" }, 
	["ឺ"] = { ["a"] = "œ", ["o"] = "œ" }, 
	["ុ"] = { ["a"] = "ŏ", ["o"] = "ŭ" }, 
	["ូ"] = { ["a"] = "o", ["o"] = "u" }, 
	["ួ"] = { ["a"] = "uŏ", ["o"] = "uŏ" }, 
	["ើ"] = { ["a"] = "aeu", ["o"] = "eu" }, 
	["ឿ"] = { ["a"] = "eua", ["o"] = "eua" }, 
	["ៀ"] = { ["a"] = "iĕ", ["o"] = "iĕ" }, 
	["េ"] = { ["a"] = "é", ["o"] = "é" }, 
	["ែ"] = { ["a"] = "ê", ["o"] = "ê" }, 
	["ៃ"] = { ["a"] = "ai", ["o"] = "ey" }, 
	["ោ"] = { ["a"] = "aô", ["o"] = "oŭ" }, 
	["ៅ"] = { ["a"] = "au", ["o"] = "ŏu" }, 
	["ុំ"] = { ["a"] = "om", ["o"] = "ŭm" }, 
	["ំ"] = { ["a"] = "âm", ["o"] = "um" }, 
	["ាំ"] = { ["a"] = "ăm", ["o"] = "ŏâm" }, 
	["ាំង"] = { ["a"] = "ăng", ["o"] = "eăng" }, 
	["ះ"] = { ["a"] = "ăh", ["o"] = "eăh" }, 
	["ុះ"] = { ["a"] = "ŏh", ["o"] = "uh" }, 
	["េះ"] = { ["a"] = "éh", ["o"] = "éh" }, 
	["ោះ"] = { ["a"] = "aŏh", ["o"] = "uŏh" }, 
	["ឹះ"] = { ["a"] = "ĕh", ["o"] = "ĭh" },
	["ៈ"] = { ["a"] = "a’", ["o"] = "éa’" },
	["័"] = { ["a"] = '<span style="font-color:#DCDCDC">â</span>', ["o"] = '<span style="font-color:#DCDCDC">ô</span>' },
}

local char_type = {
	["ក"] = "consonant", ["ខ"] = "consonant", ["គ"] = "consonant", ["ឃ"] = "consonant", ["ង"] = "consonant", 
	["ច"] = "consonant", ["ឆ"] = "consonant", ["ជ"] = "consonant", ["ឈ"] = "consonant", ["ញ"] = "consonant", 
	["ដ"] = "consonant", ["ឋ"] = "consonant", ["ឌ"] = "consonant", ["ឍ"] = "consonant", ["ណ"] = "consonant", 
	["ត"] = "consonant", ["ថ"] = "consonant", ["ទ"] = "consonant", ["ធ"] = "consonant", ["ន"] = "consonant", 
	["ប"] = "consonant", ["ផ"] = "consonant", ["ព"] = "consonant", ["ភ"] = "consonant", ["ម"] = "consonant", 
	["យ"] = "consonant", ["រ"] = "consonant", ["ល"] = "consonant", ["វ"] = "consonant", ["ឝ"] = "consonant", 
	["ឞ"] = "consonant", ["ស"] = "consonant", ["ហ"] = "consonant", ["ឡ"] = "consonant", ["អ"] = "consonant", 
	["ឣ"] = "indep_vowel", ["ឤ"] = "indep_vowel", ["ឥ"] = "indep_vowel", ["ឦ"] = "indep_vowel", ["ឧ"] = "indep_vowel", 
	["ឨ"] = "indep_vowel", ["ឩ"] = "indep_vowel", ["ឪ"] = "indep_vowel", ["ឫ"] = "indep_vowel", ["ឬ"] = "indep_vowel", 
	["ឭ"] = "indep_vowel", ["ឮ"] = "indep_vowel", ["ឯ"] = "indep_vowel", ["ឰ"] = "indep_vowel", ["ឱ"] = "indep_vowel", 
	["ឲ"] = "indep_vowel", ["ឳ"] = "indep_vowel",
	["ា"] = "vowel_sign", ["ិ"] = "vowel_sign", ["ី"] = "vowel_sign", ["ឹ"] = "vowel_sign", ["ឺ"] = "vowel_sign", 
	["ុ"] = "vowel_sign", ["ូ"] = "vowel_sign", ["ួ"] = "vowel_sign", ["ើ"] = "vowel_sign", ["ឿ"] = "vowel_sign", 
	["ៀ"] = "vowel_sign", ["េ"] = "vowel_sign", ["ែ"] = "vowel_sign", 
	["ៃ"] = "terminating_vowel", 
	["ោ"] = "vowel_sign", ["ៅ"] = "vowel_sign", 
	["ំ"] = "terminating_vowel", ["ះ"] = "terminating_vowel", ["ៈ"] = "terminating_vowel", 
	["៉"] = "consonant_shift", ["៊"] = "consonant_shift", 
	["់"] = "terminating_sign", 
	["៌"] = "sign", ["៍"] = "sign", ["៎"] = "sign", ["៏"] = "sign", ["័"] = "sign", ["៑"] = "sign", 
	["្"] = "combining_sign", 
	["៓"] = "sign", 
	["។"] = "punctuation", ["៕"] = "punctuation", 
	["៖"] = "sign", 
	["ៗ"] = "punctuation", ["៘"] = "punctuation", ["៙"] = "punctuation", ["៚"] = "punctuation", ["៛"] = "punctuation", 
	["ៜ"] = "sign", ["៝"] = "sign", 
	["​"] = "ZWS",
}

local sp_symbols = {
	["០"] = "0", ["១"] = "1", ["២"] = "2", ["៣"] = "3", ["៤"] = "4",
	["៥"] = "5", ["៦"] = "6", ["៧"] = "7", ["៨"] = "8", ["៩"] = "9",
	["៰"] = "0", ["៱"] = "1", ["៲"] = "2", ["៳"] = "3", ["៴"] = "4",
	["៵"] = "5", ["៶"] = "6", ["៷"] = "7", ["៸"] = "8", ["៹"] = "9",
}	

function export.tr(text, lang, sc, debug_mode)
	text = gsub(text, '[០-៹]', sp_symbols)
	text = gsub(text, '(.)្(.្.)', '%1​%2')
	text = gsub(text, '([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]្[កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])', '​%1%2')
	text = gsub(text, '([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]្?[កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])', '%1​%2')
	text = gsub(text, '(.៍)', '​%1')
	
	for word in mw.ustring.gmatch(text, '[ក-៝​]+') do
		local original_text = word
		local c, chartype, syl, curr_syl = {}, {}, {}, {}
		local progress = 'none'

		for i = 1, len(word) do
			c[i] = sub(word, i, i)
			chartype[i] = char_type[c[i]]
		end
		
		for i = 1, #c + 1 do
			local next_types = {}
			if i == #c + 1 or chartype[i] == 'ZWS' then
				progress = 'none'
				table.insert(syl, table.concat(curr_syl, ""))
				curr_syl = {}
			elseif progress == 'none' then
				if chartype[i] == 'consonant' then
					table.insert(curr_syl, c[i])
					progress = 'initial'
				else
					table.insert(syl, c[i])
				end
			elseif progress == 'initial' then
				if chartype[i] == 'combining_sign' then
					table.insert(curr_syl, c[i])
					progress = 'initial_combining'
				elseif chartype[i] == 'sign' or chartype[i] == 'consonant_shift' then
					table.insert(curr_syl, c[i])
				elseif chartype[i] == 'vowel_sign' then
					table.insert(curr_syl, c[i])
					progress = 'vowel'
				elseif chartype[i] == 'terminating_vowel' then
					if c[i-1] .. c[i] .. (c[i+1] or '') == 'ាំង' and (i == #c - 1 or (i > #c + 1 and chartype[i+2] == 'consonant')) then
						table.insert(curr_syl, c[i])
						progress = 'vowel'
					else
						table.insert(curr_syl, c[i])
						table.insert(syl, table.concat(curr_syl, ""))
						curr_syl = {}
						progress = 'none'
					end
				elseif chartype[i] == 'consonant' then
					vowel_found = false
					local j, skipped = i, 0
					while not vowel_found do
						if not chartype[j] or chartype[j] == 'punctuation' or chartype[j] == 'indep_vowel' or chartype[j] == 'terminating_sign' or chartype[j] == 'ZWS' then
							skipped = 1
							break
						elseif chartype[j] == 'consonant' or chartype[j] == 'combining_sign' or (chartype[j] == 'sign' and c[j] ~= '័') then
							table.insert(next_types, chartype[j])
						else
							vowel_found = true
						end
						j = j + 1
					end
					if skipped ~= 0 or match(table.concat(next_types, " "), 'consonant s?i?g?n? ?consonant') then
						table.insert(curr_syl, c[i])
						progress = 'coda'
					else
						table.insert(syl, table.concat(curr_syl, ""))
						curr_syl = {c[i]}
						progress = 'initial'
					end
				else
					table.insert(syl, c[i])
					progress = 'none'
				end
			elseif progress == 'initial_combining' then
				if chartype[i] == 'consonant' then
					table.insert(curr_syl, c[i])
					progress = 'initial'
				else
					table.insert(syl, c[i])
					progress = 'none'
				end
			elseif progress == 'vowel' then
				if chartype[i] == 'vowel_sign' then
					table.insert(curr_syl, c[i])
				elseif chartype[i] == 'terminating_vowel' then
					if c[i-1] .. c[i] .. (c[i+1] or '') == 'ាំង' and (i == #c - 1 or (i > #c + 1 and chartype[i+2] == 'consonant')) then
						table.insert(curr_syl, c[i])
						progress = 'vowel'
					else
						table.insert(curr_syl, c[i])
						table.insert(syl, table.concat(curr_syl, ""))
						curr_syl = {}
						progress = 'none'
					end
				elseif chartype[i] == 'consonant' then
					vowel_found = false
					local j, skipped = i, 0
					while not vowel_found do
						if not chartype[j] or chartype[j] == 'punctuation' or chartype[j] == 'indep_vowel' or chartype[j] == 'terminating_sign' or chartype[j] == 'ZWS' then
							skipped = 1
							break
						elseif chartype[j] == 'consonant' or chartype[j] == 'combining_sign' or (chartype[j] == 'sign' and c[j] ~= '័') then
							table.insert(next_types, chartype[j])
						else
							vowel_found = true
						end
						j = j + 1
					end
					if skipped ~= 0 or match(table.concat(next_types, " "), 'consonant s?i?g?n? ?consonant') then
						table.insert(curr_syl, c[i])
						progress = 'coda'
					else
						table.insert(syl, table.concat(curr_syl, ""))
						curr_syl = {c[i]}
						progress = 'initial'
					end
				else
					table.insert(syl, c[i])
					progress = 'none'
				end
			elseif progress == 'coda' then
				if chartype[i] == 'combining_sign' then
					table.insert(curr_syl, c[i])
					progress = 'coda_combining'
				elseif chartype[i] == 'sign' or chartype[i] == 'terminating_sign' then
					table.insert(curr_syl, c[i])
				else
					table.insert(syl, table.concat(curr_syl, ""))
					curr_syl = {}
					if chartype[i] == 'consonant' then
						table.insert(curr_syl, c[i])
						progress = 'initial'
					else
						table.insert(syl, c[i])
						progress = 'none'
					end
				end
			elseif progress == 'coda_combining' then
				if chartype[i] == 'consonant' then
					table.insert(curr_syl, c[i])
					progress = 'coda'
				else
					table.insert(syl, table.concat(curr_syl, ""))
					curr_syl = {}
					progress = 'none'
				end
			end
		end

		for i = 1, #syl do
			if match(syl[i], '៍') then
				syl[i] = '<small><del>' .. gsub(syl[i], '.', function(consonant)
					if cons_conv[consonant] then
						return cons_conv[consonant][1]
					end end) .. '</small></del>'
				break
			end
			syl[i] = gsub(syl[i], '់$', '')
			
			syl[i] = gsub(syl[i], '^([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ])្?([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]?)([៉៊]?)([ិីឹឺុូួើឿៀេែៃោៅា័]?[ំះៈ]?)([៉៊]?)([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]?៉?)្?([កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ]?)(៖?)$', function(initial_a, initial_b, cons_shifter_a, vowel, cons_shifter_b, coda_a, coda_b, optional_sign)
				if cons_shifter_a .. cons_shifter_b .. vowel .. coda_a .. coda_b == '' and initial_b ~= '' and not match(syl[i], '្') then
					coda_a = initial_b
					initial_b = ''
				end
				base = initial_a
				if initial_b ~= '' and not match(initial_b, '[ងញនមយរលវ]') then
					base = initial_b
				end
				if vowel .. coda_a .. coda_b == 'ាំង' then
					vowel, coda_a, coda_b = 'ាំង', '', ''
				end
				optional_sign = gsub(optional_sign, '៖', 'ː')
				
				cons_shifter = cons_shifter_a .. cons_shifter_b
				if cons_shifter == '' and cons_conv[base] then
					vowel_class = cons_conv[base][2]
				elseif cons_shifter == '៉' then
					vowel_class = 'a'
				elseif cons_shifter == '៊' then
					vowel_class = 'o'
				else
					return initial_a .. initial_b .. cons_shifter .. vowel .. coda_a .. coda_b .. optional_sign
				end
				
				if digraph[initial_a .. '្' .. initial_b] and (digraph[coda_a .. '្' .. coda_b] or (cons_conv[coda_a] and cons_conv[coda_b])) and vowel_conv[vowel] then
					return digraph[initial_a .. '្' .. initial_b] .. vowel_conv[vowel][vowel_class] .. (digraph[coda_a .. '្' .. coda_b] or cons_conv[coda_a][1] .. cons_conv[coda_b][1]) .. optional_sign
			
				elseif cons_conv[initial_a] and cons_conv[initial_b] and vowel_conv[vowel] and cons_conv[coda_a] and cons_conv[coda_b] then
					return cons_conv[initial_a][1] .. cons_conv[initial_b][1] .. vowel_conv[vowel][vowel_class] .. cons_conv[coda_a][1] .. cons_conv[coda_b][1] .. optional_sign
				end	end)
			
			if syl[i] == 'ៗ' and i > 1 then
				syl[i] = syl[i-1]
			end
		end
		word = table.concat(syl, "")
		text = gsub(text, original_text, word)
	end
	
	text = gsub(text, '.', indep_vowel)
	text = gsub(text, '([^ ]*) ៗ', '%1 %1')
	
	if match(text, '[ក-៹]') and not debug_mode then
		return nil
	else
		return text
	end
	
	-- To do: other signs
end	

return export

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for the purpose of restating it. We strive to provide accurate and relevant information, however:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.