Module:Vi

---Lexicographic tools for Vietnamese language text.
local lang = require("Module:languages").getByCode("vi")

local p = {}

---Converts the given text to traditional tone marks.
-- For every word that ends in a toned "oa", "oe" or "uy" pair with the tone
-- mark on the second vowel (e.g. "oá", "uỷ"), the mark is moved onto the first
-- vowel ("óa", "ủy"). Only the lowercase pairs listed in the table are handled.
-- @param text  The text to convert, or a frame object whose first argument
--              holds the text.
-- @return      The converted text.
function p.toTraditionalTones(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	
	-- Word-final vowel pairs: reformed placement -> traditional placement.
	local traditional = {
		["oá"] = "óa", ["oà"] = "òa", ["oả"] = "ỏa", ["oã"] = "õa", ["oạ"] = "ọa",
		["oé"] = "óe", ["oè"] = "òe", ["oẻ"] = "ỏe", ["oẽ"] = "õe", ["oẹ"] = "ọe",
		["uý"] = "úy", ["uỳ"] = "ùy", ["uỷ"] = "ủy", ["uỹ"] = "ũy", ["uỵ"] = "ụy"
	}
	
	-- The outer parentheses drop gsub's second return value (the match count).
	return (mw.ustring.gsub(text, "%a+", function (word)
		-- "qu" + "y" words keep the tone mark on the "y". The capitalised form
		-- must be exempted too, otherwise "Quý" would be turned into "Qúy".
		if mw.ustring.match(word, "^[Qq]u[yýỳỷỹỵ]$") then return word end
		return (mw.ustring.gsub(word, "%a%a$", traditional))
	end))
end

---Converts the given text to reformed tone marks.
-- The inverse mapping of toTraditionalTones(): in every word ending in a toned
-- "oa", "oe" or "uy" pair whose mark sits on the first vowel (e.g. "óa"), the
-- mark is moved onto the second vowel ("oá").
-- @param text  The text to convert, or a frame object whose first argument
--              holds the text.
-- @return      The converted text.
function p.toReformedTones(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	
	-- Word-final vowel pairs: traditional placement -> reformed placement.
	local reformed = {
		["óa"] = "oá", ["òa"] = "oà", ["ỏa"] = "oả", ["õa"] = "oã", ["ọa"] = "oạ",
		["óe"] = "oé", ["òe"] = "oè", ["ỏe"] = "oẻ", ["õe"] = "oẽ", ["ọe"] = "oẹ",
		["úy"] = "uý", ["ùy"] = "uỳ", ["ủy"] = "uỷ", ["ũy"] = "uỹ", ["ụy"] = "uỵ"
	}
	
	-- Rewrites the last two letters of one word when they form a known pair.
	local function reformWord(word)
		local rewritten = mw.ustring.gsub(word, "%a%a$", reformed)
		return rewritten
	end
	
	-- Assigning to a single local discards gsub's match count.
	local converted = mw.ustring.gsub(text, "%a+", reformWord)
	return converted
end

---Lists the distinct tone-placement spellings of a term.
-- The term is run through toTraditionalTones() and toReformedTones(), in that
-- order, and duplicates are dropped.
-- @param main_spelling  The term, or a frame object (term in argument 1,
--                       link flag in the "link" argument).
-- @param makeLinks      When truthy, each spelling is wrapped with
--                       full_link() from [[Module:links]].
-- @return  When called with a frame: the spellings joined by "/".
--          Otherwise: the table of spellings. That table also carries each
--          unlinked spelling as a string key (set to true), used for
--          de-duplication.
function p.allSpellings(main_spelling, makeLinks)
	local frame
	if type(main_spelling) == "table" then
		frame = main_spelling
		main_spelling = frame.args[1]
		makeLinks = frame.args.link
	end
	
	local converters = { p.toTraditionalTones, p.toReformedTones }
	
	-- One table serves as both the ordered list (integer keys) and the
	-- "already seen" set (string keys).
	local spellings = {}
	for idx = 1, #converters do
		local variant = converters[idx](main_spelling)
		if not spellings[variant] then
			spellings[#spellings + 1] = variant
			spellings[variant] = true
		end
	end
	
	if makeLinks then
		local m_links = require("Module:links") -- [[Module:links]]
		for idx = 1, #spellings do
			spellings[idx] = m_links.full_link({lang = lang, term = spellings[idx]})
		end
	end
	
	if frame then
		return table.concat(spellings, "/")
	end
	return spellings
end

---Unicode codepoints for combining Vietnamese tone marks.
-- Kept as a single string of combining characters so that it can be spliced
-- into a character class, as removeDiacritics() does on NFD-decomposed text.
p.combiningToneMarks = mw.ustring.char(
	0x300,  -- à (combining grave accent)
	0x301,  -- á (combining acute accent)
	0x303,  -- ã (combining tilde)
	0x309,  -- ả (combining hook above)
	0x323   -- ạ (combining dot below)
)

---Unicode codepoints for combining Vietnamese accent marks.
-- These are the marks that change the base letter rather than the tone. Kept
-- as a single string so that it can be spliced into a character class, as
-- removeDiacritics() does on NFD-decomposed text.
p.combiningAccentMarks = mw.ustring.char(
	0x302,  -- â (combining circumflex accent)
	0x306,  -- ă (combining breve)
	0x31b   -- ơ (combining horn)
)

---Strips Vietnamese diacritical marks from the given text.
-- The text is decomposed (NFD), the requested marks are removed, and the
-- result is recomposed (NFC).
--
-- Lua callers pass the three flags directly; a falsy flag leaves that kind of
-- mark intact. When invoked with a frame, the text is argument 1 and the flags
-- come from the named arguments "tones", "accents" and "đ": each one is on
-- when the argument is absent or numerically equal to 1, so setting it to "0"
-- leaves the corresponding marks intact.
-- @param text         The text to strip, or a frame object.
-- @param toneMarks    Truthy to remove the marks in p.combiningToneMarks.
-- @param accentMarks  Truthy to remove the marks in p.combiningAccentMarks.
-- @param stroke       Truthy to replace "Đ"/"đ" with "D"/"d".
-- @return             The stripped text in NFC form.
function p.removeDiacritics(text, toneMarks, accentMarks, stroke)
	if type(text) == "table" then
		local args = text.args
		-- A template flag is on unless it is present with a value other than 1.
		local function flag(value)
			return not value or tonumber(value) == 1
		end
		text = args[1]
		toneMarks = flag(args.tones)
		accentMarks = flag(args.accents)
		stroke = flag(args["đ"])
	end
	
	local stripped = mw.ustring.toNFD(text)
	
	-- Runs one substitution over the working string when it was requested.
	local function applyIf(wanted, pattern, replacement)
		if wanted then
			stripped = mw.ustring.gsub(stripped, pattern, replacement)
		end
	end
	
	applyIf(toneMarks, "[" .. p.combiningToneMarks .. "]", "")
	applyIf(accentMarks, "[" .. p.combiningAccentMarks .. "]", "")
	applyIf(stroke, "[Đđ]", {["Đ"] = "D", ["đ"] = "d"})
	
	return mw.ustring.toNFC(stripped)
end

---Vietnamese letters for use in comp().
-- The position of a letter in this string is its sort rank: compWord() looks
-- each letter up here and compares the indices, so the order must not change.
-- Each base letter is listed lowercase first, then uppercase, followed by its
-- toned forms. The string is also spliced into character classes by
-- compWord(), ruby() and classifierCategories().
p.letters = "aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ"

---Compare two syllables according to Vietnamese dictionary sorting order.
-- Letters are ranked by their position in p.letters; characters that are not
-- in p.letters are skipped by the letter-by-letter pass.
-- @param word1  First syllable.
-- @param word2  Second syllable.
-- @return       true when word1 sorts before word2, false otherwise.
function p.compWord(word1, word2)
	-- A word that starts with the other one (or equals it) sorts after it.
	-- mw.ustring.find() returns a 1-based index or nil, so a prefix match is
	-- reported as 1; comparing against 0 could never succeed.
	if mw.ustring.find(word1, word2, 1, true) == 1 then return false end
	if mw.ustring.find(word2, word1, 1, true) == 1 then return true end
	
	do
		local letterClass = "[" .. p.letters .. "]"
		local func1, static1, var1 = mw.ustring.gmatch(word1, letterClass)
		local func2, static2, var2 = mw.ustring.gmatch(word2, letterClass)
		while true do
			-- Step both iterators in lockstep, one letter from each word.
			local c1 = func1(static1, var1)
			local c2 = func2(static2, var2)
			if c1 == nil or c2 == nil then break end
			
			-- The first pair of letters with different ranks decides.
			local idx1 = mw.ustring.find(p.letters, c1, 1, true)
			local idx2 = mw.ustring.find(p.letters, c2, 1, true)
			if idx1 and idx2 then
				if idx1 < idx2 then return true end
				if idx1 > idx2 then return false end
			end
		end
	end
	
	-- One word ran out of letters without a difference: fall back to the
	-- built-in string comparison.
	return word1 < word2
end

---Compare two strings according to Vietnamese dictionary sorting order.
-- The texts are split into words (runs matching "%a+") and compared word by
-- word. The first pair of differing words decides, using three levels in turn:
-- lowercased with tone marks removed, lowercased, and finally as written.
-- A text whose words run out first sorts before the longer one.
-- @param text1  First string.
-- @param text2  Second string.
-- @return       true when text1 sorts before text2, false otherwise. The
--               result is a strict ordering, as table.sort() requires.
function p.comp(text1, text2)
	if text1 == text2 then return false end
	
	do
		local func1, static1, var1 = mw.ustring.gmatch(text1, "%a+")
		local func2, static2, var2 = mw.ustring.gmatch(text2, "%a+")
		while true do
			local word1 = func1(static1, var1)
			local word2 = func2(static2, var2)
			
			-- Both texts ran out of words together, so they differ only in
			-- what lies between the words. Returning true here would claim
			-- "less than" in both directions, so leave the loop and let the
			-- plain string comparison below break the tie.
			if word1 == nil and word2 == nil then break end
			if word1 == nil then return true end
			if word2 == nil then return false end
			
			if word1 ~= word2 then
				local lower1 = mw.ustring.lower(word1)
				local lower2 = mw.ustring.lower(word2)
				local noTones1 = p.removeDiacritics(lower1, true, false, false)
				local noTones2 = p.removeDiacritics(lower2, true, false, false)
				
				-- Compare base letters.
				if noTones1 ~= noTones2 then
					return p.compWord(noTones1, noTones2)
				end
				
				-- Compare letters case-insensitively.
				if lower1 ~= lower2 then
					return p.compWord(lower1, lower2)
				end
				
				-- Compare letters including tones and case.
				return p.compWord(word1, word2)
			end
		end
	end
	
	return text1 < text2
end

---[[Template:vi-readings]]
-- Builds one wikitext bullet line per reading style ("Hán Việt", "Nôm"),
-- listing every reading in all of its tone-placement spellings and adding the
-- matching category.
-- @param hanviet     Comma-separated Hán Việt readings, or a frame object. With
--                    a frame, the values are taken from the parent frame's
--                    arguments: hanviet/hv, nom/n, rs/sort and
--                    phienthiet/phth/fanqie.
-- @param nom         Comma-separated Nôm readings.
-- @param rs          Category sort key; defaults to the current page title.
-- @param phienthiet  Comma-separated fanqie spellings, matched by position to
--                    the Hán Việt readings; each has the form
--                    "<characters> <readings>".
-- @return            The bullet lines joined by newlines.
function p.readings(hanviet, nom, rs, phienthiet)
	if type(hanviet) == "table" then
		local args = hanviet:getParent().args
		hanviet = args.hanviet or args.hv
		nom = args.nom or args.n
		rs = args.rs or args.sort
		phienthiet = args.phienthiet or args.phth or args.fanqie
	end
	
	-- Splits a comma-separated value; a missing value is passed through as is.
	local function splitList(value)
		return value and mw.text.split(value, "%s*,%s*")
	end
	
	local styles = {
		{
			link = "Hán Việt",
			cat = "Vietnamese Han tu",
			list = splitList(hanviet),
			phienthiet = splitList(phienthiet)
		},
		{
			link = "chữ Nôm|Nôm",
			cat = "Vietnamese Nom",
			list = splitList(nom),
		},
	}
	
	local lines = {}
	for _, style in ipairs(styles) do
		local readings = style.list
		-- Skip a style with no list, or whose first entry is an empty string.
		if readings and #readings > 0 and #readings[1] > 0 then
			local fanqie = style.phienthiet
--			table.sort(readings, p.comp)
			for j = 1, #readings do
				local entry = table.concat(p.allSpellings(readings[j], true), "/")
				
				-- Fanqie
				local spelled = fanqie and fanqie[j]
				if spelled then
					-- ruby() yields nil when the fanqie text does not have the
					-- "<characters> <readings>" shape.
					local ruby = p.ruby(mw.ustring.match(mw.text.trim(spelled),
						"(%a+) +(.+)"))
					if ruby then
						local suffix = p.ruby("切", "thiết")
						entry = mw.ustring.format("%s (%s[[w:Fanqie|%s]])",
							entry, ruby, suffix)
					end
				end
				
				readings[j] = entry
			end
			if #readings > 0 then
				local sortkey = rs or mw.title.getCurrentTitle().text
				local joined = table.concat(readings, ", ")
				lines[#lines + 1] = mw.ustring.format("* '''[[%s]]''': %s[[Category:%s|%s]]",
					style.link, joined, style.cat, sortkey)
			end
		end
	end
	
	return table.concat(lines, "\n")
end

---[[Template:vi-ruby]]
-- Wraps each annotatable character of the input in <ruby> markup, pairing it
-- with the reading at the same position.
-- @param characters  The text to annotate, or a frame object. With a frame,
--                    the values are taken from the parent frame's arguments:
--                    1 (characters), 2 (readings), "mark" (defaults to the
--                    current page title) and "alts" or "ids"
--                    (whitespace-separated).
-- @param readings    The readings; any run of characters outside p.letters
--                    acts as a separator. When nil, `characters` is returned
--                    unchanged.
-- @param mark        Optional character to highlight with <mark>.
-- @param alts        Optional list of replacements for "*" placeholders in
--                    `characters`, consumed in order.
-- @return            The annotated text wrapped in a <span lang='vi'>.
function p.ruby(characters, readings, mark, alts)
	if type(characters) == "table" then
		local args = characters:getParent().args
		characters, readings, mark, alts =
			args[1] or "",
			args[2] or "",
			args.mark or mw.title.getCurrentTitle().text,
			((args.alts and mw.text.split(args.alts, "%s+")) or
				(args.ids and mw.text.split(args.ids, "%s+")) or {})
	end
	
	if not readings then
		return characters
	end
	
	-- Lua callers such as readings() pass no alts; without this default a "*"
	-- in the input would raise an error on indexing nil.
	alts = alts or {}
	
	readings = mw.text.split(readings, "[^" .. p.letters .. "]+")
	
	local result = {}
	local character_idx = 1
	local alt_idx = 1
	for character in mw.ustring.gmatch(characters, ".") do
		local is_alt = false
		if character == "*" and alts[alt_idx] then
			character = alts[alt_idx]
			is_alt = true
			alt_idx = alt_idx + 1
		end
		-- Annotate substituted placeholders and Unicode letters. The byte-based
		-- %w test excludes single-byte (ASCII) letters and digits.
		if is_alt or (mw.ustring.match(character, "^%a$") and not character:match("^%w$")) then
			-- With fewer readings than characters, use an empty annotation
			-- instead of passing nil to format(), which would raise an error.
			local reading = readings[character_idx] or ""
			if mark and character == mark then
				character = mw.ustring.format("<mark>%s</mark>", character)
				reading = mw.ustring.format("<mark>%s</mark>", reading)
			end
			character = mw.ustring.format(
				"<ruby><rb><span class='Hani'>%s</span></rb><rp>(</rp><rt><span style='padding: 0 0.25em;'>%s</span></rt><rp>)</rp></ruby>",
				character, reading)
			character_idx = character_idx + 1
		end
		table.insert(result, character)
	end
	
	return mw.ustring.format("<span lang='vi' style='font-size: 137%%;'>%s</span>", table.concat(result))
end

---Builds the floating "Hán tự in this word" table for the current page.
-- Every character of the page title that falls inside the ranges listed in
-- the pattern below is kept; each one becomes a table cell linking to the
-- Vietnamese section of that character's entry.
-- @return  The HTML/wikitext of the table.
function p.hantutab()
	local page_name = mw.title.getCurrentTitle().text
	
	-- Drop everything outside the listed ranges. The \239… escapes spell the
	-- UTF-8 bytes of the third range's end points (U+F900 and U+FAD9).
	local hantu = mw.ustring.gsub(page_name, '[^㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-𯨟]', '')
	
	-- The header cell spans one column per retained character.
	local column_count = mw.ustring.len(hantu)
	local header = '<table class="floatright wikitable" style="text-align:center; font-size:small;"><tr><th colspan="'
		.. column_count
		.. '" style="font-weight:normal;">[[Hán tự]] in this word</th></tr><tr lang="vi" class="Hani" style="font-size:2em; background:white; line-height:1em;">'
	
	-- Assigning to a single local discards gsub's match count.
	local cells = mw.ustring.gsub(hantu, '(.)', '<td style="padding:0.5em;">[[%1#Vietnamese|%1]]</td>')
	
	return header .. cells .. '</table>'
end

---Returns the categories indicated by the given wikitext.
-- Tag-like spans ("<...>") are removed from the first frame argument; every
-- remaining run of p.letters characters, apart from a few template-related
-- words, yields one "Vietnamese <word> class nouns" category link.
-- @param frame  The frame object; argument 1 holds the wikitext.
-- @return       The category links concatenated without a separator.
function p.classifierCategories(frame)
	-- Words that are not treated as classifiers.
	local ignored = {
		["l"] = true,
		["vi"] = true,
		["vi-l"] = true,
		["Vietnamese"] = true,
	}
	
	-- Assigning to a single local discards gsub's match count.
	local stripped = mw.ustring.gsub(frame.args[1], "<[^>]->", "")
	
	local categories = {}
	for word in mw.ustring.gmatch(stripped, "[" .. p.letters .. "]+") do
		if not ignored[word] then
			categories[#categories + 1] =
				mw.ustring.format("[[Category:Vietnamese %s class nouns]]", word)
		end
	end
	return table.concat(categories)
end

---Generates the wikitext skeleton of a new Vietnamese entry.
-- All values are read from the parent frame's arguments:
--   1, 2       part-of-speech code and definition of the first section
--              (default: noun, "{{rfdef|lang=vi}}")
--   3, 4       the same for an optional second section
--   5, 6       the same for an optional third section
--   e          etymology text
--   h          argument for {{vi-etym-sino}}; overrides "e"
--   head       value for the |head= parameter of the headword templates
--   cat        argument for a closing {{C|vi|...}}
--   wp         "y" for a plain {{wikipedia|lang=vi}}, otherwise the value is
--              passed to that template as its argument
--   alt, syn, ant, der, also (each optionally followed by 2, 3, 4)
--              terms for the corresponding list sections
-- When no etymology is given and the page title contains a space, a {{com}}
-- etymology is built from the words of the title.
-- @param frame  The frame object.
-- @return       The generated wikitext.
function p.new(frame)
	local title = mw.title.getCurrentTitle().text
	local args = frame:getParent().args
	local pos = args[1] or ""
	local def = args[2] or "{{rfdef|lang=vi}}"
	-- A later section exists when either its part of speech or its definition
	-- was given; a missing part of speech then falls back to "" (noun).
	local pos2 = args[3] or (args[4] and "" or false)
	local def2 = args[4] or "{{rfdef|lang=vi}}"
	local pos3 = args[5] or (args[6] and "" or false)
	local def3 = args[6] or "{{rfdef|lang=vi}}"
	local etym = args["e"] or false
	local head = args["head"] or false
	local cat = args["cat"] or false
	
	if args["h"] then
		etym = "{{vi-etym-sino|" .. args["h"] .. "}}"
	end
	if not etym and mw.ustring.match(title, " ") then
		etym = "{{com|vi"
		for word in mw.text.gsplit(title, " ") do
			etym = etym .. "|" .. word
		end
		etym = etym .. "}}."
	end
	
	-- Part-of-speech code -> section heading.
	local pos_title = {
		[""] = "Noun", ["n"] = "Noun", ["pn"] = "Proper noun", ["propn"] = "Proper noun", ["pron"] = "Pronoun",
		["v"] = "Verb", ["vf"] = "Verb", ["a"] = "Adjective", ["adj"] = "Adjective", ["adv"] = "Adverb",
		["prep"] = "Preposition", ["postp"] = "Postposition", ["conj"] = "Conjunction",
		["part"] = "Particle", ["suf"] = "Suffix",
		["prov"] = "Proverb", ["id"] = "Idiom", ["ph"] = "Phrase", ["intj"] = "Interjection", ["interj"] = "Interjection",
		["cl"] = "Classifier", ["cls"] = "Classifier", ["num"] = "Numeral", ["abb"] = "Abbreviation", ["deter"] = "Determiner"
	}
	
	-- Part-of-speech code -> suffix of the "vi-…" headword template.
	local pos_head = {
		[""] = "noun", ["n"] = "noun", ["pn"] = "proper noun", ["propn"] = "proper noun", ["v"] = "verb", ["vf"] = "verb form", ["a"] = "adj",
		["postp"] = "post", ["conj"] = "conj", ["part"] = "particle", ["pron"] = "pronoun",
		["prov"] = "proverb", ["id"] = "idiom", ["ph"] = "phrase", ["intj"] = "interj",
		["abb"] = "abbr", ["cl"] = "classifier", ["deter"] = "det"
	}
	
	-- Heading for a part-of-speech code; unknown codes are used as written
	-- with their first letter capitalised.
	local function genTitle(text)
		-- The original called a bare `sub`, which is not defined and raised an
		-- error for every code missing from pos_title.
		return pos_title[text] or
			mw.ustring.upper(mw.ustring.sub(text, 1, 1)) .. mw.ustring.sub(text, 2, -1)
	end
	
	-- Headword template suffix for a code; unknown codes are used as written.
	local function genHead(text)
		return pos_head[text] or text
	end
	
	-- One part-of-speech section: heading, headword template and definition.
	local function posSection(code, definition)
		return "\n\n===" .. genTitle(code) .. "===\n{{vi-" .. genHead(code) ..
			(head and ("|head=" .. head) or "") .. "}}\n\n# " .. definition
	end
	
	-- A list section built from args[class], args[class .. "2"], … up to "4".
	-- The list stops at the first missing argument; without args[class] the
	-- whole section is omitted.
	local function other(class, heading)
		local code = ""
		if args[class] then
			code = "\n\n===" .. heading .. "==="
			for _, suffix in ipairs({"", "2", "3", "4"}) do
				local term = args[class .. suffix]
				if not term then break end
				code = code .. "\n* {{l|vi|" .. term .. "}}"
			end
		end
		return code
	end
	
	local result = "==Vietnamese=="
	if args["wp"] then
		result = result .. "\n{{wikipedia|lang=vi" ..
			(args["wp"] == "y" and "" or "|" .. args["wp"]) .. "}}"
	end
	result = result .. other("alt", "Alternative forms")
	
	if etym then result = result .. "\n\n===Etymology===\n" .. etym end
	
	result = result .. "\n\n===Pronunciation===\n{{vi-IPA}}"
	result = result .. posSection(pos, def)
	-- The extra "=" signs turn these into level-4 headings under the first
	-- part-of-speech section.
	result = result .. other("syn", "=Synonyms=")
	result = result .. other("ant", "=Antonyms=")
	result = result .. other("der", "=Derived terms=")
	result = result .. other("also", "=See also=")
	
	if pos2 then
		result = result .. posSection(pos2, def2)
	end
	
	if pos3 then
		result = result .. posSection(pos3, def3)
	end
	
	if cat then result = result .. "\n\n{{C|vi|" .. cat .. "}}" end
	
	return result
end

return p

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for presentation here. We strive to provide accurate and relevant information; however, the points below apply.
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.