Module:Grc-utilities

-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}

-- Helper modules, plus the language and script objects looked up by the codes
-- "grc" and "polytonic". lang and sc are passed to the tagging and linking
-- helpers further down.
local m_script_utils = require("Module:script utilities")
local m_links = require("Module:links")
local lang = require("Module:languages").getByCode("grc")
local sc = require("Module:scripts").getByCode("polytonic")

-- Shared data loaded from the /data subpage.
local m_data = mw.loadData("Module:grc-utilities/data")
-- List of patterns, each matched against a run of diacritics. Indices 1 to 4
-- are used below; per the comments on the reordering functions they stand for
-- 1 = macron/breve, 2 = breathings/diaeresis, 3 = accents, 4 = iota subscript.
local groups = m_data.groups
-- Pairs of (nonstandard pattern, standard replacement) applied by standardDiacritics.
local conversions = m_data.conversions
-- Lookup table of individual diacritics, indexed by name (see the aliases below).
local diacritics = m_data.diacritics
-- Pattern fragment matching one diacritic; it is concatenated into larger
-- patterns throughout this module.
local diacritic = m_data.diacritic
local diaeresis = diacritics.diaeresis
local macron = diacritics.macron
local breve = diacritics.breve
local spacing_macron = diacritics.spacing_macron
local spacing_breve = diacritics.spacing_breve
local circumflex = diacritics.circum
local subscript = diacritics.subscript

-- Two-character patterns: a vowel letter followed by ι, or by υ.
-- tokenize anchors these with ^ and $ to test the next two characters.
local i_diphthong = "[ΑΕΗΟΥΩαεηουω]ι"
local u_diphthong = "[ΑΕΗΟΩαεηοω]υ"

-- Short local names for the mw.ustring functions used in this module.
local find = mw.ustring.find
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local sub = mw.ustring.sub
local gsub = mw.ustring.gsub
local U = mw.ustring.char
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD

-- U+25CC DOTTED CIRCLE; addDottedCircle places it before each diacritic.
local dottedCircle = U(0x25CC)

-- Appends chars to list[index] (or stores it there when the slot is empty),
-- then returns text with as many leading characters removed as chars contains.
-- Raises an error when chars is nil or false.
local function add(list, index, chars, text)
	if not chars then
		error("The function add cannot act on a nil character.")
	end
	local existing = list[index]
	list[index] = existing and (existing .. chars) or chars
	-- Skip past the characters that were just consumed.
	return sub(text, mw.ustring.len(chars) + 1)
end

-- Puts a dotted circle (U+25CC) in front of every match of the diacritic
-- pattern and returns the resulting string.
-- When text is not a string, nothing is returned.
function export.addDottedCircle(text)
	if type(text) ~= "string" then
		return
	end
	local marked = gsub(text, "(" .. diacritic .. ")", dottedCircle .. "%1")
	return marked
end

-- Passes term to tag_text from Module:script utilities together with this
-- module's language ("grc") and script ("polytonic") objects. face is
-- forwarded unchanged; whatever tag_text returns is returned as is.
function export.tag(term, face)
	return m_script_utils.tag_text(term, lang, sc, face)
end

-- Builds a link description for term (with optional display text alt and
-- transliteration tr) using this module's language and script objects, and
-- hands it to full_link from Module:links along with face.
function export.link(term, face, alt, tr)
	local data = {
		term = term,
		alt = alt,
		lang = lang,
		sc = sc,
		tr = tr,
	}
	return m_links.full_link(data, face)
end

-- Calls language_link from Module:links for term with display text alt,
-- using this module's language object. No script object is passed.
local function linkNoTag(term, alt)
	local data = {
		term = term,
		lang = lang,
		alt = alt,
	}
	return m_links.language_link(data)
end

-- Normalizes diacritics: the text is decomposed (NFD) and every entry of the
-- conversions table is then applied as a pattern -> replacement substitution,
-- turning spacing diacritics into combining ones and nonstandard characters
-- into standard polytonic Greek. Returns the decomposed, converted text.
function export.standardDiacritics(text)
	local decomposed = toNFD(text)
	for pattern, replacement in pairs(conversions) do
		decomposed = gsub(decomposed, pattern, replacement)
	end
	return decomposed
end

--[=[	This function arranges diacritics in the following order:
			1. macron or breve
			2. breathings or diaeresis
			3. acute, circumflex, or grave
			4. iota subscript
		Used by [[Module:typing-aids]].

		The text is decomposed (NFD) first. Every run of two or more
		diacritics is then rebuilt from the first match of each pattern in
		`groups`, taken in index order; anything in the run that matches no
		group is dropped. Returns the decomposed, reordered text.
]=]
function export.reorderDiacritics(text)
	text = toNFD(text)
	
	-- Rebuilds a single run of diacritics in group order.
	local function reorderSequence(sequence)
		local ordered = {}
		for i, group in ipairs(groups) do
			ordered[i] = match(sequence, group) or ""
		end
		return table.concat(ordered)
	end
	
	-- Replace each run where it stands, in one pass. Searching the text again
	-- for every run found (and replacing it globally) would let a short run
	-- rewrite part of a longer run that contains it, after which the longer
	-- run's own replacement no longer matches and it stays partly unordered.
	text = gsub(text, diacritic .. diacritic .. "+", reorderSequence)
	
	return text
end

--[=[	This breaks a word into meaningful "tokens", which are
		individual letters or diphthongs with their diacritics.
		Used by [[Module:grc-accent]] and [[Module:grc-pronunciation]].

		Returns two values: the array of tokens, and an error string. The
		error string is empty unless the number of tokens left after
		compressing the array differs from the number counted while scanning.
]=]
function export.tokenize(text)
	
	-- standardize, decompose, and reorder diacritics
	text = export.standardDiacritics(text)
	text = export.reorderDiacritics(text)
	if type(text) ~= "string" then
		error("Text is not a string", 2)
	end
	
	local tokens = {}
	-- i tracks our position in the table of tokens.
	-- It starts at 0, so a diacritic that precedes every letter is stored at
	-- index 0. NOTE(review): confirm how compressSparseArray treats index 0.
	local i = 0
	while mw.ustring.len(text) > 0 do
		local char = sub(text, 1, 1) or ""
		local chars = sub(text, 1, 2) or ""
		local nextchars = sub(text, 3, 4) or ""
		-- Look for a diacritic and add it to the current token. Remove it from the text.
		if find(char, diacritic) then
			text = add(tokens, i, char, text)
	--[[	See if the next two characters form a diphthong and if so,
			add them to the current token. Remove them from the text.
			If there's a diaeresis, it will be immediately after
			the second of the two characters, or after a macron or breve.	]]
		elseif ( find(chars, '^' .. i_diphthong .. '$') or  find(chars, '^' .. u_diphthong .. '$') ) and not match(nextchars, "^[" .. macron .. breve .. "]?" .. diaeresis) then
			i = i + 1
			text = add(tokens, i, chars, text)
		else
		-- Add the current character to the next token. Remove it from the text.
			i = i + 1
			text = add(tokens, i, char, text)
		end
	end
	
	-- NOTE(review): whether this field survives compressSparseArray depends on
	-- Module:table; the check below therefore uses the local counter instead.
	tokens.maxindex = i
	
	tokens = require("Module:table").compressSparseArray(tokens)
	
	-- Compare with ~=. The earlier form "not a == b" parses as "(not a) == b",
	-- which compares a boolean with a number and so could never be true.
	local err = ""
	if i ~= #tokens then
		err = "There must have been a nil value in the tokens table."
	end
	
	return tokens, err
end

-- Template entry point: tokenizes frame.args[1] and returns one wikitable row
-- holding the tagged input, the tagged comma-separated tokens, and the error
-- string reported by tokenize. Raises an error when the first parameter is
-- missing.
function export.printTokens(frame)
	-- Declared local so that the value does not leak into the global environment.
	local text = frame.args[1]
	if not text then
		error("Provide text to tokenize in first parameter.")
	end
	
	local tokens, err = export.tokenize(text)
	-- Make space tokens visible in the output.
	for i, token in ipairs(tokens) do
		if token == " " then
			tokens[i] = '<span style="background-color: lightgray;">&nbsp;</span>'
		end
	end
	
	return "|-\n| " .. export.tag(text) .. " || " .. export.tag(table.concat(tokens, ", ")) .. " || " .. err
end

--[=[	Places diacritics in the following order:
			1. breathings or diaeresis
			2. acute, circumflex, or grave
			3. macron or breve
			4. iota subscript
		Used by [[Module:grc-pronunciation]].

		The text is first passed through standardDiacritics. After the
		reordering, combining macrons and breves are replaced by their
		spacing counterparts and the result is recomposed (NFC).
]=]
function export.pronunciationOrder(text)
	text = export.standardDiacritics(text)
	
	-- Rebuilds a single run of diacritics: breathing and diaeresis first,
	-- then accents, then macron or breve, then iota subscript.
	local function reorderSequence(sequence)
		return table.concat{
			match(sequence, groups[2]) or "",
			match(sequence, groups[3]) or "",
			match(sequence, groups[1]) or "",
			match(sequence, groups[4]) or ""
		}
	end
	
	-- Replace each run where it stands, in one pass. Searching the text again
	-- for every run found (and replacing it globally) would let a short run
	-- rewrite part of a longer run that contains it, after which the longer
	-- run's own replacement no longer matches.
	text = gsub(text, diacritic .. diacritic .. "+", reorderSequence)
	
	text = gsub(text, macron, spacing_macron) -- combining to spacing macron
	text = gsub(text, breve, spacing_breve) -- combining to spacing breve
	
	return toNFC(text)
end

-- Finds the vowels of text whose length is left ambiguous: tokens that consist
-- of α, ι or υ plus diacritics, contain nothing matching the consonant pattern,
-- and carry no macron, breve, circumflex or iota subscript.
-- Returns two tables:
--   1. a list with one entry per ambiguous vowel: the vowel with its
--      diacritics passed through export.tag, or just the bare vowel letter
--      when noTag is set;
--   2. a set keyed by the lowercased vowel letters that were found, for
--      categorization purposes.
-- Raises an error when text is not a string.
function export.findAmbig(text, noTag)
	if type(text) ~= "string" then
		error("The input to function findAmbig is nonexistent or not a string")
	end
	
	-- Split the word into letters/diphthongs with their diacritics.
	local tokens = export.tokenize(text)
	if not tokens then
		error("No tokens.")
	end
	if type(tokens) ~= "table" then
		error("tokens aren't a table.")
	end
	
	-- True when the diacritics include a mark that settles the vowel's length.
	local function hasLengthMark(marks)
		return (find(marks, macron)
			or find(marks, breve)
			or find(marks, circumflex)
			or find(marks, subscript)) and true or false
	end
	
	local output, vowels = {}, {}
	-- The tokens are traversed with pairs, so the language itself does not
	-- promise any particular visiting order.
	for _, token in pairs(tokens) do
		local vowel, marks
		if not find(token, m_data.consonant) then
			vowel, marks = match(token, "^([αιυ])(" .. diacritic .. "*)$")
		end
		
		if vowel and not (marks and hasLengthMark(marks)) then
			if noTag then
				output[#output + 1] = vowel
			else
				output[#output + 1] = export.tag(vowel .. marks)
			end
			
			-- Record which vowel letters were ambiguous.
			vowels[mw.ustring.lower(vowel)] = true
		end
	end
	
	return output, vowels
end

-- Template entry point for demonstrating one of this module's functions.
-- frame.args[1] names an exported function and frame.args[2] is the text to
-- feed it. Returns a line of the form
--   term (term_decomposition) → result (result_decomposition)
-- where the decompositions show each diacritic on a dotted circle.
-- Raises an error when either parameter is missing or when the first one does
-- not name a function of this module.
function export.printDiacritics(frame)
	local functionToPrint = frame.args[1] or error('Specify a function in the first parameter.')
	local term = frame.args[2] or error('Add text in the second parameter.')
	
	-- Fail with a readable message instead of "attempt to call a nil value".
	local func = export[functionToPrint]
	if type(func) ~= "function" then
		error('No function named "' .. functionToPrint .. '" is exported by this module.')
	end
	
	local result = func(term)
	
	-- Show diacritics above or below a dotted circle.
	-- Declared local so that the table does not leak into the global environment.
	local content = {
		term = export.tag(term),
		term_decomposition = export.tag(export.addDottedCircle(toNFD(term))),
		result = export.tag(result),
		result_decomposition = export.tag(export.addDottedCircle(result)),
	}
	
	local output = [[ term (term_decomposition) → result (result_decomposition)]]
	
	-- Every word of the template is replaced by the matching entry of
	-- content, or removed when there is none.
	local function addContent(item)
		return content[item] or ""
	end
	
	output = gsub(output, "[%a_]+", addContent)

	return output
end

-- Template entry point: shows a text next to its decomposed (NFD) form, in
-- which every diacritic is displayed on a dotted circle.
-- Parameters (processed by Module:parameters):
--   1     the text to decompose
--   link  boolean; when set, the composed text is linked as a whole and each
--         character of the decomposition is linked individually
-- Returns "composed (decomposed)". Raises an error when the text is missing.
function export.decompose(frame)
	local params = {
		[1] = {},
		["link"] = { type = "boolean" },
	}
	
	-- Declared local so that the table does not leak into the global environment.
	local args = require("Module:parameters").process(frame.args, params)
	
	local text = args[1]
	if text == nil then
		-- toNFD would reject nil anyway; fail with a message naming the cause.
		error("Provide text to decompose in first parameter.")
	end
	text = toNFD(text)
	local link = args.link
	local composed
	
	if link then
		composed = export.link(text, nil, nil, "-")
	else
		composed = export.tag(text)
	end
	
	local decomposed = export.addDottedCircle(text)
	
	if link then
		local result = {}
		-- seat is the dotted circle preceding a diacritic, or the empty string.
		for seat, letter in gmatch(decomposed, "(" .. dottedCircle .. "?)(.)") do
			-- Assigned to a local first so that only the first return value
			-- of linkNoTag reaches table.insert.
			local letterLink = linkNoTag(letter, seat .. letter)
			table.insert(result, letterLink)
		end
		decomposed = table.concat(result)
	end
	
	decomposed = export.tag(decomposed)
	
	return composed .. " (" .. decomposed .. ")"
end

return export

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for re-presentation here. We strive to provide accurate and relevant information; however:

  1. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  2. The content is not intended to constitute professional advice. It is displayed for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  3. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  4. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.