Module:Ancient Greek

-- Table of functions exported by this module (returned at the end of the file).
local p = {}

-- Local aliases for the Unicode-aware string functions of mw.ustring.
local ustring = mw.ustring
local U = ustring.char
local find = ustring.find
local gsub = ustring.gsub
local decompose = ustring.toNFD
local lower = ustring.lower
local upper = ustring.upper

-- Byte-oriented gmatch from the standard string library; used together with
-- the UTF8_char byte pattern below.
local str_gmatch = string.gmatch

-- Combining diacritics, built from their Unicode code points.
local macron = U(0x304)
local breve = U(0x306)
local rough = U(0x314)
local smooth = U(0x313)
local diaeresis = U(0x308)
local acute = U(0x301)
local grave = U(0x300)
-- U+0342 is the Greek circumflex; U+0302 is the Latin one used in the output.
local circumflex = U(0x342)
local Latin_circumflex = U(0x302)
local subscript = U(0x345)
-- Pattern: a macron, an optional diaeresis, then a Latin circumflex.
local macron_circumflex = macron .. diaeresis .. '?' .. Latin_circumflex

-- Letters before which γ is transliterated as <n> (see p.transliterate).
local is_velar = { ['κ'] = true, ['γ'] = true, ['χ'] = true, ['ξ'] = true, }

-- Byte pattern for one UTF-8 encoded character: a lead byte (or ASCII byte)
-- followed by any number of continuation bytes.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
local basic_Greek = "[\206-\207][\128-\191]" -- excluding first line of Greek and Coptic block: ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ

-- Maps a single character (as a UTF-8 string) to a table describing its
-- properties. Entries are registered with add_info.
local info = {}

-- The tables are shared among different characters so that they can be checked
-- for equality if needed, and to use less space.
local vowel = { vowel = true, diacritic_seat = true }
-- iota and upsilon carry the offglide flag, which tokenize uses to keep them
-- in the same token as a preceding vowel. They are separate tables with the
-- same fields because tokenize compares against upsilon by identity.
local iota = { vowel = true, diacritic_seat = true, offglide = true }
local upsilon = { vowel = true, diacritic_seat = true, offglide = true }
-- Technically rho is only a seat for rough or smooth breathing.
local rho = { consonant = true, diacritic_seat = true }
local consonant = { consonant = true }
local diacritic = { diacritic = true }
-- Needed for equality comparisons.
local breathing = { diacritic = true }
-- Registers the property table t in info for every given character.
-- characters is either a string, which is split into its UTF-8 characters,
-- or an array of single-character strings.
local function add_info(characters, t)
	if type(characters) ~= "string" then
		-- Array form: each element is used directly as a key.
		for _, char in ipairs(characters) do
			info[char] = t
		end
		return
	end

	-- String form: register each UTF-8 character separately.
	for char in characters:gmatch(UTF8_char) do
		info[char] = t
	end
end

-- Register the combining diacritics other than the breathings.
add_info({ macron, breve,
		diaeresis,
		acute, grave, circumflex,
		subscript,
	}, diacritic)

-- Breathings use their own table so they can be recognised by identity.
add_info({rough, smooth}, breathing)
add_info("ΑΕΗΟΩαεηοω", vowel)
add_info("Ιι", iota)
add_info("Υυ", upsilon)
add_info("ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨϜϘϺϷͶϠβγδζθκλμνξπρσςτφχψϝϙϻϸͷϡ", consonant)
-- Must come after the consonant list above, which also contains Ρ and ρ:
-- this call overwrites their entries with the rho table.
add_info("Ρρ", rho)

-- Characters with no entry yield this shared empty table, so that fields of
-- info[character] can always be read without a nil check.
local not_recognized = {}
setmetatable(info, { __index =
	function()
		return not_recognized
	end
})

-- Wraps str in typographic double quotation marks.
local function quote(str)
	local opening, closing = "“", "”"
	return opening .. str .. closing
end

-- Maps each lowercase Greek letter or combining diacritic to its Latin
-- transliteration. p.transliterate passes this table to gsub as the
-- replacement, so characters without an entry are left unchanged.
local tt = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	["η"] = "e" .. macron,
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "u",
	["ω"] = "o" .. macron,

	-- Consonants
	["β"] = "b",
	["γ"] = "g",
	["δ"] = "d",
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "x",
	["π"] = "p",
	["ρ"] = "r",
	["σ"] = "s",
	["ς"] = "s",
	["τ"] = "t",
	["φ"] = "ph",
	["χ"] = "kh",
	["ψ"] = "ps",
	
	-- Archaic letters
	["ϝ"] = "w",
	["ϻ"] = "ś",
	["ϙ"] = "q",
	["ϡ"] = "š",
	["ͷ"] = "v",
	
	-- Diacritics
	-- unchanged: macron, diaeresis, grave, acute
	-- Breve and breathings are dropped here; p.transliterate adds the <h>
	-- for rough breathing separately.
	[breve] = '',
	[smooth] = '',
	[rough] = '',
	[circumflex] = Latin_circumflex,
	[subscript] = 'i',
}

--[=[
		This breaks a word into meaningful "tokens", which are
		individual letters or diphthongs with their diacritics.
		Used by [[Module:grc-accent]] and [[Module:grc-pronunciation]].

		text: the string to split. It is decomposed (NFD) before scanning,
		so diacritics are handled as separate combining characters.

		Returns an array of token strings, numbered from 1 without gaps.

		Raises an error when a diacritic follows a character that cannot
		carry it, or when the text begins with a diacritic.
--]=]
local function tokenize(text)
	-- prev_info starts as an empty table so its fields can be read before
	-- any character has been seen.
	local tokens, prev_info = {}, {}
	local token_i = 1
	local prev
	for character in str_gmatch(decompose(text), UTF8_char) do
		local curr_info = info[character]
		-- Split vowels between tokens if not a diphthong.
		if curr_info.vowel then
			if prev and (not (curr_info.offglide and prev_info.vowel)
					-- υυ → υ, υ
					-- ιυ → ι, υ
					or prev_info.offglide and curr_info == upsilon) then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		elseif curr_info.diacritic then
			-- A diacritic always joins the token of the preceding character.
			tokens[token_i] = (tokens[token_i] or "") .. character
			if prev_info.vowel or prev_info.diacritic then
				if character == diaeresis then
					-- Current token is vowel, vowel, possibly other diacritics,
					-- and a diaeresis.
					-- Split the current token into two:
					-- the first letter, then the second letter plus any diacritics.
					local previous_vowel, vowel_with_diaeresis = string.match(tokens[token_i], "^(" .. basic_Greek .. ")(" .. basic_Greek .. ".+)")
					if previous_vowel then
						tokens[token_i], tokens[token_i + 1] = previous_vowel, vowel_with_diaeresis
						token_i = token_i + 1
					end
				end
			elseif prev_info == rho then
				if curr_info ~= breathing then
					-- Raise the message instead of returning it: callers
					-- expect a table of tokens from this function.
					error(string.format("The character %s canna hae the accent %s on it.", prev, "◌" .. character))
				end
			elseif prev then
				error("The character " .. quote(prev) .. " cannot have a diacritic on it.")
			else
				-- No preceding character at all; report that directly rather
				-- than failing while building the message above.
				error("A diacritic cannot be the first character of the text.")
			end
		elseif curr_info == rho then
			-- A rho stays in the current token only when that token is a rho
			-- followed directly by a breathing mark.
			if prev and not (prev_info == breathing and info[string.match(tokens[token_i], "^" .. basic_Greek)] == rho) then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		else
			-- Any other character (consonant or unrecognised) is its own token.
			if prev then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		end
		prev = character
		prev_info = curr_info
	end
	return tokens
end

--[[
	Transliterates Ancient Greek text into Latin script.

	text: the Greek string. It is decomposed (NFD) first, so that letters
	and diacritics can be handled as separate characters.

	Returns the transliteration as one string. Errors raised by tokenize
	propagate to the caller.
--]]
function p.transliterate(text)
	text = decompose(text)
	--[[
	if text == '῾' then
		return 'h'
	end
	--]]
	
	--[[
		Replace semicolon or Greek question mark with regular question mark,
		except after an ASCII alphanumeric character (to avoid converting
		semicolons in HTML entities).
	--]]
	text = gsub(text, "([^A-Za-z0-9])[;" .. U(0x37E) .. "]", "%1?")
	
	-- Handle the middle dot. It is equivalent to semicolon or colon, but semicolon is probably more common.
	-- This runs after the replacement above, so the semicolons produced here
	-- are not turned into question marks.
	text = text:gsub("·", ";")
	
	local tokens = tokenize(text)

	--now read the tokens
	local output = {}
	-- ipairs walks the tokens in order; the neighbour lookups tokens[i - 1]
	-- and tokens[i + 1] and the final concatenation rely on that order.
	for i, token in ipairs(tokens) do
		-- substitute each character in the token for its transliteration
		local translit = gsub(lower(token), '.', tt)
		
		if token == 'γ' and is_velar[tokens[i + 1]] then
			-- γ before a velar should be <n>
			translit = 'n'
		elseif token == 'ρ' and tokens[i - 1] == 'ρ' then
			-- ρ after ρ should be <rh>
			translit = 'rh'
		elseif find(token, '^[αΑ].*' .. subscript .. '$') then
			-- add macron to ᾳ
			translit = gsub(translit, '([aA])', '%1' .. macron)
		end
		
		-- Rough breathing becomes <h>: after rho, before a vowel.
		if token:find(rough) then
			if find(token, '[Ρρ]') then
				translit = translit .. 'h'
			else -- vowel
				translit = 'h' .. translit
			end
		end
		
		-- Remove macron from a vowel that has a circumflex.
		if find(translit, macron_circumflex) then
			translit = translit:gsub(macron, '')
		end
		
		-- Capitalize first character of transliteration.
		if token ~= lower(token) then
			translit = gsub(translit, "^.", upper)
		end
		
		output[i] = translit
	end
	
	return table.concat(output)
end

-- Template entry point: transliterates parameter 1 and wraps the result in an
-- italic span marked lang="grc-Latn".
-- frame: the frame object of the call. Parameter 1 is taken from the frame's
-- own arguments, falling back to those of the parent frame.
-- Returns the HTML string.
function p.translit(frame)
	-- Look at the parent frame only when the frame itself has no text, so a
	-- call that supplies the text directly also works without a parent frame.
	local text = frame.args[1] or frame:getParent().args[1]
	local transliteration = p.transliterate(text)
	return '<span title="Ancient Greek transliteration" lang="grc-Latn"><i>' .. transliteration .. '</i></span>'
end

-- Template entry point: returns the plain transliteration of parameter 1,
-- without any surrounding markup. The parameter is read from the frame's own
-- arguments, falling back to those of the parent frame.
function p.bare_translit(frame)
	local text = frame.args[1]
	if not text then
		text = frame:getParent().args[1]
	end
	return p.transliterate(text)
end

return p

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted and restated here. We strive to provide accurate and relevant information; however, please note the following:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.