-- Module:Ja
local export = {}
local find = mw.ustring.find
local length = mw.ustring.len
local trim = mw.text.trim
local split = mw.text.split
local sub, gsub = mw.ustring.sub, mw.ustring.gsub
local match, gmatch = mw.ustring.match, mw.ustring.gmatch
local to_cp, to_char = mw.ustring.codepoint, mw.ustring.char
local Jpan = require("Module:scripts").getByCode("Jpan")
local lang = require("Module:languages").getByCode("ja")
-- note that arrays loaded by mw.loadData cannot be directly used by gsub
local data = mw.loadData("Module:ja/data")
-- Re-export a fixed set of kanji lists from Module:ja/data under export.data.
export.data = {}
for _, key in ipairs({
	"joyo_kanji", "jinmeiyo_kanji",
	"grade1", "grade2", "grade3", "grade4", "grade5", "grade6",
}) do
	export.data[key] = data[key]
end
--- Convert every hiragana character (ぁ-ゖ) in `text` to katakana.
-- `text` is a string, or a frame-like table whose `args[1]` holds the string.
-- All other characters are passed through unchanged.
function export.hira_to_kata(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	-- each matched hiragana is replaced by the character 96 code points above it
	local function shift_up(kana)
		return to_char(to_cp(kana) + 96)
	end
	local converted = gsub(text, '[ぁ-ゖ]', shift_up)
	return converted
end
--- Convert every katakana character (ァ-ヶ) in `text` to hiragana.
-- `text` is a string, or a frame-like table whose `args[1]` holds the string.
-- All other characters are passed through unchanged.
function export.kata_to_hira(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	-- each matched katakana is replaced by the character 96 code points below it
	local function shift_down(kana)
		return to_char(to_cp(kana) - 96)
	end
	local converted = gsub(text, '[ァ-ヶ]', shift_down)
	return converted
end
--- Replace full-width ASCII variants with their ordinary (half-width) ASCII forms.
-- `text` is a string, or a frame-like table whose `args[1]` holds the string.
-- U+3000 IDEOGRAPHIC SPACE becomes a plain space, and every character in
-- U+FF01..U+FF5E is shifted down by 65248 (0xFEE0) onto U+0021..U+007E.
-- The characters are built from their code points so the patterns cannot be
-- silently folded to plain ASCII by width normalisation of this source file.
function export.fullwidth_to_halfwidth(text)
	if type(text) == "table" then text = text.args[1] end
	local ideographic_space = to_char(0x3000)
	local fullwidth_class = '[' .. to_char(0xFF01) .. '-' .. to_char(0xFF5E) .. ']'
	text = gsub(text, ideographic_space, ' ')
	return (gsub(text, fullwidth_class, function(char) return to_char(to_cp(char) - 65248) end))
end
--- Romanize kana text.
-- `text` is a string, or a frame-like table whose `args[1]` holds the string.
-- `options` is an optional table:
--   no_diacritics: skip the macron step (oo/aa/ee/ou/uu/ii stay as written)
--   keep_period:   keep "." markup and leave 。 unconverted
--   hist:          historical-kana handling (づ→du, ぢ→di, を→wo, ...)
-- Returns the romanized string. As a side effect the result is compared with
-- the output of Module:ja/k2r-old; differences are tracked through
-- Module:debug and logged with mw.log.
function export.kana_to_romaji(text, options)
	-- options: no_diacritics, keep_period, hist
	if type(text) == "table" then
		text = text.args[1]
	end
	if not options then options = {} end
	-- U+FF05 FULLWIDTH PERCENT SIGN, built from its code point: a bare ASCII "%"
	-- is a malformed pattern and a dangling escape in a replacement string
	local fullwidth_percent = to_char(0xFF05)
	local tracking_has_percent = find(text, '%%')
	local text_old = trim(require('Module:ja/k2r-old').kana_to_romaji(text, options.no_diacritics, options.keep_period))
	-- conversions
	text = gsub(text, '(%-)([はハ])$', '%1㊟㈛㊟%2') -- は as suffix and appearing at the end of string
	text = gsub(text, '(%-)([はハ]) ', '%1㊟㈛㊟%2 ') -- は as suffix and appearing mid-sentence
	text = gsub(text, fullwidth_percent, '㊟㌫㊟') -- at [[見込む]], for example; avoid collision with % used in our ruby syntax
	text = gsub(text, '\'\'\'', '㊟⒝㊟')
	text = gsub(text, '<u>', '㊟㋑⒰㊟')
	text = gsub(text, '</u>', '㊟㋺⒰㊟')
	-- avoid tampering with existing latin text: store it away
	-- (each run of a-z is swapped for a numbered placeholder and restored near the end)
	local escape = {}
	local id = 0
	for latin in gmatch(text, "[a-z]+") do
		escape[id] = latin
		text = gsub(text, latin, "㊟㊕㊕㊟" .. id .. "㊟㊕㊕㊟")
		id = id + 1
	end
	-- special preformatting
	text = gsub(text, 'ヶげつ', 'かげつ')
	text = gsub(text, 'ヶ(㊟[㋑㋺⒝⒰]+㊟)げつ', 'か%1げつ') -- 「'''ヶ'''げつ」
	text = gsub(text, 'ヶ', 'が')
	text = gsub(text, '(.)ゝ', '%1%1')
	text = gsub(text, '(.)ゞ', function(char) return mw.ustring.toNFC(char .. char .. '゙') end) -- unicode hax
	-- [[Wiktionary:Grease_pit/2017/May#Formatting_for_individual_Japanese_readings]]
	if options.hist then
		text = gsub(text, 'づ', 'du')
		text = gsub(text, 'ぢ', 'di')
		text = gsub(text, 'を', 'wo')
		text = gsub(text, '([やゆよわゐゑを])', '㊟⒳㊟%1')
	end
	-- kana → intermediate romaji through the data.kr lookup table
	text = export.hira_to_kata(text)
	text = gsub(text, '.', function(char) return data.kr[char] or char end)
	text = export.fullwidth_to_halfwidth(text)
	if options.hist then
		text = gsub(text, 'ou', 'o.u')
		text = gsub(text, '([iu])㊟⒳㊟', '') -- くゐやう kwyau
		text = gsub(text, '㊟⒳㊟', '') -- ゑつ wetsu
	end
	-- markup
	text = gsub(text, '%%', '.') -- ruby "percent sign" syntax
	text = gsub(text, '([ッ¤])%.', '%1') -- 「し を ぼっ.す」; 「るい%じん%えん」→「rui.jin¤.en¤」
	-- 「テェェェ」→「テェーー」 (avoid funky romaji effected by the "(テュ→)teユ→tyu" line below)
	text = gsub(text, '(ァ)(ァ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ィ)(ィ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ゥ)(ゥ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ェ)(ェ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ォ)(ォ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	-- (ゲェ→)geェ→gee (note that this causes things like ウゥ→ū and ギィ→gī)
	text = gsub(text, '[aiueo][ァィゥェォ]', {['aァ']='aa',['iィ']='ii',['uゥ']='uu',['eェ']='ee',['oォ']='oo',})
	-- (クヮ→)kuヮ→kwa
	text = gsub(text, '[u]([ヮ])', {['ヮ']='wa',})
	-- (クァ→)kuァ→kwa, (トァ→)toァ→twa, (ウィ→)uィ→wi
	text = gsub(text, '[uo]([ァィェォ])', {['ァ']='wa',['ィ']='wi',['ェ']='we',['ォ']='wo',})
	-- (ツァ→)cwa→ca
	text = gsub(text, '([fvcsz])w', '%1')
	-- (テュ→)teユ→tyu, (ギェ→)giェ→gye
	text = gsub(text, '[aiueo]([ャュェョ])', {['ャ']='ya',['ュ']='yu',['ェ']='ye',['ョ']='yo',})
	-- (ジュ→)jyu→ju
	text = gsub(text, '([xjq])y', '%1')
	-- (ティ→)teィ→ti (essentially forget about the vowel in between)
	text = gsub(text, '[aiueo]([ァィゥェォ])', {['ァ']='a',['ィ']='i',['ゥ']='u',['ェ']='e',['ォ']='o',})
	-- chouonpu and sokuon
	-- (repeat until no further expansion applies, so stacked ー / ッ resolve one layer per pass)
	while find(text, '[aiueo]ー') or find(text, 'ッ *[bcdfghjklmnpqrstvwxyz]') or find(text, 'ッ㊟[㋑㋺⒝⒰]+㊟[bcdfghjklmnpqrstvwxyz]') do
		text = gsub(text, '([aiueo])ー', '%1%1')
		text = gsub(text, 'ッ( *)([bcdfghjklmnpqrstvwxyz])', '%2%1%2')
		text = gsub(text, 'ッ(㊟[㋑㋺⒝⒰]+㊟)([bcdfghjklmnpqrstvwxyz])', '%2%1%2')
	end
	-- deal with leftover sokuon not used as geminate
	text = gsub(text, 'ッ', 'h')
	-- (ん→)n¤
	text = gsub(text, '¤([aiueoy])', "'%1")
	text = gsub(text, '¤', '')
	-- は
	text = gsub(text, "([^a-z.㊟])ha([^a-z.㊟])", "%1wa%2")
	text = gsub(text, "([^a-z.㊟])ha$", "%1wa")
	text = gsub(text, "^ha([^a-z.㊟])", "wa%1")
	-- へ
	text = gsub(text, "([^a-z.㊟])he([^a-z.㊟])", "%1e%2")
	text = gsub(text, "([^a-z.㊟])he$", "%1e")
	text = gsub(text, "^he([^a-z.㊟])", "e%1")
	-- change only when
	-- ① not flanked by a-z or a period ("^sore wa nani$", "^hyappou no .he hitotsu$")
	-- ② at the end of the string and not preceded by a-z or a period ("^are wa$")
	-- ③ at the beginning of the string and not followed by a-z or a period ("^he ikou$") [not sure this is actually necessary, but I suppose it is consistent with ②]
	-- this also means that "^ha$" becomes "ha"
	-- period can be used next to the kana (either side) to force the "dumb" romanization (i.e. "ha", "he")
	-- fix sh, ch, ts
	text = gsub(text, '([xqc]*)([xqc])', function(geminate,main)
		--「めちゃ」→「mecha」
		--「めっちゃ」→「metcha」
		--「めっっちゃ」→「mettcha」
		local corresp_geminate_form = {['x']='s',['q']='t',['c']='t'}
		local corresp_main = {['x']='sh',['q']='ch',['c']='ts'}
		return (geminate and mw.ustring.rep(corresp_geminate_form[main], length(geminate))) .. corresp_main[main]
	end
	)
	-- macrons
	if not options.no_diacritics then
		text = gsub(text, 'oo', 'ō')
		text = gsub(text, 'aa', 'ā')
		text = gsub(text, 'ee', 'ē')
		text = gsub(text, 'ou', 'ō')
		text = gsub(text, 'uu', 'ū')
		text = gsub(text, 'ii', 'ī')
	end
	-- remove markup and convert real periods
	if not options.keep_period then
		text = gsub(text, '%.', '')
		text = gsub(text, '。', '◆.◇')
	end
	--
	text = gsub(text, '◇◆', '')
	text = gsub(text, '◆◇', '')
	text = gsub(text, ' *◆ *', '')
	text = gsub(text, ' *◇ *', ' ')
	-- restore latin text
	text = gsub(text, "㊟㊕㊕㊟(%d+)㊟㊕㊕㊟", function(id) return escape[tonumber(id)] end)
	-- clean up spaces
	text = trim(text)
	text = gsub(text, ' +', ' ')
	-- uppercase markup
	text = gsub(text, "(%^)(㊟⒝㊟)", "%2%1") -- move ^ to an effective position if placed before bold markup
	text = gsub(text, "(%^)( )", "%2%1") -- same but with spaces
	text = gsub(text, '%^(.)', mw.ustring.upper) -- uppercase conversion
	-- clean up spaces again
	text = gsub(text, ' +', ' ')
	-- conversions
	text = gsub(text, '㊟⒝㊟', '\'\'\'')
	text = gsub(text, '㊟㋑⒰㊟', '<u>')
	text = gsub(text, '㊟㋺⒰㊟', '</u>')
	text = gsub(text, '㊟㈛㊟', '')
	text = gsub(text, '㊟㌫㊟', fullwidth_percent) -- put the full-width percent sign back
	-- comparison with old kana_to_romaji() code
	text_old = gsub(text_old, '%(ba%)', ' (ba)') -- avoid flooding the tracking template with na-adjectives. ← this really should be looked at though
	text_old = gsub(text_old, ' ”', '”') -- and spacing around quotation marks
	if text ~= text_old then
		if mw.ustring.lower(text) == mw.ustring.lower(text_old) then
			require('Module:debug').track('ja/k2r diff caps')
		elseif find(text_old, 'ッ') then
			require('Module:debug').track('ja/k2r diff w xtu')
		elseif tracking_has_percent then
			require('Module:debug').track('ja/k2r diff pc')
		else
			require('Module:debug').track('ja/k2r diff')
		end
		mw.log('new]' .. text .. '[')
		mw.log('old]' .. text_old .. '[')
	end
	-- any kana still present means some character was not romanized
	if find(text, '[ぁ-ー]') then
		require('Module:debug').track('ja/k2r failure')
	end
	return text
end
-- removes spaces and hyphens from input
-- intended to be used when checking manual romaji to allow the
-- insertion of spaces or hyphens in manual romaji without appearing "wrong"
--
-- `f` is a string, or a frame-like table whose `args[1]` holds the string.
-- Removed characters: ASCII space, hyphen, period, U+3000 IDEOGRAPHIC SPACE
-- and apostrophe. Returns the stripped string.
function export.rm_spaces_hyphens(f)
	local text = type(f) == 'table' and f.args[1] or f
	text = gsub(text, ' ', '')
	-- "-" is a pattern quantifier, so it is escaped to match a literal hyphen
	text = gsub(text, '%-', '')
	text = gsub(text, '%.', '')
	-- ideographic space, built from its code point so it cannot be confused
	-- with (or folded into) the ASCII space already handled above
	text = gsub(text, to_char(0x3000), '')
	text = gsub(text, '\'', '')
	return text
end
--- Convert romaji to katakana.
-- `f` is a string, or a frame-like table whose `args[1]` holds the string.
-- The steps run in a fixed order: per-character substitution through data.rd,
-- doubled consonants → ッ + consonant, the ts/sh/ch/n syllables, the remaining
-- consonant(+y/w)+vowel syllables through data.rk, and finally bare vowels.
function export.romaji_to_kata(f)
	local text = type(f) == 'table' and f.args[1] or f
	text = gsub(text, '.', function (char) return data.rd[char] or char end)

	-- geminates: the first of two identical consonants becomes a small tsu
	for _, consonant in ipairs({'k', 's', 't', 'p', 'b', 'd', 'g', 'j'}) do
		text = gsub(text, consonant .. consonant, 'ッ' .. consonant)
	end
	text = gsub(text, 'tc', 'ッc')

	-- syllables whose romaji does not follow the plain consonant+vowel shape
	text = gsub(text, 'tsyu', 'ツュ')
	text = gsub(text, 'ts[uoiea]', {['tsu']='ツ',['tso']='ツォ',['tsi']='ツィ',['tse']='ツェ',['tsa']='ツァ'})
	text = gsub(text, 'sh[uoiea]', {['shu']='シュ',['sho']='ショ',['shi']='シ',['she']='シェ',['sha']='シャ'})
	text = gsub(text, 'ch[uoiea]', {['chu']='チュ',['cho']='チョ',['chi']='チ',['che']='チェ',['cha']='チャ'})
	text = gsub(text, "n[uoiea']?", {['nu']='ヌ',['no']='ノ',['ni']='ニ',['ne']='ネ',['na']='ナ',['n']='ン',["n'"]='ン'})

	-- everything else of the form consonant, optional y/w, vowel
	text = gsub(text, '[wvtrpsmlkjhgfdbzy][yw]?[uoiea]', function (char) return data.rk[char] or char end)

	-- vowels that are still standing alone
	for _, vowel in ipairs({ {'u', 'ウ'}, {'o', 'オ'}, {'i', 'イ'}, {'e', 'エ'}, {'a', 'ア'} }) do
		text = gsub(text, vowel[1], vowel[2])
	end
	return text
end
-- expects: any mix of kanji and kana
-- determines the script types used
-- e.g. given イギリス人, it returns Kana+Hani
--
-- `f` is a string, or a frame-like table whose `args[1]` holds the string.
-- Returns the detected labels joined with "+", always in the order
-- Hira, Kana, Hani, Romaji, Number, Abbreviation ("" when nothing matches).
function export.script(f)
	-- both variables are local: they were previously assigned as globals
	local text = type(f) == 'table' and f.args[1] or f
	local script = {}
	if match(text, '[ぁ-ゖ]') then
		table.insert(script, 'Hira')
	end
	-- TODO: there are two kanas. This should insert Kata.
	if match(text, '[ァ-ヺー]') then
		table.insert(script, 'Kana')
	end
	-- 一 is unicode 4e00, previously used 丁 is 4e01
	-- The supplementary-plane range needs an explicit upper bound; without one
	-- the trailing "-" is read as a literal hyphen.
	-- NOTE(review): U+2FA1F (end of the CJK Compatibility Ideographs Supplement
	-- block) is assumed to be the intended bound - confirm.
	local hani_class = '[㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-' .. to_char(0x2FA1F) .. ']'
	if match(text, hani_class) then
		table.insert(script, 'Hani')
	end
	-- matching %a should have worked but matched the end of every string
	-- ASCII letters, macron vowels, and full-width a-z / A-Z (U+FF41-FF5A, U+FF21-FF3A);
	-- the full-width ranges are built from code points so they cannot collapse
	-- into a duplicate of the ASCII ranges
	local romaji_class = '[a-zA-ZāēīōūĀĒĪŌŪ'
		.. to_char(0xFF41) .. '-' .. to_char(0xFF5A)
		.. to_char(0xFF21) .. '-' .. to_char(0xFF3A) .. ']'
	if match(text, romaji_class) then
		table.insert(script, 'Romaji')
	end
	-- ASCII digits and full-width digits (U+FF10-FF19)
	local number_class = '[0-9' .. to_char(0xFF10) .. '-' .. to_char(0xFF19) .. ']'
	if match(text, number_class) then
		table.insert(script, 'Number')
	end
	if match(text, '[〆々]') then
		table.insert(script, 'Abbreviation')
	end
	return table.concat(script, '+')
end
-- when counting morae, most small hiragana belong to the previous mora,
-- so for purposes of counting them, they can be removed and the characters
-- can be counted to get the number of morae. The exception is small tsu,
-- so data.nonmora_to_empty maps all small hiragana except small tsu.
--
-- `text` is a string, or a frame-like table whose `args[1]` holds the string.
-- Returns the character count that remains after the removals below.
function export.count_morae(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	-- convert kata to hira (hira is untouched)
	text = export.kata_to_hira(text)
	-- remove all of the small hiragana such as ょ except small tsu
	text = gsub(text,'.',function (char) return data.nonmora_to_empty[char] or char end)
	-- remove zero-width spaces (U+200B); the character is built from its code
	-- point because an invisible literal is easily lost, leaving an empty pattern
	text = gsub(text, to_char(0x200B), '')
	-- return number of characters, which should be the number of morae
	return length(text)
end
-- accepts: any mix of kana (string, or frame-like table with the string in args[1])
-- returns: a hiragana sort key designed for WMF software
-- Unlike sort(), the result carries no "|sort=" prefix, and the long vowel
-- mark is replaced with the vowel it stands for.
function export.jsort(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	-- drop western spaces, hyphens, periods and (diff=41967612) carets
	text = gsub(text, '[ %-%.%^]', '')
	text = export.kata_to_hira(text)

	local head = sub(text, 1, 1)
	-- true when substituting `head` through `lookup` leaves nothing behind
	-- (an empty `head` also satisfies this)
	local function vanishes_in(lookup)
		local stripped = gsub(head, '.', function (char) return lookup[char] or char end)
		return stripped == ''
	end

	-- dakuten on the first character → one trailing apostrophe (がす > かす'),
	-- handakuten → two (ぱす > はす''); dakuten is tested first
	local marker
	if vanishes_in(data.dakuten) then
		marker = "'"
	elseif vanishes_in(data.handakuten) then
		marker = "''"
	end
	if marker then
		local tail = sub(text, 2, length(text))
		local base = gsub(head, '.', function (char) return data.tenconv[char] or char end)
		text = base .. tail .. marker
	end

	-- replace the long vowel mark with the vowel that it stands for
	for key, value in pairs(data.longvowels) do
		text = gsub(text, key, value)
	end
	return text
end
-- returns a sort key with |sort= in front, e.g.
-- |sort=はつぐん' if given ばつぐん
-- `f` is a string, or a frame-like table whose `args[1]` holds the string.
-- A key is produced when the first character carries a dakuten/handakuten, or
-- when removing all of ァ-ヺ from the input leaves nothing; otherwise the
-- result is the empty string.
function export.sort(f)
	local text = type(f) == 'table' and f.args[1] or f
	local prefix = '|' .. 'sort' .. '='
	-- checked on the input as given, before the conversion to hiragana
	local only_katakana = gsub(text, '[ァ-ヺ]', '') == ''
	text = export.kata_to_hira(text)

	local head = sub(text, 1, 1)
	-- true when substituting `head` through `lookup` leaves nothing behind
	-- (an empty `head` also satisfies this)
	local function vanishes_in(lookup)
		local stripped = gsub(head, '.', function (char) return lookup[char] or char end)
		return stripped == ''
	end
	-- first character without its (han)dakuten, followed by the rest of the text
	local function plain_key()
		local tail = sub(text, 2, length(text))
		local base = gsub(head, '.', function (char) return data.tenconv[char] or char end)
		return base .. tail
	end

	if vanishes_in(data.dakuten) then
		return prefix .. plain_key() .. "'"
	end
	if vanishes_in(data.handakuten) then
		return prefix .. plain_key() .. "''"
	end
	if only_katakana then
		return prefix .. text
	end
	return ''
end
-- returns the "stem" of a verb or -i adjective, that is the term minus the final character
-- `f` is a frame-like table whose `args[1]` holds the term, or (like the other
-- functions in this module) the term itself as a plain string.
function export.definal(f)
	local term = type(f) == 'table' and f.args[1] or f
	return sub(term, 1, length(term) - 1)
end
-- see also Template:JAruby
-- meant to be called from another module
--
-- term: the text as written (may contain [[wikilinks]]); each unbroken run of
--       characters matching kanji_pattern receives a <ruby> annotation
-- kana: the reading of the whole term; links are stripped from it
-- from_ja_link: not used by this function
-- Returns `term` with <ruby> markup inserted, wrapped in a <span> at 1.2em.
-- Raises an error when no reading can be extracted for a kanji segment.
function export.add_ruby_backend(term, kana, from_ja_link)
	local pattern = ""
	-- holds the whole segments of markup enclosed in <ruby>...</ruby>
	local ruby_markup = {}
	-- range of kana: '[ぁ-ゖァ-ヺ]'
	-- nonkana: [^ぁ-ゖァ-ヺ]
	-- The supplementary-plane range needs an explicit upper bound; without one
	-- the class contains the inverted range 𠀀-0.
	-- NOTE(review): U+2FA1F (end of the CJK Compatibility Ideographs Supplement
	-- block) is assumed to be the intended bound, and the trailing digit range
	-- may originally have been full-width digits - confirm both.
	local kanji_pattern = "[々㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-" .. to_char(0x2FA1F) .. "0-9]"
	-- links without pipes will fail
	term = gsub(term, '%[%[([^|%]]+)%]%]', '[[%1|%1]]')
	-- remove links from kana
	kana = gsub(kana, '%[%[([^|%]]+)%]%]', '%1')
	kana = gsub(kana, '%[%[[^%]]+|([^%]]+)%]%]', '%1')
	-- build up pattern
	-- escape the magic characters in the term
	pattern = gsub(term, '%[%[[^%]]+|([^%]]+)%]%]', '%1')
	pattern = require("Module:string").pattern_escape(pattern)
	pattern = gsub(pattern, "[%[%]]+", " *")
	kana = gsub(kana, "[%[%]]+", '')
	pattern = gsub(pattern, " *('+) *", "%1")
	kana = gsub(kana, " *('+) *", "%1")
	pattern = gsub(pattern, " +", " ")
	kana = gsub(kana, " +", " ")
	-- remove periods and caret signs and hyphens
	-- (in `pattern` they are already escaped with a leading %)
	pattern = gsub(pattern, '%%[%.%^%-]', '')
	kana = gsub(kana, '[%.%^%-]', '')
	-- in order to make a pattern that will find the ruby,
	-- replace every unbroken string of kanji with a sub-pattern
	pattern = gsub(pattern, kanji_pattern .. '+', '(.+)')
	-- get a pattern like
	-- (.+)ばか(.+)ばか(.+)ばかばかばああか(.+) when given 超ばか猿超ばか猿超ばかばかばああか猿
	-- it turns out we need to keep the spaces sometimes
	-- so that kana don't "leak" in ambiguous cases like 捨すてて撤退 where it's not clear if it's
	-- す, てったい or すて, ったい. only solution now is to put spaces in the "term" param
	-- if they fall between kana
	-- build up term (e.g. [[歌う|歌った]])
	local replaced = {}
	local count = 0
	term = gsub(term, '%]', '%]') -- escape the "]" character so that it cannot appear, example becomes [[歌う|歌った%]%]
	term = gsub(term, kanji_pattern .. '+', function(text)
		count = count + 1
		-- remove spaces
		text = gsub(text, ' ', '')
		table.insert(replaced, text)
		return '[' .. count .. ']'
	end) -- example becomes [[[1]う|[2]った%]%]
	-- placeholders inside the link-target half of a piped link get their kanji
	-- back, so only the displayed half is annotated
	while match(term, '%[%[[^|]*%[%d+%][^|]*|') do
		term = gsub(
			term,
			'(%[%[[^|]*)%[(%d+)%]([^|]*|)',
			function(a,b,c)
				return a .. replaced[tonumber(b)] .. c
			end
		)
	end
	-- example becomes [[歌う|[2]った%]%]
	-- apply that pattern to the kana to collect the rubies
	-- if this fails, try it without spaces
	if match(kana, pattern) == nil then kana = gsub(kana, ' ', '') end
	local ruby = { match(kana, pattern) }
	-- local ruby = {}
	-- for c in gmatch(kana, pattern) do table.insert(ruby, c) end
	-- find the kanji strings again and combine them with their ruby to make the <ruby> markup
	local kanji_segments = {}
	for c in gmatch(term, '%[(%d+)%]') do
		table.insert(kanji_segments, replaced[tonumber(c)])
	end
	for i = 1, #kanji_segments do
		if not ruby[i] then
			error('No ruby for kanji segment "' .. kanji_segments[i] .. '".')
		end
		table.insert(ruby_markup, "<ruby>" .. kanji_segments[i] .. "<rp> (</rp><rt>" .. ruby[i] .. "</rt><rp>) </rp></ruby>")
	end
	-- swap the remaining placeholders, in order, for the markup built above
	count = 0
	term = gsub(term, '%[%d+%]', function()
		count = count + 1
		return ruby_markup[count]
	end)
	term = gsub(term, '%%%]', ']')
	term = gsub(term, '%%', '')
	term = gsub(term, ' ', '')
	--done
	return '<span style="font-size: 1.2em">' .. term .. '</span>'
end
-- do the work of Template:ja-kanji
--
-- Reads grade, rs, style, shin, kyu and head from the parent frame's arguments.
-- Returns the headline wikitext followed by the category links, or nothing at
-- all when the current page title contains no Han character.
function export.kanji(frame)
	local PAGENAME = mw.title.getCurrentTitle().text
	-- The supplementary-plane range needs an explicit upper bound; without one
	-- the trailing "-" is read as a literal hyphen.
	-- NOTE(review): U+2FA1F (end of the CJK Compatibility Ideographs Supplement
	-- block) is assumed to be the intended bound - confirm.
	local han_class = "[㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-" .. to_char(0x2FA1F) .. "]"
	-- only do this if this entry is a kanji page and not some user's page
	if not match(PAGENAME, han_class) then
		return
	end

	local args = frame:getParent().args
	local grade = args["grade"] or ""
	local rs = args["rs"] or ""
	local style = args["style"] or ""
	local shin = args["shin"] or ""
	local kyu = args["kyu"] or ""
	local head = args["head"] or ""
	local wikitext = {}
	local categories = {}
	local catsort = (rs ~= "") and rs or PAGENAME

	-- display the kanji itself at the top at 275% size
	-- (a blank |head= falls back to the page name instead of showing nothing)
	table.insert(wikitext, '<div><span lang="ja" class="Jpan" style="font-size:275%; line-height: 100%;">' .. (head ~= "" and head or PAGENAME) .. '</span></div>')

	-- display information for the grade
	-- if grade was not specified, determine it now
	if grade == "" then
		local joyo_kanji_pattern = ('[' .. data.joyo_kanji .. ']')
		local jinmeiyo_kanji_pattern = ('[' .. data.jinmeiyo_kanji .. ']')
		if match(PAGENAME, joyo_kanji_pattern) then grade = "c"
		elseif match(PAGENAME, jinmeiyo_kanji_pattern) then grade = "n"
		else
			grade = "uc"
		end
	end

	-- description and category for every recognised grade value
	local grade_label = {}
	local grade_category = {}
	for n = 1, 6 do
		local key = tostring(n)
		grade_label[key] = "[[w:Kyōiku kanji|grade " .. key .. " “Kyōiku” kanji]]"
		grade_category[key] = "[[Category:Grade " .. key .. " kanji|" .. catsort .. "]]"
	end
	local function register(keys, label, category)
		for _, key in ipairs(keys) do
			grade_label[key] = label
			grade_category[key] = category
		end
	end
	register({"7", "c"}, "[[w:Jōyō kanji|common “Jōyō” kanji]]", "[[Category:Common kanji|" .. catsort .. "]]")
	register({"8", "n"}, "[[w:Jinmeiyō kanji|“Jinmeiyō” kanji used for names]]", "[[Category:Kanji used for names|" .. catsort .. "]]")
	register({"9", "uc"}, "[[w:Hyōgai kanji|uncommon “Hyōgai” kanji]]", "[[Category:Uncommon kanji|" .. catsort .. "]]")
	register({"0", "r"}, "[[w:Radical_(Chinese_character)|Radical]]", "[[Category:CJKV radicals| ]]")

	table.insert(wikitext, "(''")
	if grade_label[grade] then
		table.insert(wikitext, grade_label[grade])
	else
		-- unrecognised grade value: flag the entry for attention
		table.insert(categories, "[[Category:Japanese terms needing attention/kanji grade]]")
	end

	-- if style was indicated, mention that and provide link to corresponding kanji
	-- (link to shinjitai if this is kyujitai, link to kyujitai if this is shinjitai)
	if style == "s" then
		table.insert(wikitext, ", ")
		if kyu == "" then
			table.insert(wikitext, "[[shinjitai]] kanji")
		else
			table.insert(wikitext, '[[shinjitai]] kanji, [[kyūjitai]] form <span lang="ja" class="Jpan">[[' .. kyu .. '#Japanese|' .. kyu .. ']]</span>')
		end
	elseif style == "ky" then
		table.insert(wikitext, ", ")
		if shin == "" then
			table.insert(wikitext, "[[kyūjitai]] kanji")
		else
			table.insert(wikitext, '[[kyūjitai]] kanji, [[shinjitai]] form <span lang="ja" class="Jpan">[[' .. shin .. '#Japanese|' .. shin .. "]]</span>")
		end
	end
	table.insert(wikitext, "'')")

	-- add categories
	table.insert(categories, "[[Category:Japanese Han characters|" .. catsort .. "]]")
	if grade_category[grade] then
		table.insert(categories, grade_category[grade])
	end
	-- error category
	if rs == "" then table.insert(categories, "[[Category:Japanese terms needing attention/radical and strokes]]") end

	return table.concat(wikitext, "") .. table.concat(categories, "\n")
end
-- Character-class patterns built from the kanji lists in Module:ja/data.
-- hyogaiji_pattern is the negated class: it matches any character that is in
-- neither the joyo nor the jinmeiyo list.
local grade1_pattern, grade2_pattern, grade3_pattern
local grade4_pattern, grade5_pattern, grade6_pattern
local secondary_pattern, jinmeiyo_kanji_pattern, hyogaiji_pattern
do
	-- wrap a string of characters in [...] so it can be used as a pattern set
	local function class_of(chars)
		return '[' .. chars .. ']'
	end
	grade1_pattern = class_of(data.grade1)
	grade2_pattern = class_of(data.grade2)
	grade3_pattern = class_of(data.grade3)
	grade4_pattern = class_of(data.grade4)
	grade5_pattern = class_of(data.grade5)
	grade6_pattern = class_of(data.grade6)
	secondary_pattern = class_of(data.secondary)
	jinmeiyo_kanji_pattern = class_of(data.jinmeiyo_kanji)
	hyogaiji_pattern = '[^' .. data.joyo_kanji .. data.jinmeiyo_kanji .. ']'
end
--- Grade number for `kanji`.
-- `kanji` is a string, or a frame-like table whose `args[1]` holds the string.
-- The patterns are tried from 9 down to 1 and the number of the first one that
-- matches anywhere in the string is returned; `false` when none matches.
function export.kanji_grade(kanji)
	if type(kanji) == "table" then
		kanji = kanji.args[1]
	end
	local checks = {
		{ hyogaiji_pattern, 9 },
		{ jinmeiyo_kanji_pattern, 8 },
		{ secondary_pattern, 7 },
		{ grade6_pattern, 6 },
		{ grade5_pattern, 5 },
		{ grade4_pattern, 4 },
		{ grade3_pattern, 3 },
		{ grade2_pattern, 2 },
		{ grade1_pattern, 1 },
	}
	for _, check in ipairs(checks) do
		if match(kanji, check[1]) then
			return check[2]
		end
	end
	return false
end
function export.new(frame)
local args = frame:getParent().args
local result = "==Japanese=="
if args["defs"] then
result = result .. "\n{{DEFAULTSORT:" .. args["defs"] .. "}}"
end
if args["wp"] then
result = result .. "\n{{wp|lang=ja" .. (args["wp"] ~= "y" and "|" .. args["wp"] or "") .. "}}"
end
wp_count = 2
while args["wp" .. wp_count] do
result = result .. "\n{{wp|lang=ja|" .. args["wp" .. wp_count] .. "}}"
wp_count = wp_count + 1
end
if args["swp"] then
result = result .. "\n{{swp|lang=ja" .. (args["swp"] ~= "y" and "|" .. args["swp"] or "") .. "}}"
end
swp_count = 2
while args["swp" .. swp_count] do
result = result .. "\n{{swp|lang=ja|" .. args["swp" .. swp_count] .. "}}"
swp_count = swp_count + 1
end
pagename = mw.title.getCurrentTitle().text
text = args[1] ~= "" and args[1] or pagename
text = gsub(text, "%-", "|")
local function make_tab(original, yomi)
output_text = ""
original = gsub(original, " ", "|")
original = gsub(original, "%.", "|")
original = gsub(original, "%^", "")
if match(original, "<") then
for word in gmatch(original, "<([^>]+)>") do
output_text = output_text .. "|" .. word
end
yomi = yomi or "k"
else
output_text = gsub(original, ">([1-9])", "|k%1=")
output_text = match(output_text, "|") and "|" .. output_text or false
end
yomi = yomi or "o"
return "\n{{ja-kanjitab" .. (output_text or "") .. "|yomi=" .. yomi .. (yomi == "irr" and "" or sortkey or "") .. "}}", yomi
end
if match(pagename, "[㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-]") then
to_add, yomi = make_tab(text, args["yomi"])
result = result .. to_add
end
if match(text, "<") then
text = gsub(text, "[<>]", "")
else
text = gsub(text, "^[^>|]+>%d+([^>|]+)", "%1")
text = gsub(text, "|[^>|]+>%d+([^>|]+)", "%1")
text = gsub(text, "([あかがさざただなはばぱまやらわ])|(あ)", "%1.%2")
text = gsub(text, "([いきぎしじちぢにひびぴみり])|(い)", "%1.%2")
text = gsub(text, "([うくぐすずつづぬふむゆる])|(う)", "%1.%2")
text = gsub(text, "([えけげせぜてでねへめれ])|([えい])", "%1.%2")
text = gsub(text, "([おこごそぞとどのほぼぽもよろ])|([おう])", "%1.%2")
text = gsub(text, "|", "")
end
local function other(class, title, args)
local code, i = "", 2
if args[class] then
code = code .. "\n\n===" .. title .. "===\n* {{ja-l|" .. args[class] .. "}}"
while args[class .. i] do
code = code .. "\n* {{ja-l|" .. args[class .. i] .. "}}"
i = i + 1
end
end
code = gsub(code, "{{ja%-l\|([^%|%}]+)[::]", "{{ja-r|%1|") -- change something like "{{ja-l|辞典:じてん}}" to "{{ja-r|辞典|じてん}}"
code = gsub(code, "{{ja%-l\|([ぁ-ー ^%%.]+)}}", "{{ja-r|%1}}") -- change something like "{{ja-l|じてん}}" to "{{ja-r|じてん}}"
return code
end
result = result .. other("alt", "Alternative forms", args)
sortkey = export.script(text) == "Kana" and export.sort(text) or false
if sortkey and sortkey == "|sort=" .. text then
sortkey = false
end
if args["d"] or args["e"] or args["we1"] or args["b"] or args["lb"] or args["co1"] or args["et"] or args["pr1"] or args["su1"] then
result = result .. "\n\n===Etymology===\n"
if args["we1"] then
result = result .. "{{waei|" .. args["we1"] .. (args["we2"] and "|" .. args["we2"] or "") .. (args["defs"] and "" or sortkey or "") .. "}}"
else
if args["pr1"] then
result = result .. "{{pre|ja|" .. args["pr1"] .. "|" .. args["pr2"] .. (args["defs"] and "" or sortkey or "") .. (args["tr1"] and "|tr1=" .. args["tr1"] or "") .. (args["tr2"] and "|tr2=" .. args["tr2"] or "") .. (args["pos1"] and "|pos1=" .. args["pos1"] or "") .. (args["pos2"] and "|pos2=" .. args["pos2"] or "") .. (args["t1"] and "|t1=" .. args["t1"] or "") .. (args["t2"] and "|t2=" .. args["t2"] or "") .. "}}"
else
if args["su1"] then
result = result .. "{{suf|ja|" .. args["su1"] .. "|" .. args["su2"] .. (args["defs"] and "" or sortkey or "") .. (args["tr1"] and "|tr1=" .. args["tr1"] or "") .. (args["tr2"] and "|tr2=" .. args["tr2"] or "") .. (args["pos1"] and "|pos1=" .. args["pos1"] or "") .. (args["pos2"] and "|pos2=" .. args["pos2"] or "") .. (args["t1"] and "|t1=" .. args["t1"] or "") .. (args["t2"] and "|t2=" .. args["t2"] or "") .. "}}"
else
if args["co1"] then
result = result .. "{{com|ja|" .. args["co1"] .. "|" .. args["co2"] .. (args["co3"] and "|" .. args["co3"] or "") .. (args["co4"] and "|" .. args["co4"] or "") .. (args["co5"] and "|" .. args["co5"] or "") .. (args["co6"] and "|" .. args["co6"] or "") .. (args["tr1"] and "|tr1=" .. args["tr1"] or "") .. (args["tr2"] and "|tr2=" .. args["tr2"] or "") .. (args["tr3"] and "|tr3=" .. args["tr3"] or "") .. (args["tr4"] and "|tr4=" .. args["tr4"] or "") .. (args["tr5"] and "|tr5=" .. args["tr5"] or "") .. (args["tr6"] and "|tr6=" .. args["tr6"] or "") .. (args["pos1"] and "|pos1=" .. args["pos1"] or "") .. (args["pos2"] and "|pos2=" .. args["pos2"] or "") .. (args["pos3"] and "|pos3=" .. args["pos3"] or "") .. (args["pos4"] and "|pos4=" .. args["pos4"] or "") .. (args["pos5"] and "|pos5=" .. args["pos5"] or "") .. (args["pos6"] and "|pos6=" .. args["pos6"] or "") .. (args["t1"] and "|t1=" .. args["t1"] or "") .. (args["t2"] and "|t2=" .. args["t2"] or "") .. (args["t3"] and "|t3=" .. args["t3"] or "") .. (args["t4"] and "|t4=" .. args["t4"] or "") .. (args["t5"] and "|t5=" .. args["t5"] or "") .. (args["t6"] and "|t6=" .. args["t6"] or "") .. (args["defs"] and "" or sortkey or "") .. "}}"
else
if args["et"] then
result = result .. "{{cal|ja|" .. ((args["el"] and "etyl lang=" .. args["el"]) or "etyl lang=en") .. "|etyl term=" .. args["et"] .. (args["nocap"] and "|nocap=" .. args["nocap"] or "") .. (args["ca1"] and "|" .. args["ca1"] or "") .. (args["ca2"] and "|" .. args["ca2"] or "") .. (args["ca3"] and "|" .. args["ca3"] or "") .. (args["ca4"] and "|" .. args["ca4"] or "") .. (args["ca5"] and "|" .. args["ca5"] or "") .. (args["ca6"] and "|" .. args["ca6"] or "") .. (args["tr1"] and "|tr1=" .. args["tr1"] or "") .. (args["tr2"] and "|tr2=" .. args["tr2"] or "") .. (args["tr3"] and "|tr3=" .. args["tr3"] or "") .. (args["tr4"] and "|tr4=" .. args["tr4"] or "") .. (args["tr5"] and "|tr5=" .. args["tr5"] or "") .. (args["tr6"] and "|tr6=" .. args["tr6"] or "") .. (args["pos1"] and "|pos1=" .. args["pos1"] or "") .. (args["pos2"] and "|pos2=" .. args["pos2"] or "") .. (args["pos3"] and "|pos3=" .. args["pos3"] or "") .. (args["pos4"] and "|pos4=" .. args["pos4"] or "") .. (args["pos5"] and "|pos5=" .. args["pos5"] or "") .. (args["pos6"] and "|pos6=" .. args["pos6"] or "") .. (args["t1"] and "|t1=" .. args["t1"] or "") .. (args["t2"] and "|t2=" .. args["t2"] or "") .. (args["t3"] and "|t3=" .. args["t3"] or "") .. (args["t4"] and "|t4=" .. args["t4"] or "") .. (args["t5"] and "|t5=" .. args["t5"] or "") .. (args["t6"] and "|t6=" .. args["t6"] or "") .. (args["defs"] and "" or sortkey or "") .. "}}"
else
if args["b"] then
result = result .. "{{bor|ja|" .. (args["bl"] or "en") .. (args["b"] and "|" .. args["b"] or "") .. (args["tr"] and "|tr=" .. args["tr"] or "") .. (args["t"] and "||" .. args["t"] or "") .. (args["defs"] and "" or sortkey or "") .. "}}"
else
if args["lb"] then
result = result .. "{{lbor|ja|" .. (args["lbl"] or "grc") .. (args["lb"] and "|" .. args["lb"] or "") .. (args["tr"] and "|tr=" .. args["tr"] or "") .. (args["t"] and "||" .. args["t"] or "") .. (args["defs"] and "" or sortkey or "") .. "}}"
else
result = result .. (args["e"] or
("From {{der|ja|" .. (args["dl"] or "en") .. (args["d"] and "|" .. args["d"] or "") .. (args["tr"] and "|tr=" .. args["tr"] or "") .. (args["t"] and "||" .. args["t"] or "") .. (args["defs"] and "" or sortkey or "") .. "}}"))
end
end
end
end
end
end
end
end
if not args["nop"] then
result = result .. "\n\n===Pronunciation===\n{{ja-pron" .. (args[1] ~= "" and "|" .. gsub(text, '%^', '') or "")
if args["y"] == "n" then
result = result .. ""
else if args["y"] and args["y"] ~= "n" then
result = result .. "|y=" .. args["y"]
else if yomi then
if yomi == "irr" then
result = result .. "|y=i"
else
result = result .. "|y=" .. yomi
end
end
end
end
result = result .. (args["acc"] and "|acc=" .. args["acc"] or "") .. (args["acc2"] and "|acc2=" .. args["acc2"] or "") .. (args["acc3"] and "|acc3=" .. args["acc3"] or "") .. (args["acc4"] and "|acc4=" .. args["acc4"] or "") .. (args["acc_ref"] and "|acc_ref=" .. args["acc_ref"] or "") .. (args["acc2_ref"] and "|acc2_ref=" .. args["acc2_ref"] or "") .. (args["acc3_ref"] and "|acc3_ref=" .. args["acc3_ref"] or "") .. (args["acc4_ref"] and "|acc4_ref=" .. args["acc4_ref"] or "") .. (args["dev"] and "|dev=" .. args["dev"] or "") .. "}}" .. (args["hmp"] and "\n* {{hmp|lang=ja|" .. args["hmp"] .. (args["hmp2"] and "|" .. args["hmp2"] or "") .. (args["hmp3"] and "|" .. args["hmp3"] or "") .. (args["hmp4"] and "|" .. args["hmp4"] or "") .. (args["hmp5"] and "|" .. args["hmp5"] or "") .. (args["hmp6"] and "|" .. args["hmp6"] or "") .. (args["defs"] and "" or sortkey or "") .. "}}" or "")
end
local pos = args[2] ~= "" and args[2] or "n"
local pos_table = {
[""] = { "Noun", "noun", true },
["n"] = { "Noun", "noun", true },
["s"] = { "Noun", "noun", true, "Verb", "verb-suru" },
["noun"] = { "Noun", "noun", true },
["suru"] = { "Noun", "noun", true, "Verb", "verb-suru" },
["an"] = { "Adjective", "adj", true, "Noun", "noun" },
["anoun"] = { "Adjective", "adj", true, "Noun", "noun" },
["v"] = { "Verb", "verb", true },
["verb"] = { "Verb", "verb", true },
["vform"] = { "Verb", "verb form", true },
["verb form"] = { "Verb", "verb form", true },
["a"] = { "Adjective", "adj", true },
["adj"] = { "Adjective", "adj", true },
["adjective"] = { "Adjective", "adj", true },
["adv"] = { "Adverb", "adverb", false },
["adverb"] = { "Adverb", "adverb", false },
["pron"] = { "Pronoun", "pronoun", false },
["pronoun"] = { "Pronoun", "pronoun", false },
["pn"] = { "Proper noun", "proper", false },
["propn"] = { "Proper noun", "proper", false },
["proper"] = { "Proper noun", "proper", false },
["proper noun"] = { "Proper noun", "proper", false },
["ph"] = { "Phrase", "phrase", true },
["phrase"] = { "Phrase", "phrase", true },
["interjection"] = { "Interjection", "interjection", false },
["intj"] = { "Interjection", "interjection", false },
["conj"] = { "Conjunction", "conjunction", false },
["part"] = { "Particle", "particle", false },
["prep"] = { "Preposition", "preposition", false },
["suf"] = { "Suffix", "suffix", false },
["suffix"] = { "Suffix", "suffix", false },
["pref"] = { "Prefix", "prefix", false },
["prefix"] = { "Prefix", "prefix", false },
["prov"] = { "Proverb", "proverb", false },
}
result = result .. "\n\n===" .. pos_table[pos][1] .. "===\n{{ja-" ..
(not pos_table[pos][3] and "pos|" or "") .. pos_table[pos][2] ..
(args[1] ~= "" and "|" .. text or "") ..
(args["head"] and "|head=" .. args["head"] or "") ..
(args["kyu"] and "|kyu=" .. args["kyu"] or "") ..
(args["sin"] and "|shin=" .. args["sin"] or "") ..
(args["ak"] and "|" .. args["ak"] or "") ..
(args["ak2"] and "|" .. args["ak2"] or "") ..
(args["ro"] and "|rom=" .. args["ro"] or "") ..
(args["hh"] and "|hhira=" .. args["hh"] or "") ..
(args["hk"] and "|hkata=" .. args["hk"] or "")
if pos_table[pos][1] == "Adjective" then
result = result .. "|infl=" .. (args["infl"] and args["infl"] or "na")
end
result = result .. (args["type"] and "|type=" .. args["type"] or "") .. (args["tr"] and "|tr=" .. args["tr"] or "") .. "}}"
result = result .. "\n\n# " .. (args[3] or "{{rfdef|ja}}")
if pos_table[pos][1] == "Adjective" then
result = result .. "\n\n====Inflection====\n"
if args["infl"] == "i" or args["infl"] == "い" then
result = result .. "{{ja-i" .. (args[1] ~= "" and "|" .. sub(text, 1, -2) or "") .. "}}"
else
result = result .. "{{ja-na" .. (args[1] ~= "" and "|" .. text or "") .. "}}"
end
end
if pos_table[pos][2] == "verb" then
result = result .. "\n\n====Conjugation====\n{{ja-"
penul, cons = text, text
penul, cons = sub(penul, -2, -2), sub(cons, -1, -1)
penul, cons = export.hira_to_kata(penul), export.hira_to_kata(cons)
penul, cons = gsub(penul, ".", function (char) return data.kr[char] or char end), gsub(cons, ".", function (char) return data.kr[char] or char end)
penul, cons = sub(penul, -1, -1), sub(cons, 1, 1)
if cons == "u" then
cons = ""
elseif cons == "c" then
cons = "ts"
end
if final == "る" and (penul == "i" or penul == "e") and args["type"] == 2 then
result = result .. "ichi"
else
result = result .. "go-" .. cons .. "u"
end
result = result .. (args[1] ~= "" and "|" .. sub(text, 1, -2) or "") .. "}}"
end
if pos_table[pos][4] and args[4] ~= "" then
result = result .. "\n\n===" .. pos_table[pos][4] .. "===\n{{ja-" .. pos_table[pos][5] .. (args[1] ~= "" and "|" .. text or "") ..
(args["type"] and "|type=" .. args["type"] or "") .. (args["tr"] and "|tr=" .. args["tr"] or "") .. "}}\n\n# " .. (args[4] or "{{rfdef|ja}}")
if pos_table[pos][4] == "Verb" then
result = result .. "\n\n====Conjugation====\n{{ja-suru" .. (args[1] ~= "" and "|" .. text or "") .. "}}"
end
end
result = result .. other("syn", "=Synonyms=", args)
result = result .. other("ant", "=Antonyms=", args)
result = result .. other("der", "=Derived terms=", args)
result = result .. other("rel", "=Related terms=", args)
if args["dzh"] or args["dko"] or args["dvi"] then
result = result .. "\n\n===Descendants===\n" .. (args["dzh"] and "* Chinese: {{zh-l|" .. args["dzh"] .. "}}" or "")
if args["dzh"] then
if args["dko"] or args["dvi"] then
result = result .. "\n"
end
end
result = result .. (args["dko"] and "* Korean: {{ko-l|" .. args["dko"] .. "}}" or "")
if args["dko"] then
if args["dvi"] then
result = result .. "\n"
end
end
result = result .. (args["dvi"] and "* Vietnamese: {{vi-l|" .. args["dvi"] .. "}}" or "")
end
result = result .. other("ana", "Anagrams", args)
result = result .. other("also", "See also", args)
if args["acc_ref"] or args["acc2_ref"] or args["acc3_ref"] then
result = result .. "\n\n===References===\n<references/>"
end
if args["cn"] then
result = result .. "\n\n{{cln|ja|" .. args["cn"] .. (args["cn2"] and "|" .. args["cn2"] or "") .. (args["cn3"] and "|" .. args["cn3"] or "") .. (args["cn4"] and "|" .. args["cn4"] or "") .. (args["cn5"] and "|" .. args["cn5"] or "") .. (args["cn6"] and "|" .. args["cn6"] or "") .. (args["defs"] and "" or sortkey or "") .. "}}"
end
if args["ct"] then
result = result .. (args["cn"] and "\n" or "\n\n") .. "{{C|ja|" .. args["ct"] .. (args["ct2"] and "|" .. args["ct2"] or "") .. (args["ct3"] and "|" .. args["ct3"] or "") .. (args["ct4"] and "|" .. args["ct4"] or "") .. (args["ct5"] and "|" .. args["ct5"] or "") .. (args["ct6"] and "|" .. args["ct6"] or "") .. (args["defs"] and "" or sortkey or "") .. "}}"
end
if args["k"] then
result = result .."\n\n----\n\n==Korean==\n{{ko-hanjatab}}\n\n===" .. pos_table[args["kp"] or "n"][1] ..
"===\n{{ko-" .. pos_table[args["kp"] or "n"][2] .. "|hangeul=" .. args["k"] .. (args["mr"] and "|mr=" .. args["mr"] or "") .. (args["yl"] and "|y=" .. args["yl"] or "") .. "}}" ..
"\n\n# {{hanja form of|" .. args["k"] .. "|" .. (args["kd"] or args[3]) .. "}}"
end
return result
end
-- Hand the table of exported functions and data (export.*) back as this module's value.
return export
Content Disclaimer
Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.
- The information displayed on this website is sourced in whole or in part from Wikipedia and has been adapted for presentation here. We strive to provide accurate and relevant information; however:
- There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
- This content is not intended to constitute professional advice. It is provided for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
- Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
- Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.