[create]
The documentation for this module is missing. Click here to create it.
-- <nowiki>
local p = {}
local getArgs = require('Dev:Arguments').getArgs
local html = mw.html
function p.formatJ(a)
return tostring(html.create("span"):addClass("japan"):attr("lang", "ja"):wikitext(a))
end
function p.formatR(a)
return tostring(html.create("span"):addClass("romaji"):wikitext(a))
end
function p.formatL(a)
return tostring(html.create("span"):addClass("lit"):wikitext(a))
end
p.helpMark = tostring(html.create("span"):addClass("help"):wikitext("[[wikipedia:Help:Japanese|?]]"))
p.romCat = "Category:Japanese romanization needed"
p.nullRomaji = "[missing rōmaji]" .. "[[" .. p.romCat .. "]]"
function p.initJ(frame)
local f = getArgs(frame, { frameOnly = frame.args[1] and true or false })
local j = f[1]
if not j then return '<strong class="error">Invalid template use</strong>' end
local r = f[2] or p.nullRomaji
local l = f[3]
local fmt = f.fmt
if r and r:find("maji]") then r = p.nullRomaji end
return p.format(j, r, l, fmt)
end
function p.format(j, r, l, fmt)
if not fmt then fmt = "(j,r)" end
local n = 1
local t = {}
while fmt:sub(n):find("%b[]") do
local s, e = fmt:sub(n):find("%b[]")
table.insert(t, fmt:sub(n, n+s-2))
table.insert(t, fmt:sub(n+s, n+e-2))
n = n+e
end
table.insert(t, fmt:sub(n))
local isJ, isR, isL = false, false, false
local pat = ""
for i=1, #t, 2 do
pat = pat .. t[i]
end
if pat:find("j") then
isJ = true
j = p.formatJ(j)
end
if pat:find("r") then
isR = true
r = p.formatR(r)
end
if isR and pat:find("r") > pat:find("j") then
r = r .. p.helpMark
else
j = j .. p.helpMark
end
if l then
l = p.formatL(l)
if pat:find("l") then
isL = true
else
if isR and pat:find("r") > pat:find("j") then
r = r .. ", lit. " .. l
else
j = j .. ", lit. " .. l
end
end
end
local output = ""
for i=1, #t do
local w = t[i]
if i%2 == 1 then
w = w
:gsub(" ", "")
:gsub(",", ", ")
:gsub("[(]", " (")
:gsub("[)]", ") ")
:gsub("j", "$j~")
:gsub("r", "$r~")
:gsub("l", "$l~")
if isJ then w = w:gsub("$j~", j) end
if isR then w = w:gsub("$r~", r) end
if isL then w = w:gsub("$l~", l) end
end
output = output .. w
end
return html.create("span"):addClass("jTemplate"):wikitext(mw.text.trim(output))
end
--WIP
function p.romanize(frame)
return p.romanizekana(frame.args[1], frame.args.style, frame.args.case, frame.args.particles=="true")
end
--returns larger form of chiisai kana
--to make かぁぁぁぁ type sequences romanise as kāāa (like かぁあぁあ)
local function ookiikana(c)
local inputs = {
["ぁ"] = "あ",
["ぃ"] = "い",
["ぅ"] = "う",
["ぇ"] = "え",
["ぉ"] = "お",
["っ"] = "つ",
["ゃ"] = "や",
["ゅ"] = "ゆ",
["ょ"] = "よ",
["ゎ"] = "わ",
["ァ"] = "ア",
["ィ"] = "イ",
["ゥ"] = "ウ",
["ェ"] = "エ",
["ォ"] = "オ",
["ッ"] = "ツ",
["ャ"] = "ヤ",
["ュ"] = "ユ",
["ョ"] = "ヨ",
["ヮ"] = "ワ",
}
return inputs[c] or c
end
--
--This function processes a kana string and makes standardised changes
---It tries to guess particles (をはへ) and converts them to the phonetic equivalent kana (less pointless because it would allow easy changing of romanisation systems, but still burdensome. potential for false positives and false negatives. it should probably be an optional mode)
--
local function processkana(kana, case, particles, particlelist)
particlelist = particlelist or {}
----yeah, kinda pointless. this can be converted in the input
--local other = {
-- ["%"] = "パーセント",
-- ["&"] = "ト",
-- ["A"] = "エー",
-- ["B"] = "ビー",
-- ["C"] = "シー",
-- ["D"] = "ディー ",
-- ["E"] = "イー",
-- ["F"] = "エフ",
-- ["G"] = "ジー",
-- ["H"] = "エイチ",
-- ["I"] = "アイ",
-- ["J"] = "ジェー",
-- ["K"] = "ケー",
-- ["L"] = "エル",
-- ["M"] = "エム",
-- ["N"] = "エヌ",
-- ["O"] = "オー",
-- ["P"] = "ピー",
-- ["Q"] = "キュー",
-- ["R"] = "アール",
-- ["S"] = "エス",
-- ["T"] = "ティー",
-- ["U"] = "ユー",
-- ["V"] = "ヴィー",
-- ["W"] = "ダブリュー",
-- ["X"] = "エックス ",
-- ["Y"] = "ワイ",
-- ["Z"] = "ゼット"
--}
--
--for a,b in pairs(other) do
-- kana = kana:gsub(a,"." .. b.. ".")
--end
if not particles and case ~= "sentence" then return kana end
--
local part = {
["は"] = "ha",
["が"] = "ga",
["で"] = "de",
["に"] = "ni",
["と"] = "to",
["も"] = "mo",
["へ"] = "he",
["を"] = "wo",
["の"] = "no",
["から"] = "kara",
["まで"] = "made",
["より"] = "yori",
["よ"] = "yo",
["ね"] = "ne",
["や"] = "ya",
["か"] = "ka"
}
local temp = ""
for m, t in string.gmatch(kana, "([.<>]?)([^%s]+)") do
local modifier, text = m, t
if part[text] then
if particlelist[part[text]] then
text = particlelist[part[text]]
end
if modifier ~= ">" then modifier = "<" end
elseif case == "sentence" and modifier ~= "<" then
modifier = ">"
end
temp = temp .. " " .. modifier .. text
end
return temp
end
function p.romanizekana(kana, style, case, particles)
if not case then case = "sentence" end
if particles == nil then particles = false end
local vowelhats = {
a = "ā",
i = "ī",
u = "ū",
e = "ē",
o = "ō"
}
local styles = {
nihon = "nihonsiki",
kunrei = "kunreisiki",
hepburn = "hepburn",
wikipedia = "wikipedia",--modified hepburn used by Wikipedia and FFWiki
convenient = "convenient",--modified hepburn used by VGMDb and MyAnimeList
--the only other notable systems are wa-puro (but it's not /really/ for representing japanese words), and the modified hepburn used by wiktionary and Google Translate (mostly similar to WP but uses ī)
[""] = "wikipedia"--default
}
--if one of the above inputs aren't given, it will read the "style" input as a wiki page
--so if a wiki wanted to have its own system they could define it locally
local romanizationsystem = mw.loadData(styles[style or ""] and ("Module:Japanese/" .. styles[style or ""]) or style)
local inputs, particlelist = romanizationsystem.inputs, (romanizationsystem.particles or {})
--convert accepted non-kana into kana
--split kana string into parts
---"."s separates boundaries
---e.g. ばあい->bāi, ば.あい->baai
kana = processkana(kana, case, particles, particlelist)
if kana:find("[.<>%s]") then
local t = {}
for whitespace, modifier, s in string.gmatch(kana, "(%s*)([.<>]*)([^.<>%s]+)") do
local temp = p.romanizekana(s, style, "", false)
if
modifier == ">"
then
temp = mw.ustring.gsub(temp, "^%l", mw.ustring.upper)
end
table.insert(t, whitespace .. temp)
end
return table.concat(t)
end
local output = ""
local currentinput
local upperLimit = mw.ustring.len(kana) + 1
local mode
for i=1, upperLimit do
if i == upperLimit and not currentinput then
break
end
local thiskana = mw.ustring.sub(kana, i, i)
if currentinput then
local temp = false
local testcombo = currentinput.combo[thiskana]
if testcombo then
local testinput = inputs[thiskana]
--look ahead to test combo based on priority
--e.g. "クウィ" should be "kwī", not "kū~i"
if i+1 ~= upperLimit and testinput and testcombo.priority == false and testinput.combo[mw.ustring.sub(kana, i+1, i+1)] and testinput.combo[mw.ustring.sub(kana, i+1, i+1)].priority == true then
else
currentinput = testcombo
temp = true
end
end
if temp == false then
local temp = true
local c, v = currentinput.consonant, currentinput.vowel
if i ~= upperLimit then
local testinput = inputs[thiskana]
local tc, tv = thiskana.consonant, thiskana.vowel
if (c == "—" and v== "x") and not (tc == "n" and tv== "x") then
mode = "sokuon"
temp = false
elseif (c == "n" and v== "x") and not (tc == "—" and tv== "x") then
mode = "nasal"
temp = false
end
end
if temp then
if c == "x" then c = "" end
if v == "x" then v = "" end
local prefix = ""
if mode then
if mode == "sokuon" then
prefix = currentinput.sokuoncon
elseif mode == "nasal" then
prefix = currentinput.nasalcon
end
end
mode = nil
output = output .. prefix .. c .. v
end
currentinput = nil
end
end
if not currentinput then
if inputs[thiskana] then
local c, v = inputs[thiskana].consonant, inputs[thiskana].vowel
--handle small-vowel-kana sequences いぃぃぃぃぃ
--this is the most low-tech part of this whole thing because the
--function doesn't /know/ that ī is an extended form of i
--this does nothing if we're not using hepburn vowel macrons
if c == "~" and vowelhats[v] and output:match(vowelhats[v] .. "$") then
currentinput = inputs[ookiikana(thiskana)]
else
currentinput = inputs[thiskana]
end
else
if i ~= upperLimit then
local code = mw.ustring.codepoint(kana, i)
if code > 0xFF00 and code < 0xFF5F then
thiskana = mw.ustring.char(code-0xFEE0)
end
end
output = output .. thiskana
end
end
end
return mw.text.trim(output)
end
--this is loadData-d by sub-data
function p.romanizefuncs(data)
--see https://finalfantasy.fandom.com/wiki/Module:Japanese/wikipedia for example of data
local katakana = {
a = "ア",
i ="イ",
u ="ウ",
e ="エ",
o ="オ",
xa = "ァ",
xi = "ィ",
xu = "ゥ",
xe = "ェ",
xo = "ォ",
vu = "ヴ",
ka = "カ",
ki = "キ",
ku = "ク",
ke = "ケ",
ko = "コ",
ga = "ガ",
gi = "ギ",
gu = "グ",
ge = "ゲ",
go = "ゴ",
sa = "サ",
si = "シ",
su = "ス",
se = "セ",
so = "ソ",
za = "ザ",
zi = "ジ",
zu = "ズ",
ze = "ゼ",
zo = "ゾ",
ta = "タ",
ti = "チ",
tu = "ツ",
te = "テ",
to = "ト",
xtu = "ッ",
da = "ダ",
di = "ヂ",
du = "ヅ",
de = "デ",
["do"] = "ド",
na = "ナ",
ni = "ニ",
nu = "ヌ",
ne = "ネ",
no = "ノ",
ha = "ハ",
hi = "ヒ",
hu = "フ",
he = "ヘ",
ho = "ホ",
ba = "バ",
bi = "ビ",
bu = "ブ",
be = "ベ",
bo = "ボ",
pa = "パ",
pi = "ピ",
pu = "プ",
pe = "ペ",
po = "ポ",
ma = "マ",
mi = "ミ",
mu = "ム",
me = "メ",
mo = "モ",
ya = "ヤ",
yu = "ユ",
yo = "ヨ",
xya = "ャ",
xyu = "ュ",
xyo = "ョ",
ra = "ラ",
ri = "リ",
ru = "ル",
re = "レ",
ro = "ロ",
wa = "ワ",
wo = "ヲ",
xwa = "ヮ",
n = "ン"
}
local hiragana = {
a = "あ",
i = "い",
u ="う",
e = "え",
o = "お",
xa = "ぁ",
xi = "ぃ",
xu = "ぅ",
xe = "ぇ",
xo = "ぉ",
vu = "ゔ",
ka = "か",
ki = "き",
ku = "く",
ke = "け",
ko = "こ",
ga = "が",
gi = "ぎ",
gu = "ぐ",
ge = "げ",
go = "ご",
sa = "さ",
si = "し",
su = "す",
se = "せ",
so = "そ",
za = "ざ",
zi = "じ",
zu = "ず",
ze = "ぜ",
zo = "ぞ",
ta = "た",
ti = "ち",
tu = "つ",
te = "て",
to = "と",
xtu = "っ",
da = "だ",
di = "ぢ",
du = "づ",
de = "で",
["do"] = "ど",
na = "な",
ni = "に",
nu = "ぬ",
ne = "ね",
no = "の",
ha = "は",
hi = "ひ",
hu = "ふ",
he = "へ",
ho = "ほ",
ba = "ば",
bi = "び",
bu = "ぶ",
be = "べ",
bo = "ぼ",
pa = "ぱ",
pi = "ぴ",
pu = "ぷ",
pe = "ぺ",
po = "ぽ",
ma = "ま",
mi = "み",
mu = "む",
me = "め",
mo = "も",
ya = "や",
yu = "ゆ",
yo = "よ",
xya = "ゃ",
xyu = "ゅ",
xyo = "ょ",
ra = "ら",
ri = "り",
ru = "る",
re = "れ",
ro = "ろ",
wa = "わ",
wo = "を",
xwa = "ゎ",
n = "ん"
}
--this function creates a map of every possible kana combination and its romanisation output
--it does things twice for hiragana and katakana. maybe we should be converting?
--mixed kana yields mixed results. avoid mixing kana
local inputs = {}
local function newinput(c, v, priority)
if not v then v = "x" end
local thisinput = { consonant = c, vowel = v, combo = {}, priority = priority==true or priority==nil, sokuoncon = data.sokuoncons[c], nasalcon = data.nasal[c] }
if not thisinput.sokuoncon then
if c == "~" or c == "x" then
thisinput.sokuoncon = "—"
else
thisinput.sokuoncon = c:sub(1,1)
end
end
if not thisinput.nasalcon then
thisinput.nasalcon = "n"
end
if data.choonpuvowels[v] then
thisinput.combo["ー"] = newinput(c, data.choonpuvowels[v])
end
if data.vowelcombo[v] then
for nextkana, newvowel in pairs(data.vowelcombo[v]) do
thisinput.combo[katakana[nextkana]] = newinput(c, newvowel, false)
thisinput.combo[hiragana[nextkana]] = newinput(c, newvowel, false)
end
end
if data.extendcombo[v] then
for nextkana, newvowel in pairs(data.extendcombo[v]) do
thisinput.combo[katakana[nextkana]] = newinput(c, newvowel)
thisinput.combo[hiragana[nextkana]] = newinput(c, newvowel)
end
end
if data.yooncons[c .. v] then
for nextkana, newvowel in pairs(data.yoonvowels) do
thisinput.combo[katakana[nextkana]] = newinput(data.yooncons[c .. v], newvowel)
thisinput.combo[hiragana[nextkana]] = newinput(data.yooncons[c .. v], newvowel)
end
end
return thisinput
end
local script = { katakana, hiragana }
for i=1, #script do for k, v in pairs(script[i]) do
local cons = k:sub(1,1)
local vowel = k:sub(2)
if k == "xtu" then
vowel = "x"
cons = "—"
elseif cons == "x" then
cons = "~"
--vowel = "x"
elseif k == "n" then
cons = "n"
vowel = "x"
elseif #k==1 then
vowel = cons
cons = "x"
elseif data.specialcons[k] then
cons = data.specialcons[k]
end
inputs[v] = newinput(cons, vowel)
--specific kana combinations
if data.kanacombo[k] then
for k2, cv in pairs (data.kanacombo[k]) do
inputs[v].combo[script[i][k2]] = newinput(cv[1], cv[2])
end
end
end end
return {
inputs = inputs,
particles = data.particles
}
end
return p
-- </nowiki>