-- Module:Japanese

-- Module table; every public entry point is attached to it and it is returned
-- at the end of the file. Declared local so it does not leak into the global
-- environment.
local p = {}
-- Argument-processing helper from the Dev wiki.
local getArgs = require('Dev:Arguments').getArgs
-- Shorthand for the Scribunto HTML builder library.
local html = mw.html

--- Wraps text in a <span class="japan" lang="ja">.
-- @param a wikitext to place inside the span
-- @return the rendered span as a string
function p.formatJ(a)
    local span = html.create("span")
    span:addClass("japan")
    span:attr("lang", "ja")
    span:wikitext(a)
    return tostring(span)
end

--- Wraps text in a <span class="romaji">.
-- @param a wikitext to place inside the span
-- @return the rendered span as a string
function p.formatR(a)
    local span = html.create("span")
    span:addClass("romaji")
    span:wikitext(a)
    return tostring(span)
end

--- Wraps text in a <span class="lit">.
-- @param a wikitext to place inside the span
-- @return the rendered span as a string
function p.formatL(a)
    local span = html.create("span")
    span:addClass("lit")
    span:wikitext(a)
    return tostring(span)
end

-- Pre-rendered <span class="help">?</span>, appended by p.format.
do
    local mark = html.create("span")
    mark:addClass("help")
    mark:wikitext("?")
    p.helpMark = tostring(mark)
end

-- Name of the tracking category for entries lacking a romanisation.
p.romCat = "Category:Japanese romanization needed"

-- Placeholder used when no romaji argument is supplied.
-- NOTE(review): the category name is appended as plain text, not wrapped in a
-- [[...]] link - confirm that this is intended.
p.nullRomaji = "[missing rōmaji]" .. p.romCat

--- Template entry point: reads the template arguments and hands them to p.format.
-- Arguments read from the frame (via getArgs):
--   [1]  Japanese text (required)
--   [2]  romaji; falls back to p.nullRomaji when absent
--   [3]  literal meaning (optional)
--   fmt  layout pattern forwarded to p.format (optional)
-- @param frame frame object passed to getArgs
-- @return the result of p.format, or an error text when [1] is missing
function p.initJ(frame)
    local args = getArgs(frame)

    local japanese = args[1]
    if not japanese then
        return ' Invalid template use '
    end

    local romaji = args[2] or p.nullRomaji
    -- Any romaji value containing "maji]" is replaced by the module's own placeholder.
    if romaji:find("maji]") then
        romaji = p.nullRomaji
    end

    return p.format(japanese, romaji, args[3], args.fmt)
end

--- Lays out Japanese text, romaji and an optional literal meaning according
-- to a small pattern language.
--
-- In fmt, the letters j, r and l stand for the formatted Japanese, romaji and
-- literal text. Anything inside balanced [...] is emitted verbatim. Outside
-- brackets, spaces are removed, "," becomes ", ", "(" becomes " (" and ")"
-- becomes ") ".
--
-- The help mark is appended to whichever of j / r appears last in the pattern.
-- When a literal meaning is given but the pattern has no "l", it is appended
-- to that same element as ", lit. ...".
--
-- @param j   Japanese text
-- @param r   romaji
-- @param l   literal meaning, may be nil
-- @param fmt layout pattern, defaults to "(j,r)"
-- @return an mw.html span object with class "jTemplate" (not a string)
function p.format(j, r, l, fmt)
    if not fmt then fmt = "(j,r)" end

    -- Split fmt into alternating segments: odd entries are pattern text,
    -- even entries are the contents of [...] literals.
    local t = {}
    local n = 1
    while true do
        local s, e = fmt:find("%b[]", n)
        if not s then break end
        table.insert(t, fmt:sub(n, s - 1))
        table.insert(t, fmt:sub(s + 1, e - 1))
        n = e + 1
    end
    table.insert(t, fmt:sub(n))

    -- Concatenate only the pattern segments to see which letters are used.
    local pat = ""
    for i = 1, #t, 2 do
        pat = pat .. t[i]
    end

    local jpos = pat:find("j", 1, true)
    local rpos = pat:find("r", 1, true)
    local isJ, isR, isL = jpos ~= nil, rpos ~= nil, false

    if isJ then j = p.formatJ(j) end
    if isR then r = p.formatR(r) end

    -- True when romaji is the last of j / r in the pattern. A pattern with
    -- "r" but no "j" used to raise a nil comparison error here.
    local romajiLast = isR and (not isJ or rpos > jpos)

    if romajiLast then
        r = r .. p.helpMark
    else
        j = j .. p.helpMark
    end

    if l then
        l = p.formatL(l)
        if pat:find("l", 1, true) then
            isL = true
        elseif romajiLast then
            r = r .. ", lit. " .. l
        else
            j = j .. ", lit. " .. l
        end
    end

    -- A table replacement is used verbatim by gsub, so "%" in the user's text
    -- cannot be misread as a replacement escape. An "l" in the pattern with no
    -- literal meaning supplied yields an empty string instead of leaking a
    -- placeholder into the output.
    local substitutions = { j = j, r = r, l = isL and l or "" }

    local parts = {}
    for i = 1, #t do
        local w = t[i]
        if i % 2 == 1 then
            w = w:gsub(" ", "")
                :gsub(",", ", ")
                :gsub("[(]", " (")
                :gsub("[)]", ") ")
                :gsub("[jrl]", substitutions)
        end
        parts[i] = w
    end

    return html.create("span"):addClass("jTemplate"):wikitext(mw.text.trim(table.concat(parts)))
end

--WIP

--- #invoke entry point for kana romanisation.
-- Reads the kana from the first positional argument and the options from the
-- named arguments "style", "case" and "particles"; particle guessing is
-- enabled only when "particles" is exactly the string "true".
-- @param frame frame object whose .args table is read
-- @return the result of p.romanizekana
function p.romanize(frame)
    local args = frame.args
    local guessParticles = (args.particles == "true")
    return p.romanizekana(args[1], args.style, args.case, guessParticles)
end

--returns larger form of chiisai kana
--to make かぁぁぁぁ type sequences romanise as kāāa (like かぁあぁあ)
--Kept as a global function because p.romanizekana calls it by its bare name.
--@param c a single kana character
--@return the full-size counterpart of a small kana, or c unchanged
function ookiikana(c)
    local inputs = {
        ["ぁ"] = "あ", ["ぃ"] = "い", ["ぅ"] = "う", ["ぇ"] = "え", ["ぉ"] = "お",
        ["っ"] = "つ",
        ["ゃ"] = "や", ["ゅ"] = "ゆ", ["ょ"] = "よ",
        ["ゎ"] = "わ",
        ["ァ"] = "ア", ["ィ"] = "イ", ["ゥ"] = "ウ", ["ェ"] = "エ", ["ォ"] = "オ",
        ["ッ"] = "ツ",
        ["ャ"] = "ヤ", ["ュ"] = "ユ", ["ョ"] = "ヨ",
        ["ヮ"] = "ワ",
    }
    return inputs[c] or c
end

--- Romanises a kana string using a data-driven romanisation system.
--
-- @param kana      string to romanise; "." "<" ">" and whitespace split it
--                  into parts that are romanised independently
--                  (e.g. ばあい->bāi, ば.あい->baai); a part introduced by ">"
--                  has its first lowercase letter upper-cased
-- @param style     key of the built-in styles table, or the name of a page to
--                  pass to mw.loadData; nil selects the default ("wikipedia")
-- @param case      defaults to "sentence"; passed on to processkana
-- @param particles whether processkana should run its particle guessing;
--                  defaults to false
-- @return the romanised string, trimmed
function p.romanizekana(kana, style, case, particles)
    if not case then case = "sentence" end
    if particles == nil then particles = false end
    local vowelhats = { a = "ā", i = "ī", u = "ū", e = "ē", o = "ō" }

    local styles = {
        nihon = "nihonsiki",
        kunrei = "kunreisiki",
        hepburn = "hepburn",
        wikipedia = "wikipedia", --modified hepburn used by Wikipedia and FFWiki
        convenient = "convenient", --modified hepburn used by VGMDb and MyAnimeList
        --the only other notable systems are wa-puro (but it's not /really/ for representing japanese words),
        --and the modified hepburn used by wiktionary and Google Translate (mostly similar to WP but uses ī)
        [""] = "wikipedia" --default
    }

    --if one of the above inputs aren't given, it will read the "style" input as a wiki page
    --so if a wiki wanted to have its own system they could define it locally
    local romanizationsystem = mw.loadData(styles[style or ""] and ("Module:Japanese/" .. styles[style or ""]) or style)

    local inputs, particlelist = romanizationsystem.inputs, (romanizationsystem.particles or {})

    --convert accepted non-kana into kana
    --split kana string into parts
    ---"."s separates boundaries
    ---e.g. ばあい->bāi, ば.あい->baai
    kana = processkana(kana, case, particles, particlelist)
    if kana:find("[.<>%s]") then
        local t = {}
        for whitespace, modifier, s in string.gmatch(kana, "(%s*)([.<>]*)([^.<>%s]+)") do
            -- Each part has no separators left, so the recursive call goes
            -- straight to the conversion loop below.
            local temp = p.romanizekana(s, style, "", false)
            if modifier == ">" then
                temp = mw.ustring.gsub(temp, "^%l", mw.ustring.upper)
            end
            table.insert(t, whitespace .. temp)
        end
        -- Trim so this path agrees with the single-part path below;
        -- processkana prefixes every word with a space.
        return mw.text.trim(table.concat(t))
    end

    local output = ""

    -- Entry of `inputs` for the kana (or kana combination) read so far but
    -- not yet written to output.
    local currentinput
    -- One extra iteration past the end flushes the last pending input.
    local upperLimit = mw.ustring.len(kana) + 1

    -- "sokuon" / "nasal" while a prefix is pending for the next output, else nil.
    local mode

    for i = 1, upperLimit do
        if i == upperLimit and not currentinput then break end
        local thiskana = mw.ustring.sub(kana, i, i)

        if currentinput then
            local combined = false
            local testcombo = currentinput.combo[thiskana]
            if testcombo then
                local testinput = inputs[thiskana]
                local nextkana = mw.ustring.sub(kana, i + 1, i + 1)
                --look ahead to test combo based on priority
                --e.g. "クウィ" should be "kwī", not "kū~i"
                local yieldtonext = i + 1 ~= upperLimit
                    and testinput
                    and testcombo.priority == false
                    and testinput.combo[nextkana]
                    and testinput.combo[nextkana].priority == true
                if not yieldtonext then
                    currentinput = testcombo
                    combined = true
                end
            end

            if combined == false then
                local flush = true
                local c, v = currentinput.consonant, currentinput.vowel
                if i ~= upperLimit then
                    -- Consonant/vowel of the upcoming kana. These were read
                    -- from the string `thiskana` before and so were always
                    -- nil, which made the two guards below ineffective.
                    local testinput = inputs[thiskana]
                    local tc, tv
                    if testinput then
                        tc, tv = testinput.consonant, testinput.vowel
                    end
                    if (c == "—" and v == "x") and not (tc == "n" and tv == "x") then
                        mode = "sokuon"
                        flush = false
                    elseif (c == "n" and v == "x") and not (tc == "—" and tv == "x") then
                        mode = "nasal"
                        flush = false
                    end
                end

                if flush then
                    -- "x" marks an absent consonant or vowel.
                    if c == "x" then c = "" end
                    if v == "x" then v = "" end
                    local prefix = ""
                    if mode == "sokuon" then
                        prefix = currentinput.sokuoncon
                    elseif mode == "nasal" then
                        prefix = currentinput.nasalcon
                    end
                    mode = nil
                    output = output .. prefix .. c .. v
                end

                currentinput = nil
            end
        end

        if not currentinput then
            if inputs[thiskana] then
                local c, v = inputs[thiskana].consonant, inputs[thiskana].vowel
                --handle small-vowel-kana sequences いぃぃぃぃぃ
                --this is the most low-tech part of this whole thing because the
                --function doesn't /know/ that ī is an extended form of i
                --this does nothing if we're not using hepburn vowel macrons
                if c == "~" and vowelhats[v] and output:match(vowelhats[v] .. "$") then
                    currentinput = inputs[ookiikana(thiskana)]
                else
                    currentinput = inputs[thiskana]
                end
            else
                if i ~= upperLimit then
                    local code = mw.ustring.codepoint(kana, i)
                    -- Shift code points in (0xFF00, 0xFF5F) down by 0xFEE0.
                    if code > 0xFF00 and code < 0xFF5F then
                        thiskana = mw.ustring.char(code - 0xFEE0)
                    end
                end
                output = output .. thiskana
            end
        end
    end

    return mw.text.trim(output)
end

--
--This function processes a kana string and makes standardised changes
---It tries to guess particles (をはへ) and converts them to the phonetic equivalent kana
-- (less pointless because it would allow easy changing of romanisation systems, but still burdensome.
-- potential for false positives and false negatives. it should probably be an optional mode)
--
--Kept as a global function because p.romanizekana calls it by its bare name.
--@param kana         string to process
--@param case         when "sentence", every non-particle word is marked with ">"
--                    unless it is already marked "<"
--@param particles    when falsy and case is not "sentence", kana is returned unchanged
--@param particlelist optional map from particle romaji (e.g. "ha") to replacement text
--@return kana unchanged, or the words re-joined as " " .. modifier .. word
--        (so the result starts with a space)
function processkana(kana, case, particles, particlelist)
    particlelist = particlelist or {}
    --yeah, kinda pointless. this can be converted in the input
    --local other = {
    --   ["％"] = "パーセント",
    --   ["＆"] = "ト",
    --   ["Ａ"] = "エー",
    --   ["Ｂ"] = "ビー",
    --   ["Ｃ"] = "シー",
    --   ["Ｄ"] = "ディー ",
    --   ["Ｅ"] = "イー",
    --   ["Ｆ"] = "エフ",
    --   ["Ｇ"] = "ジー",
    --   ["Ｈ"] = "エイチ",
    --   ["Ｉ"] = "アイ",
    --   ["Ｊ"] = "ジェー",
    --   ["Ｋ"] = "ケー",
    --   ["Ｌ"] = "エル",
    --   ["Ｍ"] = "エム",
    --   ["Ｎ"] = "エヌ",
    --   ["Ｏ"] = "オー",
    --   ["Ｐ"] = "ピー",
    --   ["Ｑ"] = "キュー",
    --   ["Ｒ"] = "アール",
    --   ["Ｓ"] = "エス",
    --   ["Ｔ"] = "ティー",
    --   ["Ｕ"] = "ユー",
    --   ["Ｖ"] = "ヴィー",
    --   ["Ｗ"] = "ダブリュー",
    --   ["Ｘ"] = "エックス ",
    --   ["Ｙ"] = "ワイ",
    --   ["Ｚ"] = "ゼット"
    --}
    --
    --for a,b in pairs(other) do
    --    kana = kana:gsub(a,"." .. b.. ".")
    --end

    if not particles and case ~= "sentence" then return kana end

    -- Whole words treated as particles, mapped to the key looked up in particlelist.
    local part = {
        ["は"] = "ha",
        ["が"] = "ga",
        ["で"] = "de",
        ["に"] = "ni",
        ["と"] = "to",
        ["も"] = "mo",
        ["へ"] = "he",
        ["を"] = "wo",
        ["の"] = "no",
        ["から"] = "kara",
        ["まで"] = "made",
        ["より"] = "yori",
        ["よ"] = "yo",
        ["ね"] = "ne",
        ["や"] = "ya",
        ["か"] = "ka"
    }

    -- Collect the pieces and join once instead of re-concatenating in the loop.
    local pieces = {}
    for m, t in string.gmatch(kana, "([.<>]?)([^%s]+)") do
        local modifier, text = m, t
        if part[text] then
            if particlelist[part[text]] then
                text = particlelist[part[text]]
            end
            -- Particles are marked "<" unless explicitly marked ">".
            if modifier ~= ">" then modifier = "<" end
        elseif case == "sentence" and modifier ~= "<" then
            modifier = ">"
        end
        pieces[#pieces + 1] = " " .. modifier .. text
    end

    return table.concat(pieces)
end

--this is loadData-d by sub-data
--- Builds the kana lookup used by p.romanizekana from a romanisation data table.
--
-- Fields of `data` read here: sokuoncons, nasal, choonpuvowels, vowelcombo,
-- extendcombo, yooncons, yoonvowels, specialcons, kanacombo, particles.
--
-- @param data romanisation rules
-- @return table { inputs = <map kana -> input entry>, particles = data.particles }
--         where each entry has consonant, vowel, combo, priority, sokuoncon
--         and nasalcon fields
function p.romanizefuncs(data)
    --see https://finalfantasy.fandom.com/wiki/Module:Japanese/wikipedia for example of data
    local katakana = {
        a = "ア", i ="イ", u ="ウ", e ="エ", o ="オ",
        xa = "ァ", xi = "ィ", xu = "ゥ", xe = "ェ", xo = "ォ",
        vu = "ヴ",
        ka = "カ", ki = "キ", ku = "ク", ke = "ケ", ko = "コ",
        ga = "ガ", gi = "ギ", gu = "グ", ge = "ゲ", go = "ゴ",
        sa = "サ", si = "シ", su = "ス", se = "セ", so = "ソ",
        za = "ザ", zi = "ジ", zu = "ズ", ze = "ゼ", zo = "ゾ",
        ta = "タ", ti = "チ", tu = "ツ", te = "テ", to = "ト",
        xtu = "ッ",
        da = "ダ", di = "ヂ", du = "ヅ", de = "デ", ["do"] = "ド",
        na = "ナ", ni = "ニ", nu = "ヌ", ne = "ネ", no = "ノ",
        ha = "ハ", hi = "ヒ", hu = "フ", he = "ヘ", ho = "ホ",
        ba = "バ", bi = "ビ", bu = "ブ", be = "ベ", bo = "ボ",
        pa = "パ", pi = "ピ", pu = "プ", pe = "ペ", po = "ポ",
        ma = "マ", mi = "ミ", mu = "ム", me = "メ", mo = "モ",
        ya = "ヤ", yu = "ユ", yo = "ヨ",
        xya = "ャ", xyu = "ュ", xyo = "ョ",
        ra = "ラ", ri = "リ", ru = "ル", re = "レ", ro = "ロ",
        wa = "ワ", wo = "ヲ",
        xwa = "ヮ",
        n = "ン"
    }
    local hiragana = {
        a = "あ", i = "い", u ="う", e = "え", o = "お",
        xa = "ぁ", xi = "ぃ", xu = "ぅ", xe = "ぇ", xo = "ぉ",
        vu = "ゔ",
        ka = "か", ki = "き", ku = "く", ke = "け", ko = "こ",
        ga = "が", gi = "ぎ", gu = "ぐ", ge = "げ", go = "ご",
        sa = "さ", si = "し", su = "す", se = "せ", so = "そ",
        za = "ざ", zi = "じ", zu = "ず", ze = "ぜ", zo = "ぞ",
        ta = "た", ti = "ち", tu = "つ", te = "て", to = "と",
        xtu = "っ",
        da = "だ", di = "ぢ", du = "づ", de = "で", ["do"] = "ど",
        na = "な", ni = "に", nu = "ぬ", ne = "ね", no = "の",
        ha = "は", hi = "ひ", hu = "ふ", he = "へ", ho = "ほ",
        ba = "ば", bi = "び", bu = "ぶ", be = "べ", bo = "ぼ",
        pa = "ぱ", pi = "ぴ", pu = "ぷ", pe = "ぺ", po = "ぽ",
        ma = "ま", mi = "み", mu = "む", me = "め", mo = "も",
        ya = "や", yu = "ゆ", yo = "よ",
        xya = "ゃ", xyu = "ゅ", xyo = "ょ",
        ra = "ら", ri = "り", ru = "る", re = "れ", ro = "ろ",
        wa = "わ", wo = "を",
        xwa = "ゎ",
        n = "ん"
    }
    --this function creates a map of every possible kana combination and its romanisation output
    --it does things twice for hiragana and katakana. maybe we should be converting?
    --mixed kana yields mixed results. avoid mixing kana
    local inputs = {}

    -- Builds one input entry and, recursively, the entries reachable from it
    -- through a following kana (its `combo` table). Declared local so it does
    -- not leak into the global environment.
    -- c: consonant, v: vowel ("x" when absent), priority: defaults to true
    local function newinput(c, v, priority)
        if not v then v = "x" end
        local thisinput = {
            consonant = c,
            vowel = v,
            combo = {},
            priority = priority == true or priority == nil,
            sokuoncon = data.sokuoncons[c],
            nasalcon = data.nasal[c]
        }
        if not thisinput.sokuoncon then
            if c == "~" or c == "x" then
                thisinput.sokuoncon = "—"
            else
                -- default: the first byte of the consonant
                thisinput.sokuoncon = c:sub(1, 1)
            end
        end
        if not thisinput.nasalcon then
            thisinput.nasalcon = "n"
        end
        if data.choonpuvowels[v] then
            thisinput.combo["ー"] = newinput(c, data.choonpuvowels[v])
        end
        if data.vowelcombo[v] then
            -- these combinations are created with priority = false
            for nextkana, newvowel in pairs(data.vowelcombo[v]) do
                thisinput.combo[katakana[nextkana]] = newinput(c, newvowel, false)
                thisinput.combo[hiragana[nextkana]] = newinput(c, newvowel, false)
            end
        end
        if data.extendcombo[v] then
            for nextkana, newvowel in pairs(data.extendcombo[v]) do
                thisinput.combo[katakana[nextkana]] = newinput(c, newvowel)
                thisinput.combo[hiragana[nextkana]] = newinput(c, newvowel)
            end
        end
        if data.yooncons[c .. v] then
            for nextkana, newvowel in pairs(data.yoonvowels) do
                thisinput.combo[katakana[nextkana]] = newinput(data.yooncons[c .. v], newvowel)
                thisinput.combo[hiragana[nextkana]] = newinput(data.yooncons[c .. v], newvowel)
            end
        end

        return thisinput
    end

    local script = { katakana, hiragana }
    for i = 1, #script do
        for k, v in pairs(script[i]) do
            -- By default the key's first byte is the consonant and the rest the vowel.
            local cons = k:sub(1, 1)
            local vowel = k:sub(2)
            if k == "xtu" then
                vowel = "x"
                cons = "—"
            elseif cons == "x" then
                cons = "~"
                --vowel = "x"
            elseif k == "n" then
                cons = "n"
                vowel = "x"
            elseif #k == 1 then
                -- single-letter key: a bare vowel with no consonant
                vowel = cons
                cons = "x"
            elseif data.specialcons[k] then
                cons = data.specialcons[k]
            end
            inputs[v] = newinput(cons, vowel)
            --specific kana combinations
            if data.kanacombo[k] then
                for k2, cv in pairs(data.kanacombo[k]) do
                    inputs[v].combo[script[i][k2]] = newinput(cv[1], cv[2])
                end
            end
        end
    end

    return {
        inputs = inputs,
        particles = data.particles
    }
end

return p