-- Fandom Developers Wiki
-- Documentation icon Module documentation
--
-- The documentation for this module is missing. Click here to create it.

-- <nowiki>
local p = {}
local getArgs = require('Dev:Arguments').getArgs
local html = mw.html

--- Wraps Japanese text in a `span` with class "japan" and `lang="ja"`.
-- @param a wikitext to place inside the span
-- @return the rendered span as a string
function p.formatJ(a)
    local span = html.create("span")
    span:addClass("japan")
    span:attr("lang", "ja")
    span:wikitext(a)
    return tostring(span)
end

--- Wraps a romanization in a `span` with class "romaji".
-- @param a wikitext to place inside the span
-- @return the rendered span as a string
function p.formatR(a)
    local span = html.create("span")
    span:addClass("romaji")
    span:wikitext(a)
    return tostring(span)
end

--- Wraps a literal translation in a `span` with class "lit".
-- @param a wikitext to place inside the span
-- @return the rendered span as a string
function p.formatL(a)
    local span = html.create("span")
    span:addClass("lit")
    span:wikitext(a)
    return tostring(span)
end

-- Pre-rendered "?" link to Wikipedia's Help:Japanese page, wrapped in a
-- span with class "help".
p.helpMark = tostring(
    html.create("span")
        :addClass("help")
        :wikitext("[[wikipedia:Help:Japanese|?]]")
)

-- Category added to pages where no romanization was supplied.
p.romCat = "Category:Japanese romanization needed"

-- Placeholder used in place of a missing romanization; it also adds the
-- page to p.romCat.
p.nullRomaji = "[missing rōmaji][[" .. p.romCat .. "]]"

--- Template entry point: reads the arguments and hands them to p.format.
-- Positional arguments: 1 = Japanese text (required), 2 = romanization,
-- 3 = literal meaning. Named argument `fmt` is the layout string.
-- @param frame the Scribunto frame object
-- @return an error message string when argument 1 is missing, otherwise
--         whatever p.format returns
function p.initJ(frame)
    -- Only look at the frame's own arguments when it was given a first
    -- positional argument directly.
    local frameOnly = false
    if frame.args[1] then
        frameOnly = true
    end
    local args = getArgs(frame, { frameOnly = frameOnly })

    local japanese = args[1]
    if not japanese then
        return '<strong class="error">Invalid template use</strong>'
    end

    -- A missing romanization, or one that contains "maji]" (i.e. some
    -- spelling of the placeholder text), becomes the standard placeholder.
    local romaji = args[2]
    if not romaji or romaji:find("maji]") then
        romaji = p.nullRomaji
    end

    local literal = args[3]
    local layout = args.fmt
    return p.format(japanese, romaji, literal, layout)
end

--- Lays out Japanese text, its romanization and an optional literal meaning
-- according to a small format string.
--
-- In `fmt`, text inside square brackets is copied to the output verbatim
-- (without the brackets). Everywhere else the letters `j`, `r` and `l` are
-- placeholders for the Japanese text, the romanization and the literal
-- meaning; spaces are dropped, and spacing is normalised around commas and
-- parentheses.
--
-- The help mark is appended to the romanization when `r` appears after `j`
-- in the format (or when there is no `j` at all), otherwise to the Japanese
-- text. When a literal meaning is given but the format has no `l`, it is
-- appended as ", lit. ..." to that same element.
--
-- @param j   Japanese text
-- @param r   romanization
-- @param l   literal meaning (optional)
-- @param fmt format string (optional, defaults to "(j,r)")
-- @return an mw.html span object with class "jTemplate"
function p.format(j, r, l, fmt)
    if not fmt then fmt = "(j,r)" end

    -- Split fmt into alternating segments:
    --   odd indices  = template text (placeholders get expanded)
    --   even indices = contents of a [...] group (copied verbatim)
    local n = 1
    local t = {}
    while true do
        local s, e = fmt:find("%b[]", n)
        if not s then break end
        table.insert(t, fmt:sub(n, s - 1))
        table.insert(t, fmt:sub(s + 1, e - 1))
        n = e + 1
    end
    table.insert(t, fmt:sub(n))

    -- All template text joined together, used to find out which
    -- placeholders are present and in which order.
    local templateParts = {}
    for i = 1, #t, 2 do
        templateParts[#templateParts + 1] = t[i]
    end
    local pat = table.concat(templateParts)

    local posJ = pat:find("j", 1, true)
    local posR = pat:find("r", 1, true)
    local isJ, isR, isL = posJ ~= nil, posR ~= nil, false

    if isJ then j = p.formatJ(j) end
    if isR then r = p.formatR(r) end

    -- Decide which element carries the trailing decorations. The nil check
    -- on posJ matters: comparing a number with nil raises an error, which
    -- used to happen for formats that contain "r" but no "j".
    local decorateR = isR and (posJ == nil or posR > posJ)

    if decorateR then
        r = r .. p.helpMark
    else
        j = j .. p.helpMark
    end

    if l then
        l = p.formatL(l)
        if pat:find("l", 1, true) then
            isL = true
        elseif decorateR then
            r = r .. ", lit. " .. l
        else
            j = j .. ", lit. " .. l
        end
    end

    -- Placeholder -> replacement text. A nil entry leaves the placeholder
    -- marker untouched, as before (e.g. "l" in the format without a literal
    -- meaning being supplied).
    local values = {
        j = isJ and j or nil,
        r = isR and r or nil,
        l = isL and l or nil,
    }

    local pieces = {}
    for i = 1, #t do
        local w = t[i]
        if i % 2 == 1 then
            w = w
                :gsub(" ", "")
                :gsub(",", ", ")
                :gsub("[(]", " (")
                :gsub("[)]", ") ")
                :gsub("j", "$j~")
                :gsub("r", "$r~")
                :gsub("l", "$l~")

            -- Expand every marker in a single pass using a table lookup.
            -- Unlike a replacement *string*, a table value is inserted
            -- literally, so "%" in user text can neither raise an error nor
            -- be read as a capture reference, and text that was just
            -- inserted is never rescanned for further markers.
            w = w:gsub("%$([jrl])~", values)
        end
        pieces[#pieces + 1] = w
    end

    local output = table.concat(pieces)
    return html.create("span"):addClass("jTemplate"):wikitext(mw.text.trim(output))
end

--WIP

--- #invoke entry point for kana romanization.
-- Reads the kana from the first positional argument and the options from
-- the named arguments `style`, `case` and `particles` (only the exact
-- string "true" enables particles), then forwards them to p.romanizekana.
-- @param frame the Scribunto frame object
-- @return the result of p.romanizekana
function p.romanize(frame)
    local args = frame.args
    local kana = args[1]
    local style = args.style
    local case = args.case
    local particles = args.particles == "true"
    return p.romanizekana(kana, style, case, particles)
end

--- Maps a small (chiisai) kana to its full-size counterpart.
-- Used so that sequences like かぁぁぁぁ romanise as kāāa (like かぁあぁあ).
-- @param c a single kana character
-- @return the full-size kana, or `c` unchanged when it is not a small kana
local function ookiikana(c)
    local fullsize = {
        -- hiragana
        ["ぁ"] = "あ", ["ぃ"] = "い", ["ぅ"] = "う", ["ぇ"] = "え", ["ぉ"] = "お",
        ["っ"] = "つ",
        ["ゃ"] = "や", ["ゅ"] = "ゆ", ["ょ"] = "よ",
        ["ゎ"] = "わ",
        -- katakana
        ["ァ"] = "ア", ["ィ"] = "イ", ["ゥ"] = "ウ", ["ェ"] = "エ", ["ォ"] = "オ",
        ["ッ"] = "ツ",
        ["ャ"] = "ヤ", ["ュ"] = "ユ", ["ョ"] = "ヨ",
        ["ヮ"] = "ワ",
    }

    local large = fullsize[c]
    if large then
        return large
    end
    return c
end

--- Pre-processes a kana string before romanization.
--
-- The string is split on whitespace into words, each optionally preceded by
-- one modifier character (".", "<" or ">"). Words that are exactly a known
-- particle are swapped for the entry in `particlelist` (when there is one)
-- and marked "<" unless already marked ">". In "sentence" case every other
-- word is marked ">" unless already marked "<".
--
-- Guessing particles (をはへ) this way allows easy switching between
-- romanisation systems, but it can give false positives and false
-- negatives, so it should probably be an optional mode.
--
-- (An earlier idea of also spelling out "%", "&" and the Latin letters A-Z
-- in katakana was dropped: that conversion can be done in the input.)
--
-- @param kana         the kana string
-- @param case         "sentence" turns on the ">" marking described above
-- @param particles    truthy to process particles even outside sentence case
-- @param particlelist optional map from a particle's key ("ha", "wo", ...)
--                     to the kana that should replace it
-- @return `kana` unchanged when neither option applies; otherwise the
--         rebuilt string, with each word preceded by a single space and its
--         modifier
local function processkana(kana, case, particles, particlelist)
    local replacements = particlelist or {}

    -- Nothing to do unless particles were requested or we are in
    -- sentence case.
    if case ~= "sentence" and not particles then
        return kana
    end

    -- Particle kana -> key used to look it up in `replacements`.
    local particlekeys = {
        ["は"] = "ha",
        ["が"] = "ga",
        ["で"] = "de",
        ["に"] = "ni",
        ["と"] = "to",
        ["も"] = "mo",
        ["へ"] = "he",
        ["を"] = "wo",
        ["の"] = "no",
        ["から"] = "kara",
        ["まで"] = "made",
        ["より"] = "yori",
        ["よ"] = "yo",
        ["ね"] = "ne",
        ["や"] = "ya",
        ["か"] = "ka"
    }

    local pieces = {}
    for mark, word in string.gmatch(kana, "([.<>]?)([^%s]+)") do
        local modifier, text = mark, word
        local key = particlekeys[word]

        if key then
            -- Known particle: use the system's replacement if it has one.
            text = replacements[key] or word
            if modifier ~= ">" then
                modifier = "<"
            end
        elseif case == "sentence" and modifier ~= "<" then
            modifier = ">"
        end

        pieces[#pieces + 1] = " " .. modifier .. text
    end

    return table.concat(pieces)
end

--- Romanizes a kana string using a loadable romanization system.
--
-- @param kana      the kana string. "." marks a boundary inside a word
--                  (ばあい -> bāi, ば.あい -> baai); "<" and ">" are word
--                  modifiers produced by processkana, where ">" upper-cases
--                  the first letter of the word
-- @param style     one of the built-in style names below, or the title of a
--                  data module to load instead; nil selects "wikipedia"
-- @param case      passed to processkana; defaults to "sentence"
-- @param particles passed to processkana; defaults to false
-- @return the romanized string, trimmed
function p.romanizekana(kana, style, case, particles)
    if not case then case = "sentence" end
    if particles == nil then particles = false end

    -- plain vowel -> macron form, used for small-vowel sequences below
    local vowelhats = {
        a = "ā",
        i = "ī",
        u = "ū",
        e = "ē",
        o = "ō"
    }

    local styles = {
        nihon = "nihonsiki",
        kunrei = "kunreisiki",
        hepburn = "hepburn",
        wikipedia = "wikipedia",--modified hepburn used by Wikipedia and FFWiki
        convenient = "convenient",--modified hepburn used by VGMDb and MyAnimeList
        --the only other notable systems are wa-puro (but it's not /really/ for representing japanese words), and the modified hepburn used by wiktionary and Google Translate (mostly similar to WP but uses ī)
        [""] = "wikipedia"--default
    }

    --if the style is not one of the names above, it is read as a wiki page
    --so if a wiki wanted to have its own system they could define it locally
    local romanizationsystem = mw.loadData(styles[style or ""] and ("Module:Japanese/" .. styles[style or ""]) or style)

    local inputs, particlelist = romanizationsystem.inputs, (romanizationsystem.particles or {})

    --split kana string into parts
    ---"."s separates boundaries
    ---e.g. ばあい->bāi, ば.あい->baai
    kana = processkana(kana, case, particles, particlelist)
    if kana:find("[.<>%s]") then
        local t = {}
        for whitespace, modifier, s in string.gmatch(kana, "(%s*)([.<>]*)([^.<>%s]+)") do
            -- each part is romanized on its own, with case and particle
            -- handling switched off so processkana leaves it untouched
            local temp = p.romanizekana(s, style, "", false)
            if modifier == ">" then
                temp = mw.ustring.gsub(temp, "^%l", mw.ustring.upper)
            end
            table.insert(t, whitespace .. temp)
        end
        return table.concat(t)
    end

    local output = ""

    -- the input entry being built up; nil when nothing is pending
    local currentinput

    -- one past the last character, so the pending entry gets flushed
    local upperLimit = mw.ustring.len(kana) + 1

    -- "sokuon" / "nasal" while a preceding っ / ん is waiting to be attached
    -- as a prefix to the next kana that is output
    local mode

    for i=1, upperLimit do
        if i == upperLimit and not currentinput then
            break
        end
        local thiskana = mw.ustring.sub(kana, i, i)
        if currentinput then
            local combined = false
            local testcombo = currentinput.combo[thiskana]
            if testcombo then
                local testinput = inputs[thiskana]

                --look ahead to test combo based on priority
                --e.g. "クウィ" should be "kwī", not "kū~i"
                local deferred = i+1 ~= upperLimit
                    and testinput
                    and testcombo.priority == false
                    and testinput.combo[mw.ustring.sub(kana, i+1, i+1)]
                    and testinput.combo[mw.ustring.sub(kana, i+1, i+1)].priority == true
                if not deferred then
                    currentinput = testcombo
                    combined = true
                end
            end
            if combined == false then
                -- the pending entry cannot be extended: flush it
                local emit = true
                local c, v = currentinput.consonant, currentinput.vowel
                if i ~= upperLimit then
                    -- Look at the entry for the NEXT kana. This used to read
                    -- the fields from the string `thiskana`, which is always
                    -- nil, so the っ+ん / ん+っ exceptions never applied.
                    local testinput = inputs[thiskana]
                    local tc, tv
                    if testinput then
                        tc, tv = testinput.consonant, testinput.vowel
                    end
                    if (c == "—" and v== "x") and not (tc == "n" and tv== "x") then
                        mode = "sokuon"
                        emit = false
                    elseif (c == "n" and v== "x") and not (tc == "—" and tv== "x") then
                        mode = "nasal"
                        emit = false
                    end
                end

                if emit then
                    -- "x" stands for "no consonant" / "no vowel"
                    if c == "x" then c = "" end
                    if v == "x" then v = "" end
                    local prefix = ""
                    if mode then
                        if mode == "sokuon" then
                            prefix = currentinput.sokuoncon
                        elseif mode == "nasal" then
                            prefix = currentinput.nasalcon
                        end
                    end
                    mode = nil
                    output = output .. prefix .. c .. v
                end

                currentinput = nil
            end
        end
        if not currentinput then
            if inputs[thiskana] then
                local c, v = inputs[thiskana].consonant, inputs[thiskana].vowel
                --handle small-vowel-kana sequences いぃぃぃぃぃ
                --this is the most low-tech part of this whole thing because the
                --function doesn't /know/ that ī is an extended form of i
                --this does nothing if we're not using hepburn vowel macrons
                if c == "~" and vowelhats[v] and output:match(vowelhats[v] .. "$") then
                    currentinput = inputs[ookiikana(thiskana)]
                else
                    currentinput = inputs[thiskana]
                end
            else
                if i ~= upperLimit then
                    -- fullwidth ASCII forms (U+FF01..U+FF5E) become plain ASCII
                    local code = mw.ustring.codepoint(kana, i)
                    if code > 0xFF00 and code < 0xFF5F then
                        thiskana = mw.ustring.char(code-0xFEE0)
                    end
                end

                -- anything without an entry is copied through as-is
                output = output .. thiskana
            end
        end
    end

    return mw.text.trim(output)
end

--- Builds the kana lookup table for one romanization system.
-- This is loadData-d by sub-data modules.
-- See https://finalfantasy.fandom.com/wiki/Module:Japanese/wikipedia for an
-- example of `data`.
--
-- @param data table describing the system. Fields read here: sokuoncons,
--             nasal, choonpuvowels, vowelcombo, extendcombo, yooncons,
--             yoonvowels, specialcons, kanacombo, particles
-- @return a table { inputs = <kana -> input entry>, particles = data.particles }
function p.romanizefuncs(data)
    local katakana = {
        a = "ア",
        i = "イ",
        u = "ウ",
        e = "エ",
        o = "オ",
        xa = "ァ",
        xi = "ィ",
        xu = "ゥ",
        xe = "ェ",
        xo = "ォ",
        vu = "ヴ",
        ka = "カ",
        ki = "キ",
        ku = "ク",
        ke = "ケ",
        ko = "コ",
        ga = "ガ",
        gi = "ギ",
        gu = "グ",
        ge = "ゲ",
        go = "ゴ",
        sa = "サ",
        si = "シ",
        su = "ス",
        se = "セ",
        so = "ソ",
        za = "ザ",
        zi = "ジ",
        zu = "ズ",
        ze = "ゼ",
        zo = "ゾ",
        ta = "タ",
        ti = "チ",
        tu = "ツ",
        te = "テ",
        to = "ト",
        xtu = "ッ",
        da = "ダ",
        di = "ヂ",
        du = "ヅ",
        de = "デ",
        ["do"] = "ド",
        na = "ナ",
        ni = "ニ",
        nu = "ヌ",
        ne = "ネ",
        no = "ノ",
        ha = "ハ",
        hi = "ヒ",
        hu = "フ",
        he = "ヘ",
        ho = "ホ",
        ba = "バ",
        bi = "ビ",
        bu = "ブ",
        be = "ベ",
        bo = "ボ",
        pa = "パ",
        pi = "ピ",
        pu = "プ",
        pe = "ペ",
        po = "ポ",
        ma = "マ",
        mi = "ミ",
        mu = "ム",
        me = "メ",
        mo = "モ",
        ya = "ヤ",
        yu = "ユ",
        yo = "ヨ",
        xya = "ャ",
        xyu = "ュ",
        xyo = "ョ",
        ra = "ラ",
        ri = "リ",
        ru = "ル",
        re = "レ",
        ro = "ロ",
        wa = "ワ",
        wo = "ヲ",
        xwa = "ヮ",
        n = "ン"
    }

    local hiragana = {
        a = "あ",
        i = "い",
        u = "う",
        e = "え",
        o = "お",
        xa = "ぁ",
        xi = "ぃ",
        xu = "ぅ",
        xe = "ぇ",
        xo = "ぉ",
        vu = "ゔ",
        ka = "か",
        ki = "き",
        ku = "く",
        ke = "け",
        ko = "こ",
        ga = "が",
        gi = "ぎ",
        gu = "ぐ",
        ge = "げ",
        go = "ご",
        sa = "さ",
        si = "し",
        su = "す",
        se = "せ",
        so = "そ",
        za = "ざ",
        zi = "じ",
        zu = "ず",
        ze = "ぜ",
        zo = "ぞ",
        ta = "た",
        ti = "ち",
        tu = "つ",
        te = "て",
        to = "と",
        xtu = "っ",
        da = "だ",
        di = "ぢ",
        du = "づ",
        de = "で",
        ["do"] = "ど",
        na = "な",
        ni = "に",
        nu = "ぬ",
        ne = "ね",
        no = "の",
        ha = "は",
        hi = "ひ",
        hu = "ふ",
        he = "へ",
        ho = "ほ",
        ba = "ば",
        bi = "び",
        bu = "ぶ",
        be = "べ",
        bo = "ぼ",
        pa = "ぱ",
        pi = "ぴ",
        pu = "ぷ",
        pe = "ぺ",
        po = "ぽ",
        ma = "ま",
        mi = "み",
        mu = "む",
        me = "め",
        mo = "も",
        ya = "や",
        yu = "ゆ",
        yo = "よ",
        xya = "ゃ",
        xyu = "ゅ",
        xyo = "ょ",
        ra = "ら",
        ri = "り",
        ru = "る",
        re = "れ",
        ro = "ろ",
        wa = "わ",
        wo = "を",
        xwa = "ゎ",
        n = "ん"
    }

    --this function creates a map of every possible kana combination and its romanisation output
    --it does things twice for hiragana and katakana. maybe we should be converting?
    --mixed kana yields mixed results. avoid mixing kana
    local inputs = {}

    -- Creates one input entry and, recursively, the entries for every kana
    -- that can combine with it.
    --   c        consonant part ("x" = none, "~" = small kana, "—" = sokuon)
    --   v        vowel part; defaults to "x" (none)
    --   priority stored as true when passed true or omitted, false otherwise
    local function newinput(c, v, priority)
        if not v then v = "x" end
        local thisinput = { consonant = c, vowel = v, combo = {}, priority = priority==true or priority==nil, sokuoncon = data.sokuoncons[c], nasalcon = data.nasal[c] }
        if not thisinput.sokuoncon then
            if c == "~" or c == "x" then
                thisinput.sokuoncon = "—"
            else
                -- Default: double the first CHARACTER of the consonant.
                -- A byte-based c:sub(1,1) would cut the multi-byte "—"
                -- marker in half and emit invalid UTF-8.
                thisinput.sokuoncon = mw.ustring.sub(c, 1, 1)
            end
        end
        if not thisinput.nasalcon then
            thisinput.nasalcon = "n"
        end
        -- long-vowel mark
        if data.choonpuvowels[v] then
            thisinput.combo["ー"] = newinput(c, data.choonpuvowels[v])
        end
        -- vowel + vowel combinations (created with priority = false)
        if data.vowelcombo[v] then
            for nextkana, newvowel in pairs(data.vowelcombo[v]) do
                thisinput.combo[katakana[nextkana]] = newinput(c, newvowel, false)
                thisinput.combo[hiragana[nextkana]] = newinput(c, newvowel, false)
            end
        end
        -- vowel extensions (created with default priority)
        if data.extendcombo[v] then
            for nextkana, newvowel in pairs(data.extendcombo[v]) do
                thisinput.combo[katakana[nextkana]] = newinput(c, newvowel)
                thisinput.combo[hiragana[nextkana]] = newinput(c, newvowel)
            end
        end
        -- yoon: this kana followed by one of the kana in data.yoonvowels
        if data.yooncons[c .. v] then
            for nextkana, newvowel in pairs(data.yoonvowels) do
                thisinput.combo[katakana[nextkana]] = newinput(data.yooncons[c .. v], newvowel)
                thisinput.combo[hiragana[nextkana]] = newinput(data.yooncons[c .. v], newvowel)
            end
        end

        return thisinput
    end

    local script = { katakana, hiragana }
    for i=1, #script do for k, v in pairs(script[i]) do
        -- derive consonant and vowel from the ASCII key of the kana
        local cons = k:sub(1,1)
        local vowel = k:sub(2)

        if k == "xtu" then
            -- sokuon
            vowel = "x"
            cons = "—"
        elseif cons == "x" then
            -- other small kana
            cons = "~"
            --vowel = "x"
        elseif k == "n" then
            cons = "n"
            vowel = "x"
        elseif #k==1 then
            -- bare vowel
            vowel = cons
            cons = "x"
        elseif data.specialcons[k] then
            cons = data.specialcons[k]
        end

        inputs[v] = newinput(cons, vowel)

        --specific kana combinations
        if data.kanacombo[k] then
            for k2, cv in pairs (data.kanacombo[k]) do
                inputs[v].combo[script[i][k2]] = newinput(cv[1], cv[2])
            end
        end
    end end

    return {
        inputs = inputs,
        particles = data.particles
    }

end

return p
-- </nowiki>