-- Module:ZyPy/sandbox (模組:ZyPy/sandbox)
-- Source: Chinese Wikipedia (维基百科,自由的百科全书).
local p = {}

local getArgs = require('Module:Arguments').getArgs

-- ==============================================
-- 核心辅助工具函数(修复多音节、空格、连写兼容)
-- ==============================================

--- Normalize the whitespace of a string.
-- Full-width (ideographic, U+3000) spaces are converted to ASCII spaces,
-- leading and trailing whitespace is removed, and every inner run of
-- whitespace is collapsed to a single space.
-- @param s string to normalize; may be nil
-- @return exactly one value: the normalized string ('' when s is nil)
local function trim(s)
    if not s then return '' end
    -- '\227\128\128' is the UTF-8 byte sequence of U+3000. It is written as an
    -- escape so the literal cannot be silently turned into a plain space.
    s = s:gsub('\227\128\128', ' ')
    -- string.gsub returns (string, count). Assigning to a single variable
    -- drops the count, so it never leaks to callers as a second return value.
    s = s:gsub('^%s+', ''):gsub('%s+$', ''):gsub('%s+', ' ')
    return s
end

--- Split a pinyin string into syllables.
-- If the (whitespace-normalized) input contains spaces it is split on them
-- and nothing else is attempted. Otherwise the run-together text is segmented
-- one UTF-8 character at a time, e.g. 'nǐhǎo' -> {'nǐ', 'hǎo'}:
--   * a syllable is [initial] + vowel run + [n | ng | r] + [tone digit];
--   * a trailing n / ng / r that is directly followed by a vowel is left for
--     the next syllable ('fāngàn' -> 'fān' + 'gàn'), which is how pinyin reads
--     when no apostrophe is written;
--   * an apostrophe (ASCII or U+2019) or hyphen ends the syllable and is
--     dropped ("xī'ān" -> 'xī' + 'ān');
--   * a vowel run is cut before a second tone-marked vowel, because one
--     syllable carries at most one tone mark.
-- Characters that fit none of these rules are emitted as one-character items,
-- so the loop always advances.
-- @param pinyinStr string (or nil) holding one or more pinyin syllables
-- @return array of syllable strings, in input order (empty for nil / '')
local function splitPinyinSyllables(pinyinStr)
    pinyinStr = trim(pinyinStr)
    local syllables = {}
    if pinyinStr == '' then
        return syllables
    end
    -- Already separated by the author: trust the spaces.
    if pinyinStr:find('%s') then
        for s in pinyinStr:gmatch('%S+') do
            syllables[#syllables + 1] = s
        end
        return syllables
    end

    -- Tone-marked vowel -> plain vowel. Lua strings are byte strings, so the
    -- marked vowels (2 bytes each in UTF-8) must be handled as whole
    -- characters and never through a byte-level character class.
    local tonedBase = {
        ['ā'] = 'a', ['á'] = 'a', ['ǎ'] = 'a', ['à'] = 'a',
        ['ō'] = 'o', ['ó'] = 'o', ['ǒ'] = 'o', ['ò'] = 'o',
        ['ē'] = 'e', ['é'] = 'e', ['ě'] = 'e', ['è'] = 'e',
        ['ī'] = 'i', ['í'] = 'i', ['ǐ'] = 'i', ['ì'] = 'i',
        ['ū'] = 'u', ['ú'] = 'u', ['ǔ'] = 'u', ['ù'] = 'u',
        ['ǖ'] = 'ü', ['ǘ'] = 'ü', ['ǚ'] = 'ü', ['ǜ'] = 'ü'
    }
    -- 'v' is accepted because it is commonly typed in place of 'ü'.
    local isVowelLetter = {
        a = true, o = true, e = true, i = true, u = true, v = true,
        ['ü'] = true, ['ê'] = true
    }
    local isInitialLetter = {}
    for letter in ('bpmfdtnlgkhjqxrzcsyw'):gmatch('.') do
        isInitialLetter[letter] = true
    end
    local isSeparator = { ["'"] = true, ['’'] = true, ['-'] = true }

    -- Decode into UTF-8 characters: one non-continuation byte followed by any
    -- number of continuation bytes (0x80-0xBF).
    local chars, base, toned = {}, {}, {}
    for ch in pinyinStr:gmatch('[^\128-\191][\128-\191]*') do
        local k = #chars + 1
        chars[k] = ch
        base[k] = tonedBase[ch] or ch:lower()
        toned[k] = tonedBase[ch] ~= nil
    end
    local count = #chars

    local function vowelAt(k)
        return isVowelLetter[base[k]] == true
    end

    local i = 1
    while i <= count do
        if isSeparator[chars[i]] then
            i = i + 1
        else
            local j = i -- index of the first character not yet consumed

            -- Initial: zh / ch / sh, or a single consonant letter.
            local pair = base[j] .. (base[j + 1] or '')
            if pair == 'zh' or pair == 'ch' or pair == 'sh' then
                j = j + 2
            elseif isInitialLetter[base[j]] then
                j = j + 1
            end

            -- Vowel run, holding at most one tone-marked vowel.
            local vowelStart = j
            local seenTone = false
            while vowelAt(j) do
                if toned[j] then
                    if seenTone then break end
                    seenTone = true
                end
                j = j + 1
            end

            -- Coda: only meaningful after at least one vowel.
            if j > vowelStart then
                if base[j] == 'n' then
                    if base[j + 1] == 'g' and not vowelAt(j + 2) then
                        j = j + 2
                    elseif not vowelAt(j + 1) then
                        j = j + 1
                    end
                elseif base[j] == 'r' and not vowelAt(j + 1) then
                    j = j + 1
                end
            end

            -- Numeric tone written after the syllable ('ni3').
            if j > i and chars[j] and chars[j]:match('^%d$') then
                j = j + 1
            end

            -- Nothing recognised: emit the single character to guarantee
            -- forward progress.
            if j == i then
                j = i + 1
            end

            syllables[#syllables + 1] = table.concat(chars, '', i, j - 1)
            i = j
        end
    end
    return syllables
end

-- ==============================================
-- 拼音 ↔ 注音 映射表
-- ==============================================

-- Pinyin initial -> Zhuyin symbol.
-- 'y' and 'w' are listed too and map to the same symbols as the finals
-- 'i' and 'u'.
local pinyinInitialToZhuyin = {
    b = 'ㄅ', p = 'ㄆ', m = 'ㄇ', f = 'ㄈ',
    d = 'ㄉ', t = 'ㄊ', n = 'ㄋ', l = 'ㄌ',
    g = 'ㄍ', k = 'ㄎ', h = 'ㄏ',
    j = 'ㄐ', q = 'ㄑ', x = 'ㄒ',
    zh = 'ㄓ', ch = 'ㄔ', sh = 'ㄕ', r = 'ㄖ',
    z = 'ㄗ', c = 'ㄘ', s = 'ㄙ',
    y = 'ㄧ', w = 'ㄨ',
}

-- Pinyin final -> Zhuyin symbol(s).
-- Both the abbreviated spellings (iu, ui, un) and their full forms
-- (iou, uei, uen) are present, so either can be looked up.
-- Keys that are not valid Lua identifiers (non-ASCII letters, or the keyword
-- 'in') are written in bracket form.
local pinyinFinalToZhuyin = {
    -- simple finals
    a = 'ㄚ', o = 'ㄛ', e = 'ㄜ', ['ê'] = 'ㄝ',
    i = 'ㄧ', u = 'ㄨ', ['ü'] = 'ㄩ',
    -- compound and nasal finals
    ai = 'ㄞ', ei = 'ㄟ', ao = 'ㄠ', ou = 'ㄡ',
    an = 'ㄢ', en = 'ㄣ', ang = 'ㄤ', eng = 'ㄥ', er = 'ㄦ',
    -- finals starting with i
    ia = 'ㄧㄚ', ie = 'ㄧㄝ', iao = 'ㄧㄠ', iu = 'ㄧㄡ', iou = 'ㄧㄡ',
    ian = 'ㄧㄢ', ['in'] = 'ㄧㄣ', iang = 'ㄧㄤ', ing = 'ㄧㄥ',
    -- finals starting with u
    ua = 'ㄨㄚ', uo = 'ㄨㄛ', uai = 'ㄨㄞ', ui = 'ㄨㄟ', uei = 'ㄨㄟ',
    uan = 'ㄨㄢ', un = 'ㄨㄣ', uen = 'ㄨㄣ', uang = 'ㄨㄤ', ueng = 'ㄨㄥ',
    -- finals starting with ü
    ['üe'] = 'ㄩㄝ', ['üan'] = 'ㄩㄢ', ['ün'] = 'ㄩㄣ',
    -- -ong finals
    iong = 'ㄩㄥ', ong = 'ㄨㄥ',
}

-- Zhuyin initial -> pinyin initial.
-- ㄧ and ㄨ are included and map to 'y' and 'w'.
-- The table is filled from a compact "symbol=letters" list.
local zhuyinInitialToPinyin = {}
do
    local pairsList = 'ㄅ=b ㄆ=p ㄇ=m ㄈ=f ㄉ=d ㄊ=t ㄋ=n ㄌ=l ㄍ=g ㄎ=k ㄏ=h '
        .. 'ㄐ=j ㄑ=q ㄒ=x ㄓ=zh ㄔ=ch ㄕ=sh ㄖ=r ㄗ=z ㄘ=c ㄙ=s ㄧ=y ㄨ=w'
    for symbol, letters in pairsList:gmatch('([^=%s]+)=(%a+)') do
        zhuyinInitialToPinyin[symbol] = letters
    end
end

-- Zhuyin final -> pinyin final (abbreviated spellings iu / ui / un).
--
-- Each key appears exactly once. 'ㄨㄥ' maps to 'ong', the spelling used after
-- a consonant initial. A second 'ㄨㄥ' = 'ueng' entry used to sit in this
-- constructor, but a later duplicate key overwrites an earlier one, so it
-- never took effect and has been removed. The zero-initial reading "weng"
-- does not need it: splitZhuyinInitialFinal treats a leading ㄨ as the
-- initial 'w', which leaves ㄥ -> 'eng' as the final.
local zhuyinFinalToPinyin = {
    ['ㄚ'] = 'a', ['ㄛ'] = 'o', ['ㄜ'] = 'e', ['ㄝ'] = 'ê',
    ['ㄧ'] = 'i', ['ㄨ'] = 'u', ['ㄩ'] = 'ü',
    ['ㄞ'] = 'ai', ['ㄟ'] = 'ei', ['ㄠ'] = 'ao', ['ㄡ'] = 'ou',
    ['ㄢ'] = 'an', ['ㄣ'] = 'en', ['ㄤ'] = 'ang', ['ㄥ'] = 'eng', ['ㄦ'] = 'er',
    ['ㄧㄚ'] = 'ia', ['ㄧㄝ'] = 'ie', ['ㄧㄠ'] = 'iao', ['ㄧㄡ'] = 'iu',
    ['ㄧㄢ'] = 'ian', ['ㄧㄣ'] = 'in', ['ㄧㄤ'] = 'iang', ['ㄧㄥ'] = 'ing',
    ['ㄨㄚ'] = 'ua', ['ㄨㄛ'] = 'uo', ['ㄨㄞ'] = 'uai', ['ㄨㄟ'] = 'ui',
    ['ㄨㄢ'] = 'uan', ['ㄨㄣ'] = 'un', ['ㄨㄤ'] = 'uang',
    ['ㄩㄝ'] = 'üe', ['ㄩㄢ'] = 'üan', ['ㄩㄣ'] = 'ün',
    ['ㄩㄥ'] = 'iong', ['ㄨㄥ'] = 'ong'
}

-- Tone-marked vowel -> { plain vowel, tone digit as a string }.
-- Generated from one row per plain vowel; the marked forms in a row are
-- listed in tone order 1..4.
local vowelToneMap = {}
do
    local rows = {
        { 'a', 'ā', 'á', 'ǎ', 'à' },
        { 'o', 'ō', 'ó', 'ǒ', 'ò' },
        { 'e', 'ē', 'é', 'ě', 'è' },
        { 'i', 'ī', 'í', 'ǐ', 'ì' },
        { 'u', 'ū', 'ú', 'ǔ', 'ù' },
        { 'ü', 'ǖ', 'ǘ', 'ǚ', 'ǜ' },
    }
    for _, row in ipairs(rows) do
        local plain = row[1]
        for tone = 1, 4 do
            vowelToneMap[row[tone + 1]] = { plain, tostring(tone) }
        end
    end
end

-- Tone digit (as a string) -> Zhuyin tone mark. '5' is the neutral tone.
local toneMap = {
    ['1'] = 'ˉ',
    ['2'] = 'ˊ',
    ['3'] = 'ˇ',
    ['4'] = 'ˋ',
    ['5'] = '˙',
}

-- Plain vowel -> its five written forms, indexed by tone number:
-- positions 1..4 carry the tone mark, position 5 is the unmarked vowel.
local toneVowelMap = {
    a = { 'ā', 'á', 'ǎ', 'à', 'a' },
    o = { 'ō', 'ó', 'ǒ', 'ò', 'o' },
    e = { 'ē', 'é', 'ě', 'è', 'e' },
    i = { 'ī', 'í', 'ǐ', 'ì', 'i' },
    u = { 'ū', 'ú', 'ǔ', 'ù', 'u' },
    ['ü'] = { 'ǖ', 'ǘ', 'ǚ', 'ǜ', 'ü' },
}

-- ==============================================
-- 拼音 → 注音 核心转换(支持多音节、连写、空格)
-- ==============================================

--- Separate the tone from a single pinyin syllable.
-- A tone-marked vowel takes precedence: the first entry of vowelToneMap found
-- in the syllable is replaced by its plain vowel and determines the tone.
-- Otherwise a trailing digit 1-5 is taken as the tone and removed.
-- With neither, the syllable is returned untouched with tone '5'.
-- @param pinyin one syllable, tone written as a diacritic or a trailing digit
-- @return the syllable without tone information
-- @return the tone as a string, '1'..'5'
local function extractTone(pinyin)
    for marked, info in pairs(vowelToneMap) do
        if pinyin:find(marked) then
            -- keep only gsub's first result (the string)
            local stripped = pinyin:gsub(marked, info[1])
            return stripped, info[2]
        end
    end

    local digit = pinyin:match('(%d)$')
    if digit then
        local value = tonumber(digit)
        if value >= 1 and value <= 5 then
            return pinyin:sub(1, -2), digit
        end
    end

    return pinyin, '5'
end

--- Rewrite a toneless pinyin syllable into the canonical "initial + full
-- final" spelling that the pinyin -> Zhuyin lookup tables are keyed on.
-- Steps, in order:
--   1. 'v' is accepted as a substitute for 'ü';
--   2. y-/w- spellings of zero-initial syllables are undone:
--        yu* -> ü*   (yu, yue, yuan, yun)
--        yi* -> i*   (yi, yin, ying)
--        y*  -> i*   (ya, ye, yao, you, yan, yang, yong)
--        wu  -> u
--        w*  -> u*   (wa, wo, wai, wei, wan, wen, wang, weng)
--   3. 'u' directly after j / q / x becomes 'ü';
--   4. the abbreviated finals iu / ui / un are expanded to iou / uei / uen;
--   5. the 'i' of zhi / chi / shi / ri / zi / ci / si is dropped, because
--      Zhuyin writes those syllables with the initial symbol alone.
-- All matching is on whole letters; 'ü' (two bytes in UTF-8) is never put
-- into a byte-level character class, which would split it.
-- @param pinyin one lower-case syllable without tone mark or tone digit
-- @return the normalized syllable (single value)
local function normalizePinyin(pinyin)
    pinyin = pinyin:gsub('v', 'ü')

    -- y-/w- rewriting must run before step 4, otherwise 'yun' would first be
    -- expanded to 'yuen'.
    local lead1, lead2 = pinyin:sub(1, 1), pinyin:sub(1, 2)
    if lead2 == 'yu' then
        pinyin = 'ü' .. pinyin:sub(3)
    elseif lead2 == 'yi' then
        pinyin = pinyin:sub(2)
    elseif lead1 == 'y' then
        pinyin = 'i' .. pinyin:sub(2)
    elseif lead2 == 'wu' then
        pinyin = pinyin:sub(2)
    elseif lead1 == 'w' then
        pinyin = 'u' .. pinyin:sub(2)
    end

    -- ju / jue / juan / jun (and q-, x-) are spelled without the dots.
    pinyin = pinyin:gsub('^([jqx])u', '%1ü')

    -- 'ün' is unaffected by the last pattern: it does not contain the byte 'u'.
    pinyin = pinyin:gsub('iu$', 'iou'):gsub('ui$', 'uei'):gsub('un$', 'uen')

    if pinyin:find('^[zcs]h?i$') or pinyin == 'ri' then
        pinyin = pinyin:sub(1, -2)
    end
    return pinyin
end

--- Split a normalized pinyin syllable into initial and final.
-- The two-letter initials zh / ch / sh are tried first, then any single
-- letter that is a key of pinyinInitialToZhuyin. When neither applies the
-- syllable has no initial and is returned whole as the final.
-- @param pinyin toneless syllable
-- @return the initial ('' for a zero-initial syllable)
-- @return the final (the rest of the syllable)
local function splitInitialFinal(pinyin)
    local width = 0 -- number of leading characters that form the initial
    local head = pinyin:sub(1, 2)
    if head == 'zh' or head == 'ch' or head == 'sh' then
        width = 2
    elseif pinyinInitialToZhuyin[pinyin:sub(1, 1)] then
        width = 1
    end
    return pinyin:sub(1, width), pinyin:sub(width + 1)
end

--- Convert a pinyin string (one or more syllables) to Zhuyin.
-- Each syllable from splitPinyinSyllables is stripped of its tone,
-- normalized, split into initial and final, and looked up in the two
-- pinyin -> Zhuyin tables; a part that is not found contributes an empty
-- string. The neutral-tone mark is written before the symbols, every other
-- tone mark after them. Converted syllables are joined with single spaces.
-- @param pinyinStr pinyin text; nil, false or '' yields ''
-- @return the Zhuyin text
local function pinyinToZhuyin(pinyinStr)
    if not pinyinStr or pinyinStr == '' then
        return ''
    end

    local pieces = {}
    for index, syllable in ipairs(splitPinyinSyllables(pinyinStr)) do
        local bare, tone = extractTone(syllable)
        local initial, final = splitInitialFinal(normalizePinyin(bare))
        local symbols = (pinyinInitialToZhuyin[initial] or '')
            .. (pinyinFinalToZhuyin[final] or '')
        if tone == '5' then
            pieces[index] = '˙' .. symbols
        else
            pieces[index] = symbols .. toneMap[tone]
        end
    end
    return table.concat(pieces, ' ')
end

-- ==============================================
-- 注音 → 拼音 核心转换(支持多音节、空格)
-- ==============================================

--- Separate the tone mark from a single Zhuyin syllable.
-- Recognised forms:
--   * a leading '˙'                      -> neutral tone ('5');
--   * a trailing 'ˉ' / 'ˊ' / 'ˇ' / 'ˋ'   -> tone '1' / '2' / '3' / '4';
--   * a trailing '˙'                     -> neutral tone ('5');
--   * no mark at all                     -> '5', syllable returned unchanged.
-- Tone marks are multi-byte in UTF-8, so they are compared as whole
-- characters. Single-byte `sub(1, 1)` / `sub(-1)` slices can never equal one.
-- @param zhuyin one Zhuyin syllable
-- @return the syllable without its tone mark
-- @return the tone as a string, '1'..'5'
local function extractZhuyinTone(zhuyin)
    local neutralMark = '˙'
    if zhuyin:sub(1, #neutralMark) == neutralMark then
        return zhuyin:sub(#neutralMark + 1), '5'
    end

    local trailingTone = {
        ['ˉ'] = '1', ['ˊ'] = '2', ['ˇ'] = '3', ['ˋ'] = '4', ['˙'] = '5'
    }
    -- Last UTF-8 character: one non-continuation byte plus its continuation
    -- bytes (0x80-0xBF), anchored at the end of the string.
    local lastChar = zhuyin:match('[^\128-\191][\128-\191]*$')
    local tone = lastChar and trailingTone[lastChar]
    if tone then
        return zhuyin:sub(1, -#lastChar - 1), tone
    end
    return zhuyin, '5'
end

--- Split a toneless Zhuyin syllable into initial and final.
-- The first UTF-8 character is the initial when it is one of the listed
-- symbols. That list includes ㄧ and ㄨ, so a syllable that starts with one of
-- them is returned as that symbol plus the remainder. Anything else is a
-- zero-initial syllable and is returned whole as the final.
-- Zhuyin symbols are multi-byte in UTF-8, so the first character is extracted
-- as a whole; a one-byte slice would never match a table key.
-- @param zhuyin Zhuyin syllable with the tone mark already removed
-- @return the initial symbol ('' for a zero-initial syllable)
-- @return the final (the rest of the syllable)
local function splitZhuyinInitialFinal(zhuyin)
    local initials = 'ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄧㄨ'
    -- One non-continuation byte followed by its continuation bytes.
    local firstChar = zhuyin:match('^[^\128-\191][\128-\191]*')
    -- A complete UTF-8 character can only match on a character boundary, so a
    -- plain-text find is a reliable membership test.
    if firstChar and initials:find(firstChar, 1, true) then
        return firstChar, zhuyin:sub(#firstChar + 1)
    end
    return '', zhuyin -- zero initial
end

--- Turn the raw "initial letters + final letters" concatenation produced from
-- Zhuyin into the standard pinyin spelling.
-- The rule set depends on which Zhuyin initial the syllable had:
--   * 'ㄧ' (letters start with 'y'): y -> yi, yen -> yin, yeng -> ying, and
--     'ê' is written 'e' (yê -> ye);
--   * 'ㄨ' (letters start with 'w'): w -> wu;
--   * ''  (zero initial, letters are a bare final): ü* -> yu*,
--     i / in / ing -> yi / yin / ying, iu -> you, other i* -> y*,
--     u -> wu, ui -> wei, un -> wen, other u* -> w*;
--   * any other initial: full finals are abbreviated (iou -> iu, uei -> ui,
--     uen -> un), 'ü' after ㄐ / ㄑ / ㄒ loses its dots, and a bare
--     zh / ch / sh / r / z / c / s receives its 'i'.
-- 'ü' and 'ê' are two bytes each in UTF-8 and are only handled as whole
-- literals, never inside a byte-level character class.
-- @param pinyin letters obtained from the Zhuyin -> pinyin tables, no tone
-- @param zhuyinInitial the Zhuyin initial of the syllable ('' when it has none)
-- @return the pinyin spelling, still without tone (single value)
local function normalizePinyinBack(pinyin, zhuyinInitial)
    if zhuyinInitial == 'ㄧ' then
        if pinyin == 'y' then return 'yi' end
        if pinyin == 'yen' then return 'yin' end
        if pinyin == 'yeng' then return 'ying' end
        return (pinyin:gsub('ê', 'e'))
    end

    if zhuyinInitial == 'ㄨ' then
        if pinyin == 'w' then return 'wu' end
        return pinyin
    end

    if zhuyinInitial == '' then
        if pinyin:sub(1, 2) == 'ü' then -- 'ü' occupies two bytes
            return 'yu' .. pinyin:sub(3)
        end
        local lead, rest = pinyin:sub(1, 1), pinyin:sub(2)
        if lead == 'i' then
            if rest == '' or rest == 'n' or rest == 'ng' then
                return 'y' .. pinyin
            end
            if rest == 'u' then return 'you' end
            return 'y' .. rest
        elseif lead == 'u' then
            if rest == '' then return 'wu' end
            if rest == 'i' then return 'wei' end
            if rest == 'n' then return 'wen' end
            return 'w' .. rest
        end
        return pinyin
    end

    -- Syllable with a consonant initial.
    pinyin = pinyin:gsub('iou$', 'iu'):gsub('uei$', 'ui'):gsub('uen$', 'un')
    if zhuyinInitial == 'ㄐ' or zhuyinInitial == 'ㄑ' or zhuyinInitial == 'ㄒ' then
        pinyin = pinyin:gsub('ü', 'u')
    end
    if pinyin:find('^[zcs]h?$') or pinyin == 'r' then
        pinyin = pinyin .. 'i'
    end
    return pinyin
end

--- Put a tone diacritic on a toneless pinyin syllable.
-- The vowel that receives the mark is chosen by priority: 'a', then 'o', then
-- 'e'; in 'iu' the 'u', in 'ui' the 'i'; otherwise the first 'i', 'u' or 'ü'.
-- The syllable is returned unchanged when the tone is '5' (neutral), when it
-- contains none of those vowels, or when the tone is not one that
-- toneVowelMap has a form for.
-- @param pinyin toneless syllable
-- @param tone tone as a string, '1'..'5'
-- @return the syllable with its tone mark
local function addToneMark(pinyin, tone)
    if tone == '5' then return pinyin end
    local toneNum = tonumber(tone)

    local targetVowel
    if pinyin:find('a', 1, true) then targetVowel = 'a'
    elseif pinyin:find('o', 1, true) then targetVowel = 'o'
    elseif pinyin:find('e', 1, true) then targetVowel = 'e'
    elseif pinyin:find('iu', 1, true) then targetVowel = 'u'
    elseif pinyin:find('ui', 1, true) then targetVowel = 'i'
    elseif pinyin:find('i', 1, true) then targetVowel = 'i'
    elseif pinyin:find('u', 1, true) then targetVowel = 'u'
    elseif pinyin:find('ü', 1, true) then targetVowel = 'ü'
    end

    local forms = targetVowel and toneVowelMap[targetVowel]
    local marked = forms and toneNum and forms[toneNum]
    if not marked then return pinyin end

    -- Splice with the byte range that find() reports rather than assuming a
    -- one-byte vowel: 'ü' is two bytes in UTF-8, and cutting at pos + 1 would
    -- leave its second byte behind as garbage.
    local first, last = pinyin:find(targetVowel, 1, true)
    return pinyin:sub(1, first - 1) .. marked .. pinyin:sub(last + 1)
end

--- Convert a Zhuyin string (syllables separated by whitespace) to pinyin.
-- Each syllable has its tone mark extracted, is split into initial and
-- final, is looked up in the two Zhuyin -> pinyin tables (a part that is not
-- found contributes an empty string), is respelled by normalizePinyinBack and
-- finally receives its tone diacritic. Results are joined with single spaces.
-- @param zhuyinStr Zhuyin text; nil, false or '' yields ''
-- @return the pinyin text
local function zhuyinToPinyin(zhuyinStr)
    if not zhuyinStr or zhuyinStr == '' then
        return ''
    end

    local converted = {}
    for syllable in trim(zhuyinStr):gmatch('%S+') do
        local bare, tone = extractZhuyinTone(syllable)
        local initial, final = splitZhuyinInitialFinal(bare)
        local letters = (zhuyinInitialToPinyin[initial] or '')
            .. (zhuyinFinalToPinyin[final] or '')
        local spelled = normalizePinyinBack(letters, initial)
        converted[#converted + 1] = addToneMark(spelled, tone)
    end
    return table.concat(converted, ' ')
end

-- ==============================================
-- 原模板核心逻辑(保留原有功能,新增自动互转)
-- ==============================================

--- Entry point for #invoke: collects the arguments with Module:Arguments
-- (declaring 'Template:Zy' as the wrapper template) and renders them through
-- p._main.
-- NOTE(review): p.nocat declares 'Template:ZyPy' as its wrapper while this
-- function declares 'Template:Zy' — confirm the difference is intended.
-- @param frame the Scribunto frame object
-- @return whatever p._main returns for the collected arguments
function p.main(frame)
    return p._main(getArgs(frame, { wrappers = 'Template:Zy' }))
end

--- Entry point for #invoke that renders like p.main but sets the `nocat`
-- argument, which makes p._main return before it appends any category link.
-- Arguments are collected with 'Template:ZyPy' declared as the wrapper.
-- NOTE(review): p.main declares 'Template:Zy' instead — confirm which wrapper
-- name is the intended one.
-- @param frame the Scribunto frame object
-- @return the ruby wikitext without category links
function p.nocat(frame)
    local parsed = getArgs(frame, { wrappers = 'Template:ZyPy' })
    parsed.nocat = true
    return p._main(parsed)
end

--- Render a ruby annotation for a piece of text.
-- Arguments read from `args`:
--   args[1]    the base text being annotated
--   args[2]    pinyin   (generated from args[3] when only Zhuyin is given)
--   args[3]    Zhuyin   (generated from args[2] when only pinyin is given)
--   args[4]    Jyutping (optional)
--   args.v     'hans' or 'hant': show the annotation only in that variant
--   args.nocat when set, return the markup without category links
-- With neither pinyin nor Zhuyin the base text is returned as is. Otherwise a
-- <ruby class="zy"> element is built whose <rt> is a language-converter rule
-- selecting the annotation per variant, followed (unless nocat) by
-- maintenance category links.
-- Note that the generated pinyin / Zhuyin is written back into `args`.
-- @param args argument table (as produced by Module:Arguments)
-- @return wikitext string
function p._main(args)
    -- Derive whichever of the two transcriptions is missing from the other.
    local givenPinyin, givenZhuyin = args[2], args[3]
    if givenPinyin and not givenZhuyin then
        args[3] = pinyinToZhuyin(givenPinyin)
    elseif givenZhuyin and not givenPinyin then
        args[2] = zhuyinToPinyin(givenZhuyin)
    end

    -- Wrap a transcription in a language-tagged <span>; passes nil through.
    local function langSpan(code, text)
        return text and mw.text.tag{ name = 'span', attrs = { lang = code }, content = text }
    end
    local pinyin   = langSpan('zh-Latn-pinyin', args[2])
    local zhuyin   = langSpan('zh-Bopo', args[3])
    local jyutping = langSpan('yue-Latn-jyutping', args[4])

    if not (pinyin or zhuyin) then
        return args[1]
    end

    -- One annotation per variant, each with its own fallback order.
    local annotation = string.format(
        '-{zh-hans:%s;zh-hant:%s;zh-tw:%s;zh-hk:%s;}-',
        pinyin or zhuyin,
        pinyin or zhuyin,
        zhuyin or pinyin,
        jyutping or pinyin or zhuyin
    )

    local ruby = mw.html.create('ruby')
    ruby:addClass('zy')
    ruby:wikitext(args[1])
    ruby:tag('rp'):wikitext('(')
    ruby:tag('rt'):wikitext(annotation)
    ruby:tag('rp'):wikitext(')')

    local rubyText = tostring(ruby)

    -- Annotate in one variant only; the paired variant gets the bare text.
    local variant = args.v
    local counterpart = variant and ({ hans = 'tw', hant = 'cn' })[variant]
    if counterpart then
        rubyText = string.format(
            '-{zh-%s:-{zh;zh-hans;zh-hant|%s}-;zh-%s:%s;}-',
            variant, rubyText, counterpart, args[1]
        )
    end

    if args.nocat then
        return rubyText
    end

    -- Maintenance categories.
    local getCategoryName = require('Module:Lang')._category_from_tag
    local result = { rubyText }

    local function appendLangCategory(tag)
        table.insert(result, '[[')
        table.insert(result, getCategoryName({ tag }))
        table.insert(result, ']]')
    end

    -- Per-language categories are added only when the current title is in
    -- namespace 0.
    if mw.title.getCurrentTitle().namespace == 0 then
        if pinyin then
            appendLangCategory('zh-Latn-pinyin')
        end
        if zhuyin then
            appendLangCategory('zh-Bopo')
        end
    end

    -- Category by the number of transcription systems in use.
    local used = 0
    if pinyin then used = used + 1 end
    if zhuyin then used = used + 1 end
    if jyutping then used = used + 1 end

    local byCount = {
        [3] = '[[Category:使用了三种注音方式的页面]]',
        [2] = '[[Category:使用了两种注音方式的页面]]',
    }
    table.insert(result, byCount[used] or '[[Category:使用了一种注音方式的页面]]')

    return table.concat(result)
end

-- Expose the two converters on the module table so they can be called
-- directly when debugging. Both take the text to convert as a plain string.
-- NOTE(review): a function invoked through #invoke receives a frame object
-- rather than a string — confirm these are only called from Lua (e.g. the
-- debug console or another module).
p.pinyinToZhuyin = pinyinToZhuyin
p.zhuyinToPinyin = zhuyinToPinyin

return p