Модуль:character info
Для документации этого модуля может быть создана страница Модуль:character info/Документация
--[[
https://en.wiktionary.org/wiki/Module:character_info
This module is in beta stage.
Its interface has been stabilised, but the module may still contain errors. Do not deploy widely until the module has been tested.
This module is intended to replace {{character info}} and templates which are implemented in terms of it.
]]
local m_unicode = require('Module:Unicode data')
local m_uni_aliases = mw.loadData('Module:Unicode data/aliases')
local m_scripts = require("Module:scripts/data")
local export = {}
-- Set of script codes for which export.exotic_symbol_warning shows its
-- edit notice. Keys are compared against m_unicode.get_script() results.
-- NOTE(review): these look like the ISO 15924 codes for symbols, mathematical
-- notation and "undetermined/common" script -- confirm against Module:scripts/data.
local dingbat_scripts = {
	Zsym = true,
	Zmth = true,
	Zyyy = true,
}
--- Returns the "editnotice-exotic symbols" template expansion when the
-- current page does not exist yet, its full title is exactly one character,
-- and that character's script code is listed in dingbat_scripts.
-- In every other case an empty string is returned.
-- @param frame Scribunto frame object, used only to expand the template.
-- @return string wikitext (possibly empty)
function export.exotic_symbol_warning(frame)
	local page = mw.title.getCurrentTitle()

	-- Existing pages and multi-character titles never get the notice.
	if page.exists or mw.ustring.len(page.fullText) ~= 1 then
		return ""
	end

	local script = m_unicode.get_script(mw.ustring.codepoint(page.fullText))
	if not dingbat_scripts[script] then
		return ""
	end

	return frame:expandTemplate { title = "editnotice-exotic symbols" }
end
--- Renders the navigation cell for a neighbouring code point.
-- @param codepoint number: the neighbouring code point to present.
-- @param np boolean: true lays the cell out with a trailing right arrow
--   (used for codepoint + 1), false with a leading left arrow (codepoint - 1).
-- @return string wikitext; empty when the neighbour falls outside the
--   Unicode code space (0..0x10FFFF), so the first/last code points do not
--   feed invalid values to the Unicode data lookups.
local function present_codepoint(codepoint, np)
	if codepoint < 0 or codepoint > 0x10FFFF then
		return ""
	end

	local display = ""
	local link_target
	if m_unicode.is_printable(codepoint) then
		link_target = m_unicode.get_entry_title(codepoint)
		if link_target then
			-- mw.title.new returns nil for an invalid title, so guard before
			-- reading .exists; only link to entries that actually exist.
			local target_title = mw.title.new(link_target)
			if not (target_title and target_title.exists) then
				link_target = nil
			end
		end
		display = ('<bdi class="character-sample %s">&#%u;</bdi>'):format(
			m_unicode.get_script(codepoint), codepoint
		)
	end

	return (
		(link_target and '[[%s|<span title="%s">' or '<!-- %s --><span title="%s">') ..
		(np and '<small>[U+%04X]</small> %s <!-- U+%04X --> →'
			or '← <!-- U+%04X --> %s <small>[U+%04X]</small>') ..
		(link_target and '</span>]]' or '</span>')
	):format(
		link_target or "", mw.text.encode(m_unicode.lookup_name(codepoint)),
		codepoint, display, codepoint
	)
end

--- Builds the two table rows specific to precomposed Hangul syllables:
-- the jamo "Composition" row and the Dubeolsik keyboard input row.
-- Jamo indices come from Module:ko-hangul's syllable2JamoIndices; a final
-- index of 0 is treated as "no final consonant".
-- @param codepoint number: code point inside the "Hangul Syllables" block.
-- @return string, string: composition row markup, input row markup.
local function hangul_rows(codepoint)
	local m_Kore = require('Module:ko-hangul')
	local li, vi, ti = m_Kore.syllable2JamoIndices(codepoint)

	-- Index -> Hangul Compatibility Jamo code point. Initial and vowel
	-- indices are 0-based; final indices are 1-based (0 = no final).
	local initial_to_letter = { [0] =
		0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,
		0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314A, 0x314B,
		0x314C, 0x314D, 0x314E,
	}
	local vowel_to_letter = { [0] =
		0x314F, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156,
		0x3157, 0x3158, 0x3159, 0x315A, 0x315B, 0x315C, 0x315D, 0x315E,
		0x315F, 0x3160, 0x3161, 0x3162, 0x3163,
	}
	local final_to_letter = {
		0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, 0x3139,
		0x313A, 0x313B, 0x313C, 0x313D, 0x313E, 0x313F, 0x3140, 0x3141,
		0x3142, 0x3144, 0x3145, 0x3146, 0x3147, 0x3148, 0x314A, 0x314B,
		0x314C, 0x314D, 0x314E, -- (unresolved note from the original author) KIYEOK-RIEUL = ???
	}
	-- Jamo -> key sequence on the Dubeolsik layout.
	local dubeolsik = {
		['ㅂ'] = 'q'; ['ㅃ'] = 'Q'; ['ㅈ'] = 'w'; ['ㅉ'] = 'W'; ['ㄷ'] = 'e';
		['ㄸ'] = 'E'; ['ㄱ'] = 'r'; ['ㄲ'] = 'R'; ['ㅅ'] = 't'; ['ㅆ'] = 'T';
		['ㅛ'] = 'y'; ['ㅕ'] = 'u'; ['ㅑ'] = 'i'; ['ㅐ'] = 'o'; ['ㅒ'] = 'O';
		['ㅔ'] = 'p'; ['ㅖ'] = 'P'; ['ㅁ'] = 'a'; ['ㄴ'] = 's'; ['ㅇ'] = 'd';
		['ㄹ'] = 'f'; ['ㅎ'] = 'g'; ['ㅗ'] = 'h'; ['ㅓ'] = 'j'; ['ㅏ'] = 'k';
		['ㅣ'] = 'l'; ['ㅋ'] = 'z'; ['ㅌ'] = 'x'; ['ㅊ'] = 'c'; ['ㅍ'] = 'v';
		['ㅠ'] = 'b'; ['ㅜ'] = 'n'; ['ㅡ'] = 'm';
		['ㅘ'] = 'h-k'; ['ㅙ'] = 'h-o'; ['ㅚ'] = 'h-l'; ['ㅝ'] = 'n-j';
		['ㅞ'] = 'n-p'; ['ㅟ'] = 'n-l'; ['ㅢ'] = 'm-l';
		-- XXX: "composite" letters; should they be further decomposed?
		-- Every final cluster listed in final_to_letter needs an entry here:
		-- a missing key would pass nil to %s below, which raises in Lua 5.1.
		['ㄳ'] = 'r-t'; ['ㄵ'] = 's-w'; ['ㄶ'] = 's-g'; ['ㄺ'] = 'f-r';
		['ㄻ'] = 'f-a'; ['ㄼ'] = 'f-q'; ['ㄽ'] = 'f-t'; ['ㄾ'] = 'f-x';
		['ㄿ'] = 'f-v'; ['ㅀ'] = 'f-g'; ['ㅄ'] = 'q-t';
	}

	local initial = initial_to_letter[li]
	local vowel = vowel_to_letter[vi]
	local initial_keys = dubeolsik[mw.ustring.char(initial)]
	local vowel_keys = dubeolsik[mw.ustring.char(vowel)]

	if ti ~= 0 then
		local final = final_to_letter[ti]
		return
			('|-\n! Composition\n| <big class="Kore" lang="">[[&#%u;]] + [[&#%u;]] + [[&#%u;]]</big>\n'):format(
				initial, vowel, final
			),
			('|-\n! \'\'[[w:Keyboard_layout#Hangul|Dubeolsik]]\'\' input\n| %s-%s-%s\n'):format(
				initial_keys, vowel_keys, dubeolsik[mw.ustring.char(final)]
			)
	end

	return
		('|-\n! Composition\n| <big class="Kore" lang="">[[&#%u;]] + [[&#%u;]]</big>\n'):format(
			initial, vowel
		),
		('|-\n! \'\'[[w:Keyboard_layout#Hangul|Dubeolsik]]\'\' input\n| %s-%s\n'):format(
			initial_keys, vowel_keys
		)
end

--- Builds the "character info" box (a right-floated wikitext table) for one
-- Unicode character, followed by an optional category link.
--
-- Arguments are read from the parent frame (the calling template):
--   codepoint  number, or a single character / character reference; when
--              absent the code point is taken from the page title
--   image, caption, sc, nocat, langs, standards,
--   latex, latex2, latex3, ..., gardiner, mdc, egpz
--
-- @param frame Scribunto frame object.
-- @return string wikitext; empty when invoked on a page in the Template
--   namespace whose title is not a single character.
-- Raises an error when the codepoint parameter or the page title cannot be
-- resolved to one character, or when the script code is unknown.
function export.show(frame)
	local args = frame:getParent().args
	local codepoint = args.codepoint or ""
	local image = args.image or ""

	if codepoint ~= "" then
		-- Accept a number, or a literal/encoded single character.
		codepoint = tonumber(codepoint) or mw.text.decode(codepoint)
		if (type(codepoint) == "string") and (mw.ustring.len(codepoint) == 1) then
			codepoint = mw.ustring.codepoint(codepoint)
		elseif type(codepoint) ~= "number" then
			error("Unrecognised string given for the codepoint parameter")
		end
	else
		local title = mw.title.getCurrentTitle()
		if title.fullText == frame:getParent():getTitle() then
			-- Viewing the calling template's own page: show U+FFFD as a sample.
			codepoint = 0xfffd
		elseif mw.ustring.len(title.fullText) == 1 then
			codepoint = mw.ustring.codepoint(title.fullText)
		else
			-- Namespace 10 is Template on every wiki; comparing nsText with
			-- "Template" only works where the namespace name is not localised.
			if title.namespace == 10 then return "" end
			error("Page title is not a single Unicode character")
		end
	end

	local table_markup = {}
	local function add(markup)
		table_markup[#table_markup + 1] = markup
	end

	add('{| style="border:1px solid #aaa; border-spacing:5px; background-color:#f9f9f9; color:black; margin:0 0 0.5em 0.5em; padding:4px; float:right; clear:right; width:260px; text-align:left; font-size:90%; line-height:1.5em;"\n')

	if image ~= "" then
		if not image:match("\127") then -- <hiero> tags generate these; pass them through
			if image:match("^%[?%[?File:") or image:match("^%[?%[?Image:") then
				-- Reduce a full [[File:...|...]] link to the bare file name.
				image = image:gsub("^%[%[", ""):gsub("^File:", ""):gsub("^Image:", ""):gsub("|.*", ""):gsub("]]", "")
			end
			image = "[[File:" .. image .. "|280x200px]]"
		end
		add(('|-\n| colspan="2" style="text-align: center;" | %s<br/>%s\n'):format(
			image, args.caption or ""
		))
	elseif args.caption then
		add(('|-\n| colspan="2" style="text-align: center;" | %s\n'):format(
			args.caption
		))
	end

	-- An empty sc= is treated like an absent one and auto-detected.
	local script_code = args.sc
	if script_code == nil or script_code == "" then
		script_code = m_unicode.get_script(codepoint)
	end
	local script_data = m_scripts[script_code]
	if not script_data then
		error("Unrecognised script code: " .. tostring(script_code))
	end
	local script_name = script_data.canonicalName

	local NAMESPACE = mw.title.getCurrentTitle().namespace
	local cat_name
	if not args.nocat and ((NAMESPACE == 0) or (NAMESPACE == 100)) then -- main and Appendix
		if script_data.character_category ~= nil then
			-- false means no category, overriding the default below
			cat_name = script_data.character_category or nil
		elseif script_name then
			cat_name = script_name .. " script characters"
		end
	end

	add(('|-\n! Character\n| <bdi class="character-sample %s">&#%u;</bdi>\n'):format(
		script_code, codepoint
	))
	add(('|-\n! Unicode name\n| style="font-size: smaller;" | %s\n'):format(
		mw.text.encode(m_unicode.lookup_name(codepoint))
	))

	local aliases = m_uni_aliases[codepoint]
	if aliases then
		-- Group alias names by their kind (alias[1]), keeping listed order.
		local classif = {}
		for _, alias in ipairs(aliases) do
			if not classif[alias[1]] then
				classif[alias[1]] = {}
			end
			table.insert(classif[alias[1]], mw.text.encode(alias[2]))
		end

		if classif.correction then
			for _, name in ipairs(classif.correction) do
				add(('|-\n! Corrected name\n| style="font-size: smaller;" | %s\n'):format(
					name
				))
			end
		end
		if classif.abbreviation then
			add(('|-\n! Abbreviation(s)\n| | %s\n'):format(
				table.concat(classif.abbreviation, ", ")
			))
		end
		if classif.alternate then
			for _, name in ipairs(classif.alternate) do
				add(('|-\n! Alternative name\n| style="font-size: smaller;" | %s\n'):format(
					name
				))
			end
		end
	end

	add(('|-\n! Code point\n| [http://unicode.org/cldr/utility/character.jsp?a=%.4X U+%.4X]\n'):format(
		codepoint, codepoint
	))

	-- The ampersand is escaped so that the reference is displayed as text
	-- (e.g. "&#65;") instead of being rendered as the character itself.
	add(('|-\n! [[w:List of XML and HTML character entity references|Entity number]]\n| &amp;#%u;\n'):format(
		codepoint
	))

	local block_name = mw.text.encode(m_unicode.lookup_block(codepoint))
	add(('|-\n! Unicode block\n| [[Appendix:Unicode/%s|%s]]\n'):format(
		block_name, block_name
	))

	if block_name == "Hangul Syllables" then
		local composition_row, input_row = hangul_rows(codepoint)
		add(composition_row)
		add(input_row)
	else
		-- Show the canonical decomposition when it has more than one code point.
		local nfd = mw.ustring.toNFD(mw.ustring.char(codepoint))
		if mw.ustring.len(nfd) ~= 1 then
			local compo = {}
			for nfdcp in mw.ustring.gcodepoint(nfd) do
				-- XXX: combining characters should be converted to a spacing
				-- form; for now they are rendered like any other component.
				table.insert(compo, ('<bdi class="character-sample">[[&#%u;]]</bdi> (U+%04X)'):format(nfdcp, nfdcp))
			end
			add(('|-\n! Composition \n| %s\n'):format(table.concat(compo, " +<br/>")))
		end
	end

	if args.langs then
		add(('|-\n! Used in languages\n| %s\n'):format(
			args.langs
		))
	end
	if args.standards then
		add(('|-\n! In other standards\n| %s\n'):format(
			args.standards
		))
	end
	if args.latex then
		-- latex, latex2, latex3, ... are collected until the first gap.
		local latex, n = { '<code>' .. args.latex .. '</code>' }, 2
		while args["latex" .. n] do
			table.insert(latex, '<code>' .. args["latex" .. n] .. '</code>')
			n = n + 1
		end
		add(('|-\n! LaTeX input\n| %s\n'):format(
			table.concat(latex, ", ")
		))
	end

	-- Egyptian Hieroglyphs
	if args.gardiner then
		add(('|-\n! Gardiner number\n| [http://vincent.euverte.free.fr/Rosette/Rosette_410.php?Hiero=%s&Lang=E %s]\n'):format(
			args.gardiner, args.gardiner
		))
	end
	if args.mdc then
		add(('|-\n! Manual de Codage\n| %s\n'):format(
			args.mdc
		))
	end
	if args.egpz then
		add(('|-\n! EGPZ 1.0\n| %s\n'):format(
			args.egpz
		))
	end

	-- Previous / next code point navigation, laid out in a nested table.
	add((
		'|-\n| colspan="2" |\n' ..
		'{| style="width: 100%%;"\n' ..
		'|-\n' ..
		'| style="text-align: left;" | %s\n' ..
		'| style="text-align: right;" | %s\n' ..
		'|}\n'):format(
		present_codepoint(codepoint - 1, false),
		present_codepoint(codepoint + 1, true)
	))

	add('|}')

	if cat_name then
		-- Sort key is a space followed by the character itself.
		add("[[Category:" .. cat_name .. "| " .. mw.ustring.char(codepoint) .. "]]")
	end

	return table.concat(table_markup)
end
return export