Модуль:character info

Материал из Викисловаря

Для документации этого модуля может быть создана страница Модуль:character info/Документация

--[[
https://en.wiktionary.org/wiki/Module:character_info
This module is in beta stage.
Its interface has been stabilised, but the module may still contain errors. Do not deploy widely until the module has been tested.
This module is intended to replace {{character info}} and templates which are implemented in terms of it.
]]

local m_unicode = require('Module:Unicode data')
local m_uni_aliases = mw.loadData('Module:Unicode data/aliases')
local m_scripts = require("Module:scripts/data")

-- Table of public entry points returned at the end of the module.
local export = {}

-- Script codes for which export.exotic_symbol_warning shows the
-- "editnotice-exotic symbols" template; used as a set (code -> true).
local dingbat_scripts = {
	Zsym = true,
	Zmth = true,
	Zyyy = true,
}

--- Produces the edit notice for not-yet-created single-character pages whose
-- character belongs to one of the scripts listed in dingbat_scripts.
-- @param frame  frame object; only used to expand the notice template
-- @return the expanded "editnotice-exotic symbols" template, or "" when the
--         current page exists, its full title is not exactly one character
--         long, or the character's script is not in dingbat_scripts
function export.exotic_symbol_warning(frame)
	local page = mw.title.getCurrentTitle()

	-- The notice is only relevant for missing pages titled with one character.
	if page.exists or mw.ustring.len(page.fullText) ~= 1 then
		return ""
	end

	local script = m_unicode.get_script(mw.ustring.codepoint(page.fullText))
	if not dingbat_scripts[script] then
		return ""
	end

	return frame:expandTemplate { title = "editnotice-exotic symbols" }
end

-- Highest valid Unicode code point; neighbours outside 0..MAX_CODEPOINT are not looked up.
local MAX_CODEPOINT = 0x10FFFF

--- Appends the "Composition" and "Dubeolsik input" rows for a precomposed Hangul syllable.
-- @param table_markup  array of wikitext fragments being accumulated by export.show
-- @param codepoint     code point of the Hangul syllable
local function add_hangul_rows(table_markup, codepoint)
	local m_Kore = require('Module:ko-hangul')
	-- NOTE(review): the tables below assume li and vi are 0-based indices and that ti is 0
	-- when the syllable has no final consonant -- confirm against Module:ko-hangul.
	local li, vi, ti = m_Kore.syllable2JamoIndices(codepoint)

	-- Hangul compatibility jamo code points for each initial, vowel and final index.
	local initial_to_letter = { [0] =
		0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,
		0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314A, 0x314B,
		0x314C, 0x314D, 0x314E,
	}

	local vowel_to_letter = { [0] =
		0x314F, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156,
		0x3157, 0x3158, 0x3159, 0x315A, 0x315B, 0x315C, 0x315D, 0x315E,
		0x315F, 0x3160, 0x3161, 0x3162, 0x3163,
	}

	local final_to_letter = {
		0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, 0x3139,
		0x313A, 0x313B, 0x313C, 0x313D, 0x313E, 0x313F, 0x3140, 0x3141,
		0x3142, 0x3144, 0x3145, 0x3146, 0x3147, 0x3148, 0x314A, 0x314B,
		0x314C, 0x314D, 0x314E,
	}

	-- Keystrokes for each jamo on the Dubeolsik keyboard layout.
	local dubeolsik = {
		['ㅂ'] = 'q'; ['ㅃ'] = 'Q'; ['ㅈ'] = 'w'; ['ㅉ'] = 'W'; ['ㄷ'] = 'e';
		['ㄸ'] = 'E'; ['ㄱ'] = 'r'; ['ㄲ'] = 'R'; ['ㅅ'] = 't'; ['ㅆ'] = 'T';
		['ㅛ'] = 'y'; ['ㅕ'] = 'u'; ['ㅑ'] = 'i'; ['ㅐ'] = 'o'; ['ㅒ'] = 'O';
		['ㅔ'] = 'p'; ['ㅖ'] = 'P'; ['ㅁ'] = 'a'; ['ㄴ'] = 's'; ['ㅇ'] = 'd';
		['ㄹ'] = 'f'; ['ㅎ'] = 'g'; ['ㅗ'] = 'h'; ['ㅓ'] = 'j'; ['ㅏ'] = 'k';
		['ㅣ'] = 'l'; ['ㅋ'] = 'z'; ['ㅌ'] = 'x'; ['ㅊ'] = 'c'; ['ㅍ'] = 'v';
		['ㅠ'] = 'b'; ['ㅜ'] = 'n'; ['ㅡ'] = 'm';

		['ㅘ'] = 'h-k'; ['ㅙ'] = 'h-o'; ['ㅚ'] = 'h-l'; ['ㅝ'] = 'n-j';
		['ㅞ'] = 'n-p'; ['ㅟ'] = 'n-l'; ['ㅢ'] = 'm-l';

		-- XXX: "composite" letters; should they be further decomposed?
		['ㄶ'] = 's-g'; ['ㄽ'] = 'f-t'; ['ㄵ'] = 's-w';
	}

	-- The remaining composite finals were missing from the map above, so syllables
	-- ending in them produced a nil keystroke and a formatting error. They are keyed
	-- by code point here so that look-alike jamo cannot be entered by mistake.
	local extra_composite_finals = {
		[0x3133] = 'r-t', -- KIYEOK-SIOS
		[0x313A] = 'f-r', -- RIEUL-KIYEOK
		[0x313B] = 'f-a', -- RIEUL-MIEUM
		[0x313C] = 'f-q', -- RIEUL-PIEUP
		[0x313E] = 'f-x', -- RIEUL-THIEUTH
		[0x313F] = 'f-v', -- RIEUL-PHIEUPH
		[0x3140] = 'f-g', -- RIEUL-HIEUH
		[0x3144] = 'q-t', -- PIEUP-SIOS
	}
	for cp, keys in pairs(extra_composite_finals) do
		dubeolsik[mw.ustring.char(cp)] = keys
	end

	-- Two jamo for an open syllable, three when there is a final consonant.
	local jamo = { initial_to_letter[li], vowel_to_letter[vi] }
	if ti ~= 0 then
		jamo[3] = final_to_letter[ti]
	end

	local links, keystrokes = {}, {}
	for i, cp in ipairs(jamo) do
		links[i] = ('[[&#%u;]]'):format(cp)
		keystrokes[i] = dubeolsik[mw.ustring.char(cp)]
	end

	table.insert(table_markup,
		'|-\n! Composition\n| <big class="Kore" lang="">' ..
		table.concat(links, ' + ') .. '</big>\n'
	)

	table.insert(table_markup,
		'|-\n! \'\'[[w:Keyboard_layout#Hangul|Dubeolsik]]\'\' input\n| ' ..
		table.concat(keystrokes, '-') .. '\n'
	)
end

--- Renders the cell describing a neighbouring code point (previous or next).
-- @param codepoint  code point to present
-- @param np         true for the "next" cell (arrow on the right),
--                   false for the "previous" cell (arrow on the left)
-- @return wikitext for the cell; "" when the code point is outside the Unicode range
local function present_codepoint(codepoint, np)
	-- U+0000 has no predecessor and U+10FFFF no successor.
	if codepoint < 0 or codepoint > MAX_CODEPOINT then
		return ""
	end

	local display = ""
	local link_target

	if m_unicode.is_printable(codepoint) then
		link_target = m_unicode.get_entry_title(codepoint)
		-- mw.title.new yields nil for an invalid title, so guard before reading .exists.
		local target_title = link_target and mw.title.new(link_target)
		if not (target_title and target_title.exists) then
			link_target = nil
		end

		display = ('<bdi class="character-sample %s">&#%u;</bdi>'):format(
			m_unicode.get_script(codepoint) or "", codepoint
		)
	end

	return (
		(link_target and '[[%s|<span title="%s">' or '<!-- %s --><span title="%s">') ..
		(np and '<small>[U+%04X]</small> %s <!-- U+%04X --> →'
		or '← <!-- U+%04X --> %s <small>[U+%04X]</small>') ..
		(link_target and '</span>]]' or '</span>')
	):format(
		link_target or "", mw.text.encode(m_unicode.lookup_name(codepoint)),
		codepoint, display, codepoint
	)
end

--- Builds the character information box for one Unicode character.
-- Reads these arguments from the parent frame:
--   codepoint         number, character or HTML entity; defaults to the page title
--   image, caption    optional illustration and its caption
--   sc                script code overriding the detected script
--   nocat             when set, suppresses the category
--   langs, standards  free-text rows
--   latex, latex2...  LaTeX input sequences
--   gardiner, mdc, egpz  Egyptian hieroglyph data
-- @param frame  frame object of the #invoke call
-- @return wikitext of the table, followed by a category link where applicable
--         ("" on a Template page whose title is not a single character)
-- Raises an error when the codepoint argument cannot be interpreted, when the page
-- title is not a single character, or when the sc argument is not a known script code.
function export.show(frame)
	local args = frame:getParent().args
	local codepoint = args.codepoint or ""
	local image = args.image or ""

	if codepoint ~= "" then
		-- Accept a number, a literal character, or an HTML entity.
		codepoint = tonumber(codepoint) or mw.text.decode(codepoint)
		if (type(codepoint) == "string") and (mw.ustring.len(codepoint) == 1) then
			codepoint = mw.ustring.codepoint(codepoint)
		elseif type(codepoint) ~= "number" then
			error("Unrecognised string given for the codepoint parameter")
		end
	else
		local title = mw.title.getCurrentTitle()
		if title.fullText == frame:getParent():getTitle() then
			-- Viewing the calling template's own page: use U+FFFD as the sample.
			codepoint = 0xfffd
		elseif mw.ustring.len(title.fullText) == 1 then
			codepoint = mw.ustring.codepoint(title.fullText)
		else
			if title.nsText == "Template" then return "" end
			error("Page title is not a single Unicode character")
		end
	end

	local table_markup = {}
	table.insert(table_markup,
		'{| style="border:1px solid #aaa; border-spacing:5px; background-color:#f9f9f9; color:black; margin:0 0 0.5em 0.5em; padding:4px; float:right; clear:right; width:260px; text-align:left; font-size:90%; line-height:1.5em;"\n')

	if image ~= "" then
		if not image:match("\127") then -- <hiero> tags generate these; pass them through
			if image:match("^%[?%[?File:") or image:match("^%[?%[?Image:") then
				-- Reduce a full file link to the bare file name.
				image = image:gsub("^%[%[", ""):gsub("^File:", ""):gsub("^Image:", ""):gsub("|.*", ""):gsub("]]", "")
			end
			image = "[[File:" .. image .. "|280x200px]]"
		end

		table.insert(table_markup,
			('|-\n| colspan="2" style="text-align: center;" | %s<br/>%s\n'):format(
				image, args.caption or ""
			)
		)
	elseif args.caption then
		table.insert(table_markup,
			('|-\n| colspan="2" style="text-align: center;" | %s\n'):format(
				args.caption
			)
		)
	end

	-- An empty sc= is treated like an absent one so the script is auto-detected.
	local explicit_sc = args.sc
	if explicit_sc == "" then
		explicit_sc = nil
	end
	local script_code = explicit_sc or m_unicode.get_script(codepoint)
	local script_data = script_code and m_scripts[script_code]
	if not script_data then
		if explicit_sc then
			error('Unrecognised script code "' .. explicit_sc .. '" given for the sc parameter')
		end
		-- No data for the detected script: render the box without a script name or category.
		script_data = {}
	end
	local script_name = script_data.canonicalName

	local namespace = mw.title.getCurrentTitle().namespace

	local cat_name
	if not args.nocat and ((namespace == 0) or (namespace == 100)) then -- main and Appendix
		if script_data.character_category ~= nil then
			-- false means no category, overriding the default below
			cat_name = script_data.character_category or nil
		elseif script_name then
			cat_name = script_name .. " script characters"
		end
	end

	table.insert(table_markup,
		('|-\n! Character\n| <bdi class="character-sample %s">&#%u;</bdi>\n'):format(
			script_code or "", codepoint
		)
	)

	table.insert(table_markup,
		('|-\n! Unicode name\n| style="font-size: smaller;" | %s\n'):format(
			mw.text.encode(m_unicode.lookup_name(codepoint))
		)
	)

	local aliases = m_uni_aliases[codepoint]

	if aliases then
		-- Group the alias names by their kind (alias[1]); alias[2] is the name itself.
		local classif = {}
		for _, alias in ipairs(aliases) do
			if not classif[alias[1]] then
				classif[alias[1]] = {}
			end
			table.insert(classif[alias[1]], mw.text.encode(alias[2]))
		end

		if classif.correction then
			for _, name in ipairs(classif.correction) do
				table.insert(table_markup,
					('|-\n! Corrected name\n| style="font-size: smaller;" | %s\n'):format(
						name
					)
				)
			end
		end

		if classif.abbreviation then
			table.insert(table_markup,
				('|-\n! Abbreviation(s)\n| | %s\n'):format(
					table.concat(classif.abbreviation, ", ")
				)
			)
		end

		if classif.alternate then
			for _, name in ipairs(classif.alternate) do
				table.insert(table_markup,
					('|-\n! Alternative name\n| style="font-size: smaller;" | %s\n'):format(
						name
					)
				)
			end
		end
	end

	table.insert(table_markup,
		('|-\n! Code point\n| [http://unicode.org/cldr/utility/character.jsp?a=%.4X U+%.4X]\n'):format(
			codepoint, codepoint
		)
	)

	-- The entity number is the code point in decimal.
	table.insert(table_markup,
		('|-\n! [[w:List of XML and HTML character entity references|Entity number]]\n| &amp;#%d;\n'):format(
			codepoint
		)
	)

	local block_name = mw.text.encode(m_unicode.lookup_block(codepoint))
	table.insert(table_markup,
		('|-\n! Unicode block\n| [[Appendix:Unicode/%s|%s]]\n'):format(
			block_name, block_name
		)
	)

	if block_name == "Hangul Syllables" then
		add_hangul_rows(table_markup, codepoint)
	else
		-- Show the canonical decomposition when it has more than one code point.
		local nfd = mw.ustring.toNFD(mw.ustring.char(codepoint))
		if mw.ustring.len(nfd) ~= 1 then
			local compo = {}

			for nfdcp in mw.ustring.gcodepoint(nfd) do
				-- XXX: combining characters are not yet converted to a spacing form,
				-- so every component is rendered the same way.
				table.insert(compo, ('<bdi class="character-sample">[[&#%u;]]</bdi> (U+%04X)'):format(nfdcp, nfdcp))
			end

			table.insert(table_markup,
				('|-\n! Composition \n| %s\n'):format(table.concat(compo, " +<br/>"))
			)
		end
	end

	if args.langs then
		table.insert(table_markup,
			('|-\n! Used in languages\n| %s\n'):format(
				args.langs
			)
		)
	end

	if args.standards then
		table.insert(table_markup,
			('|-\n! In other standards\n| %s\n'):format(
				args.standards
			)
		)
	end

	if args.latex then
		-- Collect latex, latex2, latex3, ... until the first missing number.
		local latex, n = { '<code>' .. args.latex .. '</code>' }, 2
		while args["latex" .. n] do
			table.insert(latex, '<code>' .. args["latex" .. n] .. '</code>')
			n = n + 1
		end
		table.insert(table_markup,
			('|-\n! LaTeX input\n| %s\n'):format(
				table.concat(latex, ", ")
			)
		)
	end

	-- [[ Egyptian Hieroglyphs
	if args.gardiner then
		table.insert(table_markup,
			('|-\n! Gardiner number\n| [http://vincent.euverte.free.fr/Rosette/Rosette_410.php?Hiero=%s&Lang=E %s]\n'):format(
				args.gardiner, args.gardiner
			)
		)
	end

	if args.mdc then
		table.insert(table_markup,
			('|-\n! Manual de Codage\n| %s\n'):format(
				args.mdc
			)
		)
	end

	if args.egpz then
		table.insert(table_markup,
			('|-\n! EGPZ 1.0\n| %s\n'):format(
				args.egpz
			)
		)
	end
	-- ]]

	-- Navigation row: previous code point on the left, next one on the right.
	table.insert(table_markup, (
		'|-\n| colspan="2" |\n' ..
		'{| style="width: 100%%;"\n' ..
		'|-\n' ..
		'| style="text-align: left;"  | %s\n' ..
		'| style="text-align: right;" | %s\n' ..
		'|}\n'):format(
			present_codepoint(codepoint - 1, false),
			present_codepoint(codepoint + 1, true)
		)
	)

	table.insert(table_markup, '|}')

	if cat_name then
		-- The sort key is a space followed by the character itself.
		table.insert(table_markup, "[[Category:" .. cat_name .. "| " .. mw.ustring.char(codepoint) .. "]]")
	end

	return table.concat(table_markup)
end

-- Hand the table of public functions (exotic_symbol_warning, show) back to the caller.
return export