Module:Unicode chart/sandbox

From Wikipedia, the free encyclopedia

local p = {}
local getArgs = require('Module:Arguments').getArgs
local yesno = require('Module:Yesno')

local mRedirect = require('Module:Redirect')

local mUnicode = require('Module:Unicode data')
local mCategory = require('Module:Unicode data/category')
local mVersion = require('Module:Unicode data/version')
local mAliases = require('Module:Unicode data/aliases')

local frame

-------------------
-- General settings
-------------------
-- Wikitext for the external link to the official code chart PDF. The single
-- %s placeholder is filled in via string.format by createTableHeader, which
-- passes the table id ('U' followed by the hex form of the range start).
local pdfLink = "[https://www.unicode.org/charts/PDF/%s.pdf"
			.. " Official Unicode Consortium code chart] (PDF)"
-- Footnotes for the cell categories that are rendered without a glyph.
--   note: wikitext of the footnote shown in the table footer
--   flag: set to true by createCell when at least one cell of that category
--         was produced; createTable resets it to false before each table and
--         createTableFooter prints the note only while it is true
local cellType = {
	reserved = {
		note = "Grey areas indicate non-assigned code points",
		flag = false
	},
	noncharacter = {
		note = "Black areas indicate [[Universal Character Set characters#Noncharacters|noncharacters]] (code points that are guaranteed never to be assigned as encoded characters in the Unicode Standard)",
		flag = false
	}
}

-- Ranges of code points whose cell abbreviation is a fixed prefix followed
-- by a running number. Each entry has:
--   first, last: inclusive bounds of the code point range
--   str:         prefix of the abbreviation (wikitext/HTML)
--   startNum:    number given to the code point `first`
-- getCellAbbr builds the abbreviation as
--   str .. (codepoint - first + startNum)
local hardcodedNumberedAbbrSets = {
	-- Block: Variation Selectors
	{first = 0xFE00, last = 0xFE0F, str = "VS<br>", startNum = 1},
	-- Block: Variation Selectors Supplement
	-- (numbering continues after the 16 selectors of the block above)
	{first = 0xE0100, last = 0xE01EF, str = "VS<br>", startNum = 17},
	-- Block: Sutton SignWriting
	-- SIGNWRITING FILL MODIFIER-2 -> SW F#
	{first = 0x1DA9B, last = 0x1DA9F, str = 'SW<br>F', startNum = 2},
	-- Block: Sutton SignWriting
	-- SIGNWRITING ROTATION MODIFIER-2 -> SW R#
	{first = 0x1DAA1, last = 0x1DAAF, str = 'SW<br>R', startNum = 2},
}

-- Ranges of code points that get a computed cell text. Each entry has:
--   first, last: inclusive bounds of the code point range
--   func:        function(codepoint, abbr) returning the text to display,
--                where codepoint is the numeric code point
-- NOTE(review): in the visible source this table is only referenced from a
-- commented-out loop in getCellAbbr, so it currently appears to be unused.
local specialFormatSets = {
	-- Unicode block: Tags
	-- Maps a tag character to the character it tags by subtracting 0xE0000
	-- and emitting the result as an HTML numeric character reference.
	{first = 0xE0021, last = 0xE007E,
		-- The abbr parameter is accepted but not used by this function.
		func = function(codepoint, abbr)
			return '&#x'.. string.format("%04X", (codepoint - 0xE0000)) .. ';'
		end
	},
}

-------------------------
-- pseudo-object oriented
-------------------------
-- Builds a code point record carrying both the numeric and the hexadecimal
-- representation of a code point.
--
-- x: either a string of hexadecimal digits (e.g. "3A0") or a number.
-- Returns a table {int = <number>, hex = <string>}; returns nothing when x is
-- neither a string nor a number.
--
-- The hex field is always re-derived from the parsed number (upper case,
-- zero-padded to at least four digits), so that "3a0", "3A0" and 0x3A0 all
-- yield the same record. Callers build ids, titles and row indexes from the
-- hex field, so a raw user string such as "5" or "3a0" would otherwise give
-- inconsistent output. If the string cannot be parsed as hexadecimal, int is
-- nil and hex keeps the original text, as before.
--
-- NOTE(review): this function is declared without `local` and is therefore a
-- global; that is kept unchanged here so the existing interface is preserved.
function newCodepoint(x)
	if type(x) == "string" then
		local int = tonumber(x, 16)
		return {
			hex = int and string.format("%04X", int) or x,
			int = int
		}
	elseif type(x) == "number" then
		return {
			int = x,
			hex = string.format("%04X", x)
		}
	end
end

-------------------------
-- Sundry small functions
-------------------------
-- Expands a wiki template through the frame stored by p.main.
-- template: title of the template to expand
-- argslist: table of arguments handed to the template
-- Returns whatever frame:expandTemplate returns.
local function expandTemplate(template, argslist)
	local call = {title = template, args = argslist}
	return frame:expandTemplate(call)
end

-- Converts a string of hexadecimal digits to a number.
-- Returns nil when the string is not valid hexadecimal.
local function fromHex(hexStr)
	local value = tonumber(hexStr, 16)
	return value
end

-- Parses a "hex:value" list into a table keyed by numeric code point.
--
-- strList: entries separated by ';', newline or tab; each entry has the form
--          <hex code point>:<value>
-- Returns a table mapping the numeric code point to the trimmed value.
--
-- Only the FIRST colon of an entry separates key from value, so a value may
-- itself contain colons (for example a link target with a namespace prefix).
-- Entries without a colon, or whose key is not valid hexadecimal, are
-- skipped; indexing the result table with a nil key would otherwise raise an
-- error. When the same code point occurs more than once, the last entry wins.
local function splitColonList(strList)
	local tab = {}
	local segments = mw.text.split(strList, '[;\n\t]')
	for _, segment in ipairs(segments) do
		local key, value = string.match(segment, '^([^:]*):(.*)$')
		-- key is nil when the segment holds no colon at all
		local codepoint = key and fromHex(key)
		if codepoint then
			tab[codepoint] = mw.text.trim(value)
		end
	end
	return tab
end

-- Determines the category name used for a cell.
-- codepoint: code point record; its int field is looked up
-- Returns the result of mUnicode.lookup_control unless that is
-- "unassigned"; in that case returns "noncharacter" when
-- mUnicode.is_noncharacter reports so, and "reserved" otherwise.
local function getCategory(codepoint)
	local value = codepoint.int
	local category = mUnicode.lookup_control(value)
	if category ~= "unassigned" then
		return category
	end
	return mUnicode.is_noncharacter(value) and "noncharacter" or "reserved"
end


-- Collects alias values recorded for a code point in mAliases.
-- n:   numeric code point used as index into mAliases
-- key: optional filter; when given, only entries whose first element equals
--      key are collected, otherwise all entries are
-- Returns a (possibly empty) list of the second element of each matching
-- entry, in the order the entries are stored.
local function getAliasValues(n, key)
	local values = {}
	local entries = mAliases[n]
	if not entries then
		return values
	end
	for _, entry in ipairs(entries) do
		local wanted = not key or entry[1] == key
		if wanted then
			table.insert(values, entry[2])
		end
	end
	return values
end

---------------------
-- A single unicode cell within the table
---------------------
-- Chooses the abbreviation shown in a cell instead of the character itself.
-- codepoint: code point record (int field is used)
-- category:  category string as returned by getCategory
-- args:      module arguments; args['abbr_sub'] may map code points to
--            abbreviations
-- Returns the abbreviation text, or false when the cell needs none.
--
-- Sources are tried in this order, the first hit wins:
--   1. the abbr_sub argument
--   2. the numbered ranges in hardcodedNumberedAbbrSets
--   3. the first "abbreviation" alias of the code point
--   4. for "control" and "format" characters only: the initials of the
--      words of the character name, wrapped in a span with class "red"
-- The ranges in specialFormatSets are not consulted here.
local function getCellAbbr(codepoint, category, args)
	local value = codepoint.int

	-- 1. explicit override from the arguments
	local overrides = args['abbr_sub']
	if overrides and overrides[value] then
		return overrides[value]
	end

	-- 2. hard-coded numbered ranges
	for _, set in pairs(hardcodedNumberedAbbrSets) do
		if value >= set.first and value <= set.last then
			return set.str .. (value - set.first + set.startNum)
		end
	end

	-- 3. abbreviation alias
	local aliasAbbr = getAliasValues(value, "abbreviation")[1]
	if aliasAbbr then
		return aliasAbbr
	end

	-- 4. generated from the initials of the name
	if category == "control" or category == "format" then
		local initials = {}
		local words = mw.text.split(mUnicode.lookup_name(value), ' ')
		for _, word in pairs(words) do
			initials[#initials + 1] = string.sub(word, 1, 1)
		end
		return '<span class="red">' .. table.concat(initials) .. '</span>'
	end

	return false
end

-- Formats all aliases of a code point for use in a cell tooltip.
-- codepoint: code point record (int field is used to index mAliases)
-- Returns one " (alias <value>)" fragment per alias entry, concatenated in
-- stored order, or the empty string when the code point has no entry.
local function aliasesStr(codepoint)
	local entries = mAliases[codepoint.int]
	if not entries then
		return ""
	end
	local parts = {}
	for _, entry in ipairs(entries) do
		parts[#parts + 1] = " (alias " .. entry[2] .. ")"
	end
	return table.concat(parts)
end

-- Wraps the displayed character in a link, if linking was requested.
-- unicodeChar: wikitext of the character to display
-- codepoint:   code point record (int field is used)
-- args:        module arguments; reads args['link_sub'] (table mapping code
--              points to link targets) and args['link']
-- Returns the linked wikitext, or nil when no linking applies so that the
-- caller can fall back to the plain character.
--
-- Precedence: an entry in link_sub wins; otherwise link == "wiki" expands
-- the "Link if exists" template and link == "wikt" links to the wikt:
-- interwiki prefix.
local function linkChar(unicodeChar, codepoint, args)
	local target = args['link_sub'] and args['link_sub'][codepoint.int]
	if target then
		return '[[' .. target .. '|' .. unicodeChar .. ']]'
	elseif args['link'] == "wiki" then
		-- The template is the only thing needed here; the former per-cell
		-- redirect lookup computed a value that was never used.
		return expandTemplate('Link if exists', {unicodeChar})
	elseif args['link'] == "wikt" then
		return '[[wikt:' .. unicodeChar .. '|' .. unicodeChar .. ']]'
	end
	return nil
end

-- Fills in one table cell for a code point.
-- cell:      mw.html node of the <td> to populate
-- codepoint: code point record (int and hex fields are used)
-- args:      module arguments; reads 'suffix', 'wrapper' and 'font' here and
--            passes the table on to getCellAbbr and linkChar
--
-- The category is added as a CSS class. Reserved and noncharacter cells stay
-- empty and only raise the matching footnote flag in cellType; cells with an
-- abbreviation show it in a boxed div; all other cells show the character
-- itself, optionally linked, suffixed, wrapped in a template or given a font.
-- Every cell gets a title attribute with code point, name and aliases.
local function createCell(cell, codepoint, args)
	local value = codepoint.int
	local category = getCategory(codepoint)
	cell:addClass(category)
	local abbr = getCellAbbr(codepoint, category, args)

	if category == "reserved" or category == "noncharacter" then
		-- nothing to display; remember that the footnote is needed
		cellType[category].flag = true
	elseif abbr then
		cell:addClass("abbr-cell")
		cell:tag("div"):addClass("abbr-box"):wikitext(abbr)
	else
		local glyph = '&#x'.. codepoint.hex .. ';'
		glyph = linkChar(glyph, codepoint, args) or glyph

		-- optional extra character appended after the glyph
		local suffixes = args['suffix']
		if suffixes and suffixes[value] then
			glyph = glyph .. '&#x' .. suffixes[value] .. ';'
			cell:addClass("modified")
		end

		-- a wrapper template takes precedence over a plain font setting
		if args['wrapper'] then
			glyph = expandTemplate(args['wrapper'], {glyph})
		elseif args['font'] then
			cell:css("font-family", "'" .. args['font'] .. "'")
		end
		cell:wikitext(glyph)
	end

	-- Names of the form "<word-XXXX>" are reduced to the lowercase word.
	local name = mUnicode.lookup_name(value)
	local label = string.match(name, "<([a-z]+)-%w+>") or name
	cell:attr("title", 'U+' .. codepoint.hex .. ': ' .. label .. aliasesStr(codepoint))
end

---------------------
-- For loops creating the grid of cells
---------------------
-- Adds the column label row and all code point rows to the table.
-- body:       mw.html node the rows are appended to
-- rangeStart: code point record of the first code point in the range
-- rangeEnd:   code point record of the last code point in the range
-- args:       module arguments, passed through to createCell
--
-- Each group of 16 code points produces two <tr> elements: one with the
-- character cells (headed by a "U+XXXx" row label spanning both rows) and
-- one with the code point values below them.
local function createTableBody(body, rangeStart, rangeEnd, args)
	--0 through F label row
	local labelRow = body:tag("tr")
	labelRow:tag("th")--empty corner cell
			:css("width", "45pt")
	for colIndex = 0, 15 do
		labelRow:tag("th"):wikitext(string.format("%X", colIndex))
			:css("width", "20pt")
	end

	-- A row index is the code point without its last hex digit. It is
	-- computed numerically rather than by slicing the hex string, so that
	-- short or unpadded hex text (e.g. "5") cannot produce an empty string
	-- and a failing loop bound.
	local rowStart = math.floor(rangeStart.int / 16)
	local rowEnd = math.floor(rangeEnd.int / 16)
	for rowIndex = rowStart, rowEnd do
		local rowBase = rowIndex * 16
		local rowHex = string.format("%03X", rowIndex)
		local row = body:tag("tr")
		row:tag("th"):wikitext("U+".. rowHex .. "<i>x</i>")
				:attr("rowspan", "2")
		for colIndex = 0, 15 do
			local cell = row:tag("td")
			createCell(cell, newCodepoint(rowBase + colIndex), args)
		end
		--second row: the code point value below each character
		local subrow = body:tag("tr")
		for colIndex = 0, 15 do
			subrow:tag("td"):addClass("codepoint")
				:wikitext(string.format("%04X", rowBase + colIndex))
		end
	end
end

---------------------
-- Header at top of table
---------------------
-- Adds the header cell with the block title, the chart PDF link and the
-- reference label for the version footnote.
-- head: mw.html node the header cell is appended to
-- name: block name; the link target is resolved through mRedirect.luaMain
--       from "<name> (Unicode block)"
-- id:   table id, used for the PDF file name and the ref label anchor
local function createTableHeader(head, name, id)
	local page = mRedirect.luaMain(name .. " (Unicode block)", false)

	local headerCell = head:tag("th")
	headerCell:addClass("header")
	headerCell:attr("colspan", "100%")

	local titleLink = "<b>[[" .. page .. "|" .. name .. "]]</b>"
	local chartLink = string.format(pdfLink, id)
	local refLabel = expandTemplate('ref label', {id .. '_as_of_Unicode_version', 1})
	headerCell:wikitext(titleLink .. "<br />" .. chartLink .. refLabel)
end

---------------------
-- Footer at bottom of table
---------------------
-- Adds the footer cell holding the numbered list of notes.
-- foot: mw.html node the footer cell is appended to
-- id:   table id, used for the anchor of the version note
-- note: optional extra note (wikitext) appended as the last list item
--
-- The list always starts with the Unicode version note, followed by the note
-- of every cellType entry whose flag is set, then the manual note if given.
local function createTableFooter(foot, id, note)
	local footerCell = foot:tag("th")
	footerCell:addClass("footer")
	footerCell:attr("colspan", "100%")
	footerCell:wikitext("<b>Notes</b>")

	local list = footerCell:tag("ol")

	--Version note
	local versionItem = list:tag("li")
	local anchor = expandTemplate('note', {id .. '_as_of_Unicode_version'})
	versionItem:wikitext(
		anchor,
		expandTemplate('Unicode version', {prefix = 'Asof', version = mVersion})
	)

	--Notes about categories of cells that actually occurred
	for _, info in pairs(cellType) do
		if info.flag then
			list:tag("li"):wikitext(info.note)
		end
	end

	--Manual note
	if note then
		list:tag("li"):wikitext(note)
	end
end

---------------------
-- Creates table
---------------------
-- Builds the complete chart table and returns it as a string.
-- rangeStart: code point record of the first code point
-- rangeEnd:   code point record of the last code point
-- args:       module arguments; 'blockname' enables the header and 'note'
--             is passed to the footer
local function createTable(rangeStart, rangeEnd, args)
	-- footnote flags are module-level state, so clear them for this table
	cellType.noncharacter.flag = false
	cellType.reserved.flag = false

	local id = 'U' .. rangeStart.hex

	local tbl = mw.html.create("table")
	tbl:addClass("wikitable")
	tbl:addClass("unicode-block")

	local blockName = args['blockname']
	if blockName then
		createTableHeader(tbl, blockName, id)
	end
	createTableBody(tbl, rangeStart, rangeEnd, args)
	createTableFooter(tbl, id, args['note'])

	return tostring(tbl)
end

---------------------
-- Main
---------------------
-- Entry point of the module.
-- frameArg: the frame object; it is stored in the module-level `frame`
--           variable (used by expandTemplate) and its arguments are read
--           through getArgs
--
-- Recognised arguments handled here:
--   abbr_sub, link_sub, suffix: "hex:value" lists, parsed with
--                               splitColonList before further use
--   blockname:                  block whose range is looked up through
--                               mUnicode.get_block_info
--   rangestart, rangeend:       hexadecimal range bounds, used only when
--                               blockname is absent
-- Returns the rendered table as a string, or a short message when the range
-- cannot be determined. A missing or unparsable range yields a message
-- rather than an empty result or a script error further down.
function p.main(frameArg)
	frame = frameArg
	local args = getArgs(frame)

	for _, argName in ipairs({'abbr_sub', 'link_sub', 'suffix'}) do
		if args[argName] then
			args[argName] = splitColonList(args[argName])
		end
	end

	local rangeStart, rangeEnd
	if args['blockname'] then
		local range = mUnicode.get_block_info(args['blockname'])
		if range == nil then
			return "invalid blockname"
		end
		rangeStart = newCodepoint(range[1])
		rangeEnd = newCodepoint(range[2])
	elseif args['rangestart'] and args['rangeend'] then
		rangeStart = newCodepoint(args['rangestart'])
		rangeEnd = newCodepoint(args['rangeend'])
	else
		return "missing blockname or rangestart and rangeend"
	end

	-- newCodepoint gives no record for unsupported types and no int for
	-- text that is not hexadecimal; neither can be rendered.
	if not (rangeStart and rangeStart.int and rangeEnd and rangeEnd.int) then
		return "invalid range"
	end

	return createTable(rangeStart, rangeEnd, args)
end

return p