Module:data consistency check
Documentation for this module may be created at Module:data consistency check/doc
-- Public interface of the module; populated further down and returned at the end of the file.
local export = {}

-- Report lines collected so far, keyed by the name of the data module they concern.
local messages = {}

-- `unpack` is a global on Lua 5.1 and lives in `table` from Lua 5.2 onwards.
local unpack = unpack or table.unpack

-- Records one formatted discrepancy message against a data module.
--   modname: name of the data module the message concerns (key into `messages`)
--   fmt:     string.format-style format string
--   ...:     values substituted into `fmt`
-- Arguments that are neither strings nor numbers (nil, booleans, tables) are
-- converted with tostring first: on Lua 5.1 string.format("%s", nil) raises an
-- error, which would abort the whole report just because a data entry is
-- missing the very field being complained about.
local function discrepancy(modname, fmt, ...)
	-- select("#", ...) gives the true argument count even with trailing nils.
	local argc = select("#", ...)
	local args = { ... }
	for i = 1, argc do
		local kind = type(args[i])
		if kind ~= "string" and kind ~= "number" then
			args[i] = tostring(args[i])
		end
	end

	local list = messages[modname]
	if not list then
		list = {}
		messages[modname] = list
	end
	list[#list + 1] = string.format(fmt, unpack(args, 1, argc))
end
-- Shared bookkeeping filled in by the check_* functions in this file.

-- Every language, etymology-language and family code seen so far, mapped to the
-- name of the data module that first defined it (used to detect duplicate codes).
local all_codes = {}
-- Canonical language name -> the code that first claimed it.
local language_names = {}
-- Canonical family name -> the code that first claimed it.
local family_names = {}
-- Canonical script name -> the code that first claimed it.
local script_names = {}
-- Set of codes that are referenced as the family of a language, the parent of an
-- etymology language, or the parent family of a family.
local nonempty_fams = {}
-- Set of script codes that appear in the script list of at least one language.
local nonempty_scrs = {}
-- Validates every entry of the language data modules (two-letter codes,
-- three-letter codes split by initial letter, and exceptional codes) and records
-- which family and script codes are in use in nonempty_fams / nonempty_scrs.
-- Problems are reported through discrepancy(); nothing is returned.
local function check_languages()
	local m_family_data = mw.loadData('Module:families/data')
	local m_script_data = mw.loadData('Module:scripts/data')

	-- Returns a wikilink to the language's category, or "???" when no name is known.
	local function link(name)
		if not name then
			return "???"
		elseif name:find("[Ll]anguage$") then
			return "[[:Category:" .. name .. "|" .. name .. "]]"
		else
			return "[[:Category:" .. name .. " language|" .. name .. " language]]"
		end
	end

	-- Runs the checks that apply to a language entry regardless of its module.
	--   modname: data module name (without the "Module:" prefix) the entry came from
	--   code:    the language code, i.e. the entry's key in that module
	--   data:    the entry itself
	local function check_language(modname, code, data)
		local canonical = data.names and data.names[1]

		-- Codes must be unique across all data modules.
		if all_codes[code] then
			discrepancy(modname, "Code <code>%s</code> is not unique, is also defined in [[Module:%s]].", code, all_codes[code])
		else
			all_codes[code] = modname
		end

		-- Canonical names must exist and be unique among languages.
		if not canonical then
			discrepancy(modname, "Code <code>%s</code> has no canonical name specified.", code)
		elseif language_names[canonical] then
			discrepancy(modname, "%s (<code>%s</code>) has a canonical name that is not unique, it is also used by the code <code>%s</code>.", link(canonical), code, language_names[canonical])
		else
			language_names[canonical] = code
		end

		local kind = data.type
		if kind ~= "regular" and kind ~= "reconstructed" and kind ~= "appendix-constructed" then
			-- tostring: a missing type is nil, which string.format("%s") rejects on Lua 5.1.
			discrepancy(modname, "%s (<code>%s</code>) is of an invalid type <code>%s</code>.", link(canonical), code, tostring(kind))
		end

		if not (data.scripts and data.scripts[1]) then
			discrepancy(modname, "%s (<code>%s</code>) has no scripts listed.", link(canonical), code)
		else
			for _, sccode in ipairs(data.scripts) do
				if not m_script_data[sccode] then
					discrepancy(modname, "%s (<code>%s</code>) lists an invalid script code <code>%s</code>.", link(canonical), code, tostring(sccode))
				end
				-- Recorded even when invalid, as before; only valid codes are ever looked up.
				nonempty_scrs[sccode] = true
			end
		end

		local family = data.family
		if family == nil then
			-- Report the gap instead of crashing: using nil as a table key
			-- (nonempty_fams[nil] = true) raises "table index is nil".
			discrepancy(modname, "%s (<code>%s</code>) has no family specified.", link(canonical), code)
		else
			if not m_family_data[family] then
				discrepancy(modname, "%s (<code>%s</code>) has an invalid family code <code>%s</code>.", link(canonical), code, tostring(family))
			end
			nonempty_fams[family] = true
		end
	end

	-- Check two-letter codes
	do
		local modname = "languages/data2"
		for code, data in pairs(mw.loadData("Module:" .. modname)) do
			if not code:find("^[a-z][a-z]$") then
				discrepancy(modname, '%s (<code>%s</code>) does not have a two-letter code.', link(data.names and data.names[1]), code)
			end
			check_language(modname, code, data)
		end
	end

	-- Check three-letter codes; there is one data module per initial letter a-z.
	for byte = string.byte('a'), string.byte('z') do
		local letter = string.char(byte)
		local modname = "languages/data3/" .. letter
		for code, data in pairs(mw.loadData("Module:" .. modname)) do
			if not code:find("^" .. letter .. "[a-z][a-z]$") then
				discrepancy(modname, '%s (<code>%s</code>) does not have a three-letter code starting with "<code>%s</code>".', link(data.names and data.names[1]), code, letter)
			end
			check_language(modname, code, data)
		end
	end

	-- Check exceptional codes: anything that looks like a plain two- or
	-- three-letter code does not belong in this module.
	do
		local modname = "languages/datax"
		for code, data in pairs(mw.loadData("Module:" .. modname)) do
			if code:find("^[a-z][a-z][a-z]?$") then
				discrepancy(modname, '%s (<code>%s</code>) has a two- or three-letter code.', link(data.names and data.names[1]), code)
			end
			check_language(modname, code, data)
		end
	end
end
-- Validates the etymology-only language data: code uniqueness, presence of a
-- canonical name, validity of the parent code, and absence of parent cycles.
-- Parent codes are also recorded in nonempty_fams. Problems are reported
-- through discrepancy(); nothing is returned.
local function check_etym_languages()
	local modname = "etymology language/data"
	-- Deliberately loaded with require, not mw.loadData.
	local m_etym_language_data = require("Module:" .. modname)
	local m_language_data = mw.loadData("Module:languages/alldata")
	local m_family_data = mw.loadData('Module:families/data')

	-- Display name for a message (plain text, no wikilink); "???" when unknown.
	local function link(name)
		if not name then
			return "???"
		end
		if name:find("[Ll]anguage$") then
			return name
		end
		return name .. " language"
	end

	for code, data in pairs(m_etym_language_data) do
		local canonical = data.names and data.names[1]

		local owner = all_codes[code]
		if owner then
			discrepancy(modname, "Code <code>%s</code> is not unique, is also defined in [[Module:%s]].", code, owner)
		else
			all_codes[code] = modname
		end

		if not canonical then
			discrepancy(modname, "Code <code>%s</code> has no canonical name specified.", code)
		elseif not language_names[canonical] then
			language_names[canonical] = code
		end
		-- A canonical name that is already taken is tolerated here; the report is disabled:
		--discrepancy(modname, "%s (<code>%s</code>) has a canonical name that is not unique, it is also used by the code <code>%s</code>.", link(data.names[1]), code, language_names[data.names[1]])

		local parent = data.parent
		if not parent then
			discrepancy(modname, "Etymology-only %s (<code>%s</code>) has no parent language or family code.", link(canonical), code)
		else
			-- The parent may be a regular language, a family, or another etymology language.
			local known = m_language_data[parent] or m_family_data[parent] or m_etym_language_data[parent]
			if not known then
				discrepancy(modname, "Etymology-only %s (<code>%s</code>) has invalid parent language or family code <code>%s</code>.", link(canonical), code, parent)
			end
			nonempty_fams[parent] = true
		end
	end

	-- Cycle detection: follow the parent chain from every entry. `visiting` holds
	-- the entries met during the current walk, `checked` the entries whose chain
	-- has already been fully examined by an earlier walk.
	local checked = {}
	for start_code, start_data in pairs(m_etym_language_data) do
		local visiting = {}
		local cur_code, cur = start_code, start_data
		while cur and not checked[cur] do
			if visiting[cur] then
				-- Met the same entry twice within one walk: the chain loops.
				local parent_data = m_etym_language_data[cur.parent]
				discrepancy(modname, "%s (<code>%s</code>) has a cyclic parental relationship to %s (<code>%s</code>)",
					link(cur.names and cur.names[1]), cur_code,
					link(parent_data.names and parent_data.names[1]), cur.parent
				)
				break
			end
			visiting[cur] = true
			-- Step to the parent; the walk ends once the parent is missing or is
			-- not itself an etymology language.
			local parent = cur.parent
			cur_code = parent
			cur = parent and m_etym_language_data[parent]
		end
		for entry in pairs(visiting) do
			checked[entry] = true
		end
	end
end
-- Validates the family data: code uniqueness, unique canonical names, valid
-- parent family, at least one child family or language, and absence of cycles
-- in the parent chain. Must run after the language checks, which fill
-- nonempty_fams. Problems are reported through discrepancy(); nothing is returned.
local function check_families()
	local modname = "families/data"
	local m_family_data = mw.loadData("Module:" .. modname)

	-- Returns a wikilink to the family's category, or "???" when no name is known.
	local function link(name)
		if not name then
			return "???"
		elseif name:find("[Ll]anguages$") then
			return "[[:Category:" .. name .. "|" .. name .. " family]]"
		else
			return "[[:Category:" .. name .. " languages|" .. name .. " family]]"
		end
	end

	for code, data in pairs(m_family_data) do
		local canonical = data.names and data.names[1]

		if all_codes[code] then
			discrepancy(modname, "Code <code>%s</code> is not unique, is also defined in [[Module:%s]].", code, all_codes[code])
		else
			all_codes[code] = modname
		end

		if not canonical then
			discrepancy(modname, "<code>%s</code> has no canonical name specified.", code)
		elseif family_names[canonical] then
			discrepancy(modname, "%s (<code>%s</code>) has a canonical name that is not unique, it is also used by the code <code>%s</code>.", link(canonical), code, family_names[canonical])
		else
			family_names[canonical] = code
		end

		local parent = data.family
		if not parent then
			discrepancy(modname, "%s (<code>%s</code>) has no parent family specified.", link(canonical), code)
		else
			if not m_family_data[parent] then
				discrepancy(modname, "%s (<code>%s</code>) has an invalid parent family code <code>%s</code>.", link(canonical), code, tostring(parent))
			end
			-- Only recorded when a parent exists: indexing with a nil key
			-- (nonempty_fams[nil] = true) raises "table index is nil" and would
			-- abort the report right after the missing parent had been noted.
			nonempty_fams[parent] = true
		end
	end

	-- Second pass, once every family has registered its parent above.
	for code, data in pairs(m_family_data) do
		if not nonempty_fams[code] then
			discrepancy(modname, "%s (<code>%s</code>) has no child families or languages.", link(data.names and data.names[1]), code)
		end
	end

	-- Cycle detection: follow the parent chain from every family. `stack` holds
	-- the codes met during the current walk, `checked` the codes whose chain has
	-- already been examined. 'qfa-not' is pre-marked so walks stop when reaching it.
	local checked = { ['qfa-not'] = true }
	for start_code, start_data in pairs(m_family_data) do
		local stack = {}
		-- Walk with separate locals rather than reassigning the loop variables.
		local code, data = start_code, start_data
		while data and not checked[code] do
			if stack[code] then
				-- Met the same code twice within one walk: the chain loops.
				local parent_data = m_family_data[data.family]
				discrepancy(modname, "%s (<code>%s</code>) has a cyclic parental relationship to %s (<code>%s</code>)",
					link(data.names and data.names[1]), code,
					link(parent_data.names and parent_data.names[1]), data.family
				)
				break
			end
			stack[code] = true

			local parent = data.family
			code = parent
			if parent == nil then
				-- No parent: end of the chain (and avoid indexing the data with nil).
				data = nil
			else
				data = m_family_data[parent]
			end
		end
		for seen in pairs(stack) do
			checked[seen] = true
		end
	end
end
-- Validates the script data: presence of a canonical name, use by at least one
-- language, and a well-formed character-detection pattern. Must run after the
-- language checks, which fill nonempty_scrs. Problems are reported through
-- discrepancy(); nothing is returned.
local function check_scripts()
	local modname = "scripts/data"
	local m_script_data = mw.loadData("Module:" .. modname)

	-- Returns a wikilink to the script's category, or "???" when no name is known.
	local function link(name)
		if not name then
			return "???"
		end
		local title = name
		if not name:find("[Ss]cript$") then
			title = name .. " script"
		end
		return "[[:Category:" .. title .. "|" .. title .. "]]"
	end

	for code, data in pairs(m_script_data) do
		local canonical = data.names and data.names[1]
		if not canonical then
			discrepancy(modname, "Code <code>%s</code> has no canonical name specified.", code)
		elseif not script_names[canonical] then
			script_names[canonical] = code
		end
		-- A canonical name that is already taken is tolerated here; the report is disabled:
		--discrepancy(modname, "%s (<code>%s</code>) has a canonical name that is not unique, it is also used by the code <code>%s</code>.", link(data.names[1]), code, script_names[data.names[1]])

		local pattern = data.characters

		if not nonempty_scrs[code] then
			local suffix
			if pattern then
				suffix = ""
			else
				suffix = " and has no characters listed for auto-detection"
			end
			discrepancy(modname, "%s (<code>%s</code>) is not used for any language%s.", link(canonical), code, suffix)
		end

		-- Try the pattern against an empty string; an error from
		-- mw.ustring.find is taken to mean the pattern is malformed.
		if pattern and not pcall(mw.ustring.find, "", pattern) then
			discrepancy(modname, "%s (<code>%s</code>) specifies an invalid pattern for character detection: <code>%s</code>", link(canonical), code, pattern)
		end
	end
end
-- Entry point: runs all consistency checks and returns the report as wikitext.
--   frame: accepted but not used by this function.
-- Returns a success banner when nothing was reported, otherwise a warning
-- banner followed by one section per data module, sorted by module name.
function export.perform(frame)
	-- Order matters: the language checks populate nonempty_fams and
	-- nonempty_scrs, which the family and script checks consult to decide
	-- whether a family/script is ever used in the data.
	check_languages()
	check_etym_languages()
	check_families()
	check_scripts()

	-- Turn each module's message list into a finished wikitext section
	-- (stored back into `messages`) and collect the module names for sorting.
	local modnames = {}
	for modname, msglist in pairs(messages) do
		modnames[#modnames + 1] = modname
		local bullets = table.concat(msglist, '\n* ')
		messages[modname] = '\n===[[Module:' .. modname .. ']]===\n*' .. bullets .. '\n'
	end
	table.sort(modnames)

	-- Nothing reported at all?
	if #modnames == 0 then
		return '<b class="success">Glory to Arstotzka.</b>'
	end

	local parts = { '<b class="warning">Discrepancies detected:</b>' }
	for _, modname in ipairs(modnames) do
		parts[#parts + 1] = messages[modname]
	end
	return table.concat(parts)
end
-- Hand the module table (with export.perform) back to the caller that loads this module.
return export