This is the module sandbox page for Module:TaxonItalics (diff). See also the companion subpage for test cases (run). |
Module:TaxonItalics (talk · · hist · links · doc · subpages · sandbox · testcases)
This Lua module is used on approximately 581,000 pages, or roughly 1% of all pages. To avoid major disruption and server load, any changes should be tested in the module's /sandbox or /testcases subpages, or in your own module sandbox. The tested changes can be added to this page in a single edit. Consider discussing changes on the talk page before implementing them. |
The module is primarily intended for use by the automated taxobox system. It supports the correct italicization of scientific names. Botanical (ICNafp) names may contain "connecting terms"; these must not be italicized. The hybrid symbol, ×, should also not be italicized. The module optionally wikilinks and abbreviates italicized names.
For non-virus taxa, italics are used at the rank of genus or below. The module does not decide whether a scientific name should be italicized. Use ((Is italic taxon))
for this purpose.
The parameters can be combined. It can also be used via ((Taxon italics)).
Botanical names may contain only one infraspecific epithet; a string like "Fragaria vesca subsp. vesca f. semperflorens" is a classification, not a name, and is not handled by the module.
Using |linked=yes
Using |abbreviated=yes
By default, a parenthesized part of a taxon name is assumed to be a subgenus name, and is italicized:
To treat a parenthesized part as a disambiguation term, use |dab=yes
For even more examples, see the testcases.
--[[=========================================================================
Italicize a taxon name appropriately by invoking italicizeTaxonName.
The algorithm used is:
* If the name has italic markup at the start or the end, do nothing.
* Else
* Remove (internal) italic markup.
* If the name is made up of four words and the third word is a
botanical connecting term, de-italicize the connecting term and add italic
markup to the outside of the name.
* Else if the name is made up of three words and the second word is a
botanical connecting term or a variant of "cf.", de-italicize the
connecting term and add italic markup to the outside of the name.
* Else just add italic markup to the outside of the name.
The module also:
* Ensures that the hybrid symbol, ×, and parentheses are not italicized, as
well as any string inside parentheses if dab is true.
* Has an option to abbreviate all parts of taxon names other than the last
to the first letter (e.g. "Pinus sylvestris var. sylvestris" becomes
"P. s. var. sylvestris").
* Has an option to wikilink the italicized name to the input name.
=============================================================================]]
local p = {}
local l = {} -- used to store purely local functions
--connecting terms in three part names (e.g. Pinus sylvestris var. sylvestris)
local cTerms3 = {
--subsp.
subspecies = "subsp.",
["subsp."] = "subsp.",
subsp = "subsp.",
["ssp."] = "subsp.",
ssp = "subsp.",
--var.
varietas = "var.",
["var."] = "var.",
var = "var.",
--subvar.
subvarietas = "subvar.",
["subvar."] = "subvar.",
subvar = "subvar.",
--f.
forma = "f.",
["f."] = "f.",
f = "f.",
--subf.
subforma = "subf.",
["subf."] = "subf.",
subf = "subf."
}
--connecting terms in two part names (e.g. Pinus sect. Pinus)
local cTerms2 = {
--subg.
subgenus = "subg.",
["subgen."] = "subg.",
["subg."] = "subg.",
subg = "subg.",
--supersect.
supersection = "supersect.",
["supersect."] = "supersect.",
supersect = "supersect.",
--sect.
section = "sect.",
["sect."] = "sect.",
sect = "sect.",
--subsect.
subsection = "subsect.",
["subsect."] = "subsect.",
subsect = "subsect.",
--ser.
series = "ser.",
["ser."] = "ser.",
ser = "ser.",
--subser.
subseries = "subser.",
["subser."] = "subser.",
subser = "subser.",
--cf.
cf = "cf.",
["cf."] = "cf.",
["c.f."] = "cf."
}
--[[=========================================================================
Main function to italicize a taxon name appropriately. For the purpose of the
parameters, see p.italicizeTaxonName().
=============================================================================]]
function p.main(frame)
local name = frame.args[1] or ''
local linked = frame.args['linked'] == 'yes'
local abbreviated = frame.args['abbreviated'] == 'yes'
local dab = frame.args['dab'] == 'yes'
return p.italicizeTaxonName(name, linked, abbreviated, dab)
end
--[[=========================================================================
Utility local function to abbreviate an input string to its first character
followed by ".".
Both "×" and an HTML entity at the start of the string are skipped over in
determining first character, as is an opening parenthesis and an opening ",
which cause a matching closing character to be included.
=============================================================================]]
function l.abbreviate(str)
local result = ""
local hasParentheses = false
local isQuoted = false
if mw.ustring.len(str) < 2 then
--single character strings are left unchanged
result = str
else
--skip over an opening parenthesis that could be present at the start of the string
if mw.ustring.sub(str,1,1) == "(" then
hasParentheses = true
result = "("
str = mw.ustring.sub(str,2,mw.ustring.len(str))
elseif mw.ustring.sub(str,1,1) == '"' then
isQuoted = true
result = '"'
str = mw.ustring.sub(str,2,mw.ustring.len(str))
end
--skip over a hybrid symbol that could be present at the start of the string
if mw.ustring.sub(str,1,1) == "×" then
result = "×"
str = mw.ustring.sub(str,2,mw.ustring.len(str))
end
--skip over an HTML entity that could be present at the start of the string
if mw.ustring.sub(str,1,1) == "&" then
local i,dummy = mw.ustring.find(str,";",2,plain)
result = result .. mw.ustring.sub(str,1,i)
str = mw.ustring.sub(str,i+1,mw.ustring.len(str))
end
--if there's anything left, reduce it to its first character plus ".",
--adding the closing parenthesis or quote if required
if str ~= "" then
result = result .. mw.ustring.sub(str,1,1) .. "."
if hasParentheses then result = result .. ")"
elseif isQuoted then result = result .. '"'
end
end
end
return result
end
--[[=========================================================================
The function which does the italicization. Parameters:
name (string) – the taxon name to be processed
linked (boolean) – should a wikilink be generated?
abbreviated (boolean) – should the first parts of the taxon name be
reduced to capital letters?
dab (boolean) – should any parenthesized part be treated as a disambiguation
term and left unitalicized?
=============================================================================]]
function p.italicizeTaxonName(name, linked, abbreviated, dab)
name = mw.text.trim(name)
-- if the name begins with '[', then assume formatting is present
if mw.ustring.sub(name,1,1) == '[' then return name end
-- otherwise begin by replacing any use of the HTML italic tags
-- by Wikimedia markup; replace any entity alternatives to the hybrid symbol
-- by the symbol itself; prevent the hybrid symbol being treated as
-- a 'word' by converting a following space to the HTML entity
local italMarker = "''"
name = string.gsub(mw.text.trim(name), "</?i>", italMarker)
name = string.gsub(string.gsub(name, "×", "×"), "×", "×")
name = string.gsub(name, "</?span.->", "") -- remove any span markup
name = string.gsub(name, "× ", "× ")
-- now italicize and abbreviate if required
local result = name
if name ~= '' then
if string.sub(name,1,2) == italMarker or string.sub(name,-2) == italMarker then
-- do nothing if the name already has italic markers at the start or end
else
name = string.gsub(name, italMarker, "") -- first remove any internal italics
local words = mw.text.split(name, " ", true)
if #words == 4 and cTerms3[words[3]] then
-- the third word of a four word name is a connecting term
-- ensure the connecting term isn't italicized
words[3] = '<span style="font-style:normal;">' .. cTerms3[words[3]] .. '</span>'
if abbreviated then
words[1] = l.abbreviate(words[1])
words[2] = l.abbreviate(words[2])
end
result = words[1] .. " " .. words[2] .. " " .. words[3] .. " " .. words[4]
elseif #words == 3 and cTerms2[words[2]] then
-- the second word of a three word name is a connecting term
-- ensure the connecting term isn't italicized
words[2] = '<span style="font-style:normal;">' .. cTerms2[words[2]] .. '</span>'
if abbreviated then
words[1] = l.abbreviate(words[1])
end
result = words[1] .. " " .. words[2] .. " " .. words[3]
elseif abbreviated then -- not a name as above; only deal with abbreviation
if #words > 1 then
result = l.abbreviate(words[1])
for i = 2, #words-1, 1 do
result = result .. " " .. l.abbreviate(words[i])
end
result = result .. " " .. words[#words]
end
else
result = name
end
-- deal with any hybrid symbol as it should not be italicized
result = string.gsub(result, "×", '<span style="font-style:normal;">×</span>')
-- deal with any parentheses as they should not be italicized
if dab then
result = string.gsub(string.gsub(result,"%(",'<span style="font-style:normal;">('),"%)",')</span>')
else
result = string.gsub(string.gsub(result,"%(",'<span style="font-style:normal;">(</span>'),"%)",'<span style="font-style:normal;">)</span>')
end
-- any question marks surrounded by spans can have the spans joined
result = string.gsub(result,'</span>%?<span style="font%-style:normal;">','?')
-- add outside markup
if linked then
if result ~= name then
result = "[[" .. name .. "|" .. italMarker .. result .. italMarker .. "]]"
else
result = italMarker .. "[[" .. name .. "]]" .. italMarker
end
else
result = italMarker .. result .. italMarker
end
end
end
return result
end
--[[=========================================================================
Utility function used by other modules to check if a connecting term is
present in a name. The value of name is assumed to be plain text.
=============================================================================]]
function p.hasCT(frame)
return p.hasConnectingTerm(frame.args[1] or '')
end
function p.hasConnectingTerm(name)
local words = mw.text.split(name, " ", true)
return (#words == 4 and cTerms3[words[3]])
or (#words == 3 and cTerms2[words[2]])
end
return p