Task 13 is a single-use task that removes or replaces deprecated |subscription= and |registration= parameters in existing cs1|2 templates.
cs1|2 has deprecated |subscription= and |registration= at this RFC (aspect B3). This task:
* ignores |subscription= and |registration= parameters that are not assigned one of the three allowed values (yes, y, true)
* ignores citation templates that have more than one of |url=, |article-url=, |chapter-url=, |entry-url=, |section-url= (the url list)
* ignores citation templates that have one of the url list parameters and also any of |doi=, |DOI=, |jstor=, |JSTOR=, |bibcode=, |hdl=, |HDL=, |ol=, |OL=, |osti=, |OSTI= (the identifier list)
* removes |subscription= and |registration= parameters when the citation template does not have any of the url list parameters; cs1|2 identifier parameters are presumed to lie behind a paywall or registration barrier; cs1|2 does not highlight the norm so |subscription= and |registration= are superfluous in these citation templates
* replaces |subscription= and |registration= with the appropriate |<xxx->url-access= parameter when the citation template holds only one of the url list parameters
Task 13 skips pages that include {{bots|deny=Monkbot13}}.
Empty |subscription= and |registration= parameters are deleted. This task does not do AWB general fixes.
// this script removes / replaces deprecated |subscription= and |registration= parameters from cs1|2 templates
//
// to make a list for awb use category: CS1 errors: deprecated parameters

// IS_CS1: regex fragment that recognizes the name of a cs1|2 template.  It is spliced into larger patterns
// immediately after an opening-brace match.  Three alternatives:
//	1. 'Cite' or 'cite' and optional whitespace, followed by one of the listed template names; the name is tested
//	   with a lookahead so it is not consumed by this fragment
//	2. 'Citation' or 'citation'
//	3. bare 'Cite' or 'cite' when followed by optional whitespace and a pipe
string IS_CS1 = @"(?:[Cc]ite\s*(?=(?:AV media(?: notes)?)|[Aa][Vv] media|[Aa][Vv] media notes|article|ar[Xx]iv|biorxiv|book|conference|document|encyclopa?edia|episode|interview|journal|magazine|mailing ?list|manual|(?:news(?!group|paper))|paper|podcast|press release|report|serial|sign|speech|techreport|thesis|video|web)|[Cc]itation|[Cc]ite(?=\s*\|))";

// skip flags: start out true; sup_reg_common() sets the matching flag false when it removes or replaces a
// parameter; ProcessArticle() reads them to set Skip and to choose the edit summary, then resets both to true
bool gSkip_subscription = true; // presume that we will skip this page
bool gSkip_registration = true;

// the url list: url-holding parameter names that sup_reg_common() looks for inside a citation template; the
// matching access parameter name is built by appending '-access' to one of these names
string[] url_params = { "url", "article-url", "chapter-url", "entry-url", "section-url"};
//---------------------------< P R O C E S S A R T I C L E >--------------------------------------------------
//
// Module entry point; processes the text of one page.
//
// Steps:
//	1. delete empty |subscription= and |registration= parameters
//	2. HIDE markup that could confuse the cs1|2 template patterns: the {{ and }} of every template that is not
//	   cs1|2 are replaced with placeholder tokens (innermost templates first), and the [[, | and ]] of piped
//	   wikilinks (plus the parentheses of a parenthetical disambiguator) are replaced with placeholder tokens
//	3. sup_reg_common() removes or replaces |subscription= and then |registration=
//	4. UNHIDE: every placeholder token is turned back into the markup that it replaced
//
// ArticleText: wikitext of the page
// ArticleTitle, wikiNamespace: part of the module signature; not used by this task
// Summary (out): edit summary; names the parameter(s) that sup_reg_common() fixed
// Skip (out): true when sup_reg_common() fixed neither parameter.  Deleting empty parameters in step 1 does not
//	clear the skip flags, so a page whose only change is a deleted empty parameter is still reported as skipped.
//
// returns the (possibly modified) wikitext

public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    // gSkip_subscription = false; // debug; for now we will not skip anything
    // gSkip_registration = false;

    //---------------------------< E M P T I E S >----------------------------------------------------------------
    // delete empty |subscription= and |registration= parameters; the following pipe or closing brace is kept
    ArticleText = Regex.Replace(ArticleText, @"\| *subscription *=\s*([\|\}])", "$1");
    ArticleText = Regex.Replace(ArticleText, @"\| *registration *=\s*([\|\}])", "$1");

    //---------------------------< H I D E >----------------------------------------------------------------------
    // HIDE TEMPLATES: find templates that are not CS1; replace the opening {{ with __0P3N__ and the closing }} with
    // __CL0S3__.  The pattern only matches templates that contain no braces, so each pass hides the innermost
    // templates; repeat until nothing is left to hide.
    Regex non_cs1_template = new Regex(@"\{\{(?!\s*" + IS_CS1 + @")([^\{\}]*)\}\}");
    while (non_cs1_template.IsMatch(ArticleText))
    {
        ArticleText = non_cs1_template.Replace(ArticleText, "__0P3N__$1__CL0S3__");
    }

    // wikilinks with parenthetical disambiguation; must run before the plain piped-wikilink pattern below,
    // which would otherwise match these links as well
    ArticleText = Regex.Replace(ArticleText,
        @"\[\[([^\|\]]+) +\(([^\)\|]+)\)\|([^\]]+)\]\]",
        "__WL_0P3N__$1__D4B_O__$2__D4B_C____P1P3__$3__WL_CL0S3__");

    // link label wikilinks
    ArticleText = Regex.Replace(ArticleText,
        @"\[\[([^\|\]]+)\|([^\]]+)\]\]",
        "__WL_0P3N__$1__P1P3__$2__WL_CL0S3__");

    //---------------------------< S U B S C R I P T I O N >------------------------------------------------------
    ArticleText = sup_reg_common (ArticleText, "subscription");

    //---------------------------< R E G I S T R A T I O N >------------------------------------------------------
    ArticleText = sup_reg_common (ArticleText, "registration");

    //---------------------------< U N H I D E >------------------------------------------------------------------
    // The tokens are plain text, so ordinary string replacement is enough.
    // UNHIDE: replace __WL_0P3N__ with [[, __P1P3__ with |, __WL_CL0S3__ with ]]
    ArticleText = ArticleText.Replace("__WL_0P3N__", "[[");
    ArticleText = ArticleText.Replace("__D4B_O__", " (");      // make sure that there is a space before the '('
    ArticleText = ArticleText.Replace("__D4B_C__", ")");
    ArticleText = ArticleText.Replace("__P1P3__", "|");
    ArticleText = ArticleText.Replace("__WL_CL0S3__", "]]");

    // UNHIDE: replace __0P3N__ with {{ and __CL0S3__ with }}.  These must be exactly the braces that the HIDE step
    // removed; restoring anything else would corrupt every non-cs1|2 template on the page.
    ArticleText = ArticleText.Replace("__0P3N__", "{{");
    ArticleText = ArticleText.Replace("__CL0S3__", "}}");

    //---------------------------< S K I P   A N D   S U M M A R Y >----------------------------------------------
    Skip = gSkip_subscription && gSkip_registration;           // skip when neither parameter was fixed

    // Summary = "[[User:Monkbot/task_13: remove replace deprecated subscription registration parameters|Task 13]]: (developmental testing): ";
    Summary = "[[User:Monkbot/task_13: remove replace deprecated subscription registration parameters|Task 13]]: ([[Wikipedia:Bots/Requests_for_approval/Monkbot_13|BRFA testing]]): ";
    // Summary = "[[User:Monkbot/task_13: remove replace deprecated subscription registration parameters|Task 13]]: ";

    if (!gSkip_subscription && !gSkip_registration)
    {
        Summary = Summary + "Fix deprecated |subscription= and |registration= in cs1|2 templates;";
    }
    else if (!gSkip_subscription)
    {
        Summary = Summary + "Fix deprecated |subscription= in cs1|2 templates;";
    }
    else if (!gSkip_registration)
    {
        Summary = Summary + "Fix deprecated |registration= in cs1|2 templates;";
    }
    // else: nothing was fixed and Skip is true; do not claim a fix in the summary

    gSkip_subscription = true;                                  // reset for the next page
    gSkip_registration = true;

    return ArticleText;
}
//---------------------------< S U P _ R E G _ C O M M O N >--------------------------------------------------
//
// Common handler for the deprecated |subscription= and |registration= parameters.
//
// ArticleText: wikitext in which non-cs1|2 templates and piped wikilinks have already been hidden
// sr_param: name of the deprecated parameter to process; either "subscription" or "registration"
//
// For every cs1|2 template that holds |<sr_param>= with one of the accepted values (yes, true, y):
//	more than one populated url list parameter          -> template left unchanged
//	one populated url list parameter plus an identifier -> template left unchanged
//	no populated url list parameter                     -> |<sr_param>= is removed
//	exactly one populated url list parameter, no identifier
//	                                                    -> |<sr_param>= is replaced with |<url param>-access=<sr_param>
//
// Whenever a template is changed, the matching skip flag (gSkip_subscription or gSkip_registration) is set false.
//
// returns the (possibly modified) wikitext

string sup_reg_common (string ArticleText, string sr_param)
{
    // group 1: citation template from the opening braces up to the pipe of |<sr_param>=
    // group 2: remainder of the citation template after the accepted value (closing braces not included)
    // the lookahead requires the value to end at the next pipe or closing brace so that a value that merely
    // begins with an accepted keyword ('yesterday', 'years', ...) is not mistaken for 'yes' or 'y'
    string template_pattern = @"(\{\{\s*" + IS_CS1 + @"[^\}]*)\|\s*" + sr_param + @"\s*=\s*(?:yes|true|y)(?=\s*[\|\}])([^\}]*)";

    // identifier list; access params apply to these.  The final class excludes whitespace so that an empty
    // parameter followed only by spaces or a newline is not counted as having a value.
    const string identifier_pattern = @"\|\s*(?:doi|DOI|jstor|JSTOR|bibcode|hdl|HDL|ol|OL|osti|OSTI)\s*=\s*[^\|\}\s]";

    return Regex.Replace(ArticleText, template_pattern,
        delegate(Match match)
        {
            string raw_capture = match.Groups[0].Value;         // the whole captured citation
            string raw_prefix = match.Groups[1].Value;          // citation template up to the start of |<sr_param>=
            string raw_postfix = match.Groups[2].Value;         // citation after |<sr_param>=<value>

            int url_count = 0;                                  // number of populated url-holding parameters
            string url_param = "";                              // the last-found url-holding parameter name

            foreach (string param in url_params)
            {
                // url-holding parameter with something other than whitespace in it
                if (Regex.IsMatch (raw_capture, @"\|\s*" + param + @"\s*=\s*[^\|\}\s]"))
                {
                    url_count++;
                    url_param = param;
                }
            }

            if (1 < url_count)                                  // can't know which url parameter |<sr_param>= was meant for
            {
                return raw_capture;                             // so do nothing
            }

            if (1 == url_count && Regex.IsMatch (raw_capture, identifier_pattern))
            {
                return raw_capture;                             // identifier plus url; can't know to which |<sr_param>= applies
            }

            // if here, the template will be changed; record that for Skip and for the edit summary
            if ("subscription" == sr_param)
            {
                gSkip_subscription = false;
            }
            else
            {
                gSkip_registration = false;
            }

            if (0 == url_count)                                 // nothing for an access parameter to attach to
            {
                return raw_prefix + raw_postfix;                // remove |<sr_param>= because it does not apply
            }

            // exactly one url-holding parameter and no identifier:
            // replace |<sr_param>= with |<url param>-access=<sr_param>
            return raw_prefix + @"|" + url_param + @"-access=" + sr_param + @" " + raw_postfix;
        });
}