#!/usr/bin/perl
# Insist that all variables be declared
use strict;
# Allow for utf8 characters
use utf8;
# Use the MediaWiki::Bot library
use MediaWiki::Bot;
# Use the Encode library
use Encode;
# Declare some global variables
my ($user, $pass) = ("","");
my ($task, $edit_summary, $is_minor) = ("","","");
my (@pagelist, @tasklist, @templatelist, @regexplist);
my ($namespace_string, $templates) = ("","");
# Set default values
$is_minor = 1; # Label edits as minor
$user = 'SporkBot';
$edit_summary = 'Orphan per [[WP:TFD|TFD outcome]]';
$task = 0;
my @tasklist = (
"Remove a template and trailing whitespace and newline",
"Replace a template and whitespace with a single space",
"Replace a template with nothing",
"Substitute a template and use subst=subst:",
"Substitute a template"
);
# Do we want to orphan or substitute?
print "What task is being performed?\n";
for( my $i=0; $i <= $#tasklist; $i++) {
print $i .") ". $tasklist[$i] ."\n";
}
print "Press enter for default [".$task."]\n";
print "Task: ";
$task = &my_get_response($task);
# Get the name of the template to orphan
print "Which template(s) would you like to orphan?\n";
print "For a list, delimit with commas\n";
print "Template: ";
$templates = &my_get_response("BLANK RESPONSE");
# Replace underscores and excess spaces
$templates =~ s/[_ ]+/ /g;
$templates =~ s/^ [ ]*//;
$templates =~ s/ [ ]*$//;
# Strip off leading "Template:" and
$templates =~ s/[ ]*[Tt]emplate[ ]*:[ ]*//;
# Create the template orphan list
@templatelist = split /[ ]*,[ ]*/, $templates;
# Set the namespace
$namespace_string = &my_get_namespace_list();
# Get username
print "Username [".$user."]: ";
$user = &my_get_response($user);
# Get password
print "Password [".$pass."]: ";
$pass = &my_get_response($pass);
# Create an editor object on the English language WP
my $editor=MediaWiki::Bot->new($user);
$editor->set_wiki('en.wikipedia.org','w');
# Turn debugging on, to see what the script is doing
$editor->{debug} = 1;
# Log in
if ( $editor->login($user, $pass) ) {
print "Failed to login!\n";
exit;
}
# Attempt to get the link to the TFD in question, using first on list
my @plist = $editor->what_links_here_ns("Template:".$templatelist[0],4);
# Loop over all pages in the page list
foreach my $pagestruct (@plist) {
# Only want links, not transclusions
if ( $pagestruct->{'type'} =~ /^$/ ) {
# Get the actual page name from the page structure
my $page = $pagestruct->{'title'};
if ( $page =~ /\/log\//i ) {
if ( ($task == 3) or ($task == 4) ) {
$edit_summary = "Replace template per ".
"[[".$page."|TFD outcome]]; no change in content";
} else {
$edit_summary = "Remove template per ".
"[[".$page."|TFD outcome]]";
}
}
}
}
# Allow user to override edit summary
print "The current edit summary is set to:\n$edit_summary\n";
print "Provide a new edit summary, or press enter to use the default\n";
print "Summary: ";
$edit_summary = &my_get_response($edit_summary);
# Create a list of regular expressions
foreach (@templatelist) {
my $str = $_;
my @rlist = $editor->what_links_here_ns("Template:".$str, 10);
# Escape any characters used in regular expressions
$str =~ s/([()\-\.\[\]])/\\$1/gi;
# First character is case insensitive
$str =~ s/^([A-Za-z])(.*)$/[\u$1\l$1]$2/;
# Wikipedia allows for underscores and spaces in template references
$str =~ s/ /[_ ]+/gi;
# Add template to the list
push @regexplist, $str;
# Add redirects to the list
foreach (@rlist) {
my $tstruct = $_;
# Only want redirects, not links or transclusions
if( $tstruct->{'type'} =~ /redirect/ ) {
$str = $tstruct->{'title'};
# Remove leading Template: in name
$str =~ s/^Template://;
# Escape any characters used in regular expressions
$str =~ s/([()\-\.\[\]])/\\$1/gi;
# First character is case insensitive
$str =~ s/^([A-Za-z])(.*)$/[\u$1\l$1]$2/;
# Wikipedia allows for underscores and spaces in template references
$str =~ s/ /[_ ]+/gi;
push @regexplist, $str;
}
}
}
# Get the list of all transclusions in the specified namespaces
foreach my $template (@templatelist) {
if ($namespace_string) {
# Loop over each namespace and get the list of transclusions
foreach my $ns (split /\&/, $namespace_string) {
if ( $ns =~ /ns([0-9]+)[=]1/ ) {
my @alist = $editor->what_links_here_ns("Template:".$template, $1);
foreach my $pagestruct (@alist) {
# Only want transclusions, not links
if ( $pagestruct->{'type'} =~ /transclusion/ ) {
push @pagelist, $pagestruct->{'title'};
}
}
}
}
} else {
# Get the list of transclusions in all namespaces
my @alist = $editor->what_links_here_ns("Template:".$template, '');
foreach my $pagestruct (@alist) {
# Only want transclusions, not links
if ( $pagestruct->{'type'} =~ /transclusion/ ) {
push @pagelist, $pagestruct->{'title'};
}
}
}
}
# Remove duplicates from the page list
@pagelist = sort(@pagelist);
my $prev = "not ".$pagelist[0];
@pagelist = grep($_ ne $prev && ($prev = $_, 1), @pagelist);
# Confirm job paramters before starting
if( &my_confirm_job() ) {
# Job not confirmed, so exit
exit;
}
# Loop over all pages in the page list
foreach my $page (@pagelist) {
# Show which pages are being processed
print "Processing: ".$page."\n";
# Pull the wikitext for the page
my $text=$editor->get_text($page);
# Save the old wikitext for diff
my $old = $text;
# Split the page into chunks to isolate comments
my @chunks = split(/(<!--.*?-->)/s, $text);
# Loop over sections
foreach $text (@chunks) {
if ($text =~ /<!--/) {
# Skip chunks consisting of only comments
} else {
foreach my $str (@regexplist) {
if ($task == 0) {
# Remove the template and trailing spaces/newlines
$text =~ s/{{[_ ]*(?:[Tt]emplate:|)[_ ]*($str)[_ ]*(\||}})[ ]*[\r\n]?//g;
} elsif ($task == 1) {
# Replace the template with whitespace
$text =~ s/[ ]*{{[_ ]*(?:[Tt]emplate:|)[_ ]*($str)[_ ]*(\||}})[ ]*/ /g;
} elsif ($task == 2) {
# Replace the template with nothing
$text =~ s/{{[_ ]*(?:[Tt]emplate:|)[_ ]*($str)[_ ]*(\||}})//g;
} elsif ($task == 3) {
# Substitute the template with subst=subst:
$text =~ s/{{[_ ]*(?:[Tt]emplate:|)[_ ]*($str)[_ ]*(\||}})/{{subst:$1|subst=subst:$2/g;
} elsif ($task == 4) {
# Substitute the template
$text =~ s/{{[_ ]*(?:[Tt]emplate:|)[_ ]*($str)[_ ]*(\||}})/{{subst:$1$2/g;
}
}
}
}
$text = join('', @chunks);
&my_simple_diff($old,$text);
print "Press enter to submit, s to skip, or ctrl-c to quit\n";
my $response = &my_get_response("");
if ($response =~ /s/i) {
print "Skipping...\n";
} else {
print "Submitting...\n";
# Submit to Wikipedia.
# Warning: This does not warn of edit conflicts.
$editor->edit($page, $text, $edit_summary, $is_minor);
}
# Take a break (frequent edits are forbidden per bot policy)
print "Sleep 10\n";
sleep 10;
}
sub my_get_namespace_list() {
my $all_ns = '&ns0=1&ns1=1&ns2=1&ns3=1&ns4=1&ns5=1&ns6=1&ns7=1'
.'&ns8=1&ns9=1&ns10=1&ns11=1&ns12=1&ns13=1&ns14=1'
.'&ns15=1&ns100=1&ns101=1&ns108=1&ns109=1';
my $no_ns = '&ns0=0&ns1=0&ns2=0&ns3=0&ns4=0&ns5=0&ns6=0&ns7=0'
.'&ns8=0&ns9=0&ns10=0&ns11=0&ns12=0&ns13=0&ns14=0'
.'&ns15=0&ns100=0&ns101=0&ns108=0&ns109=0';
my ($num,$list) = ("","");
print "Provide a list of namespaces in which the script will operate\n";
print "-------------- ------------------- -------------- -----------------\n";
print " 0 Article 1 Talk 2 User 3 User talk\n";
print " 4 Wikipedia 5 Wikipedia talk 6 File 7 File talk\n";
print " 8 MediaWiki 9 MediaWiki talk 10 Template 11 Template talk\n";
print " 12 Help 13 Help talk 14 Category 15 Category talk\n";
print "100 Portal 101 Portal talk 108 Book 109 Book talk\n";
print "-------------- ------------------- -------------- -----------------\n";
print "* Delimit list with commas for multiple namespaces\n";
print "* Start the list with 'no' for the compliment of the list\n";
print "* Leave blank for everything\n";
print "Namespace: ";
my $ns = <STDIN>;
# Remove spaces and newlines
$ns =~ s/[\t\r\n ]//g;
if( $ns =~ /^$/ ) {
# Everything
$list = "";
} elsif( $ns =~ /^no[t ]*([0-9].*)$/ ) {
# Set compliment
$ns = $1;
$list = $all_ns;
foreach $num (split /[ ]*,[ ]*/, $ns) {
$list =~ s/ns($num)=1/ns$1=0/g;
}
} else {
# Set
$list = $no_ns;
foreach $num (split /[ ]*,[ ]*/, $ns) {
$list =~ s/ns($num)[=]0/ns$num=1/g;
}
}
$list =~ s/\&ns[0-9]+=0//g;
return $list;
}
sub my_confirm_job()
{
my $opt = 4;
while( ($opt < 5) && ($opt > 0) ) {
if ( $opt == 1 ) {
print "----------------------------------------------------------------\n";
print " Template list \n";
print "----------------------------------------------------------------\n";
foreach (@templatelist) {
print $_."\n";
}
} elsif ( $opt == 2 ) {
print "----------------------------------------------------------------\n";
print " Regular expression list \n";
print "----------------------------------------------------------------\n";
foreach (@regexplist) {
print $_."\n";
}
} elsif ( $opt == 3 ) {
print "----------------------------------------------------------------\n";
print " Page list \n";
print "----------------------------------------------------------------\n";
foreach (@pagelist) {
print $_."\n";
}
} elsif ( $opt == 4 ) {
print "----------------------------------------------------------------\n";
print " Edit Summary \n";
print "----------------------------------------------------------------\n";
print "\n".$edit_summary."\n\n";
print "----------------------------------------------------------------\n";
print " Job Summary \n";
print "----------------------------------------------------------------\n";
print "\n";
print " Username: ". $user ."\n";
print " Task: ". $tasklist[$task] . "\n";
print " Number of templates: ". scalar @templatelist ."\n";
print " Number of templates + redirects ". scalar @regexplist ."\n";
print " Number of pages to process ". scalar @pagelist ."\n";
print " Namespace string ". $namespace_string ."\n";
print "----------------------------------------------------------------\n";
print "\n";
} else {
print " Unknown option\n";
$opt = 5;
}
print " 0) Start job\n";
print " 1) Show list of templates\n";
print " 2) Show list of regular expressions\n";
print " 3) Show list of pages\n";
print " 4) Redisplay summary\n";
print " 5) Quit\n";
print "Option: ";
$opt = <STDIN>;
# Remove newlines, underscores, and excess spaces
$opt =~ s/[_ \r\n]+//g;
if ( $opt == 5 ) {
return 1;
}
}
return 0;
}
# Simple keyboard response
sub my_get_response()
{
my $val = shift;
my $in = <STDIN>;
# Replace newlines and tabs with spaces
$in =~ s/[\t\r\n]/ /g;
# Remove leading spaces
$in =~ s/^ [ ]*//;
# Remove trailing spaces
$in =~ s/ [ ]*$//;
if( $in =~ /^$/ ) {
return $val;
} else {
return $in;
}
}
# Very basic module for showing diffs
sub my_simple_diff()
{
my $a = shift;
my $b = shift;
my @alines = split /[\r\n]/, $a;
my @blines = split /[\r\n]/, $b;
my $i = 0;
my $j = 0;
while( $i < scalar @alines and $j < scalar @blines ) {
if( $alines[$i] ne $blines[$j] ) {
if( $alines[$i+1] eq $blines[$j] ) {
print "**** Diff ($i,-) ****\n";
print "- ".$alines[$i]."\n";
$i++;
} else {
print "**** Diff ($i,$j) ****\n";
print "+ ". $blines[$j]."\n";
print "- ". $alines[$i]."\n";
}
}
$i++;
$j++;
}
}
# Custom module for namespace restricted "what links here" list
sub MediaWiki::Bot::what_links_here_ns {
my $self = shift;
my $article = shift;
my $ns = shift;
my @links;
$article = MediaWiki::Bot::uri_escape_utf8( $article );
my $res =
$self->_get( 'Special:Whatlinkshere', 'view',
"&target=$article&limit=5000&namespace=$ns" );
unless (ref($res) eq 'HTTP::Response' && $res->is_success) { return 1; }
my $content = $res->decoded_content;
while (
$content =~ m{<li><a]+" title="([^"]+)">[^<]+</a>([^<]*)}g ) {
my $title = $1;
my $type = $2;
if ( $type !~ /\(redirect page\)/ && $type !~ /\(transclusion\)/ ) {
$type = "";
}
if ( $type =~ /\(redirect page\)/ ) { $type = "redirect"; }
if ( $type =~ /\(transclusion\)/ ) { $type = "transclusion"; }
push @links, { title => $title, type => $type };
}
return @links;
}