Jump to content

User:AnomieBOT/source/tasks/TemplateSubster.pm: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
AnomieBOT (talk | contribs)
Updating published sources: d::Redirects: * Redo redirects_to and redirects_to_resolved to use the new prop=redirects. Much faster and more accurate, but let's hope I didn't introduce any bugs.
AnomieBOT (talk | contribs)
Updating published sources: TemplateSubster: * Try to better handle username mentions inside URLs.
Line 296: Line 296:
}
}
} while(exists($q{'rvcontinue'}));
} while(exists($q{'rvcontinue'}));

# Signatures
$otxt=~s/\Q$sig\E/[[User:$u]] ([[User talk:$u|talk]])/g;
$otxt=~s/\Q$sig\E/[[User:$u]] ([[User talk:$u|talk]])/g;

# Try to handle User links inside URLs. Not perfect, but the best we
# can do in the situation.
my $eu = $u;
$eu =~ s/ /_/g;
$eu =~ s/([%"&])/ sprintf("%%%02X", ord($1)) /ge;
my $tmp;
do {
$tmp = $otxt;
$otxt=~s!((?:\[|https?:)//[^][<>"\x00-\x20\x7F\p{Zs}]+)\Q$bot\E!$1$eu!g;
} while ( $tmp ne $otxt );

# Other username mentions
$otxt=~s/\Q$bot\E/$u/g;
$otxt=~s/\Q$bot\E/$u/g;
}
}

$otxt=~s/\Q$botr\E/$bot/g;
$otxt=~s/\Q$botr\E/$bot/g;
$botr=~s/&/%26/g;
$botr=~s/&/%26/g;

Revision as of 12:09, 25 September 2014

package tasks::TemplateSubster;

=pod

=begin metadata

Bot:      AnomieBOT
Task:     TemplateSubster
BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT 45
Status:   Approved 2010-10-06
Created:  2010-09-27

Subst templates in [[:Category:Wikipedia templates to be automatically substituted]].
See [[User:AnomieBOT/docs/TemplateSubster|documentation]] for details.

=end metadata

=cut

use utf8;
use strict;

use Data::Dumper;
use AnomieBOT::Task qw/:time/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Safety limit: nexttitle() asks for at most this many transclusions of a
# template, and if the API reports that there are more, the template is only
# substed when it is listed on the bot's "TemplateSubster force" page.
my $max_transclusions=100;

# Constructor.  Builds the object through the parent class and then resets
# every piece of resumable state this task keeps between calls to run():
#   curtitle - title of the template currently being processed
#   cm iter  - iterator over the members of the tracking category
#   ei iter  - iterator over the pages transcluding the current template
#   force    - set of templates exempt from the transclusion limit
sub new {
    my ($class) = @_;
    my $self = $class->SUPER::new();

    # Nothing is in progress yet, so every slot starts out undefined.
    $self->{$_} = undef for ('curtitle', 'cm iter', 'ei iter', 'force');

    return bless $self, $class;
}

=pod

=for info
Approved 2010-10-06<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 45]]

=cut

# Approval status reported for this task: always the constant 3.
# NOTE(review): what the value 3 means is defined by the framework
# (AnomieBOT::Task), which is not visible in this file -- confirm there.
sub approved {
    my $status = 3;
    return $status;
}

# Main entry point of the task.
#
# Walks the templates in
# [[:Category:Wikipedia templates to be automatically substituted]]; for each
# one, visits every page that transcludes it (directly or through a redirect
# to it), substs the matching transclusions via do_subst() and saves the page.
# The category iterator, the current template and the transclusion iterator
# are kept in $self, so a later call resumes where this one stopped.
#
# Parameters:
#   $self - the task object
#   $api  - the bot's API object
#
# Returns a number to the caller:
#   0    - the bot is halting, or the 5-minute budget for this call is used up
#   60   - an API request failed
#   300  - the task has been shut off
#   3600 - nothing (more) to do
# (Presumably the delay in seconds before run() is called again -- confirm
# against AnomieBOT::Task.)
sub run {
    my ($self, $api)=@_;
    my $res;

    $api->task('TemplateSubster',0,10,qw/d::Redirects d::Templates d::Talk/);

    my $help="User:".$api->user."/docs/TemplateSubster";

    # Fetch the bot's own signature once; do_subst() needs it to recognise
    # signatures produced while expanding a template.  ("~\x7e~" is three
    # tildes, written so that saving this source on-wiki does not expand it.)
    if(!exists($self->{'sig'})){
        $res=$api->query(action=>'parse', text=>"~\x7e~", pst=>1, onlypst=>1);
        if($res->{'code'} ne 'success'){
            $api->warn("Failed to load bot sig: ".$res->{'error'}."\n");
            return 60;
        }
        $self->{'sig'}=$res->{'parse'}{'text'}{'*'};
    }

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    # Load the "force" list: templates linked from this page may be substed
    # even when they exceed $max_transclusions.  The list is only honoured
    # when the page is fully (sysop) edit-protected.
    $res=$api->query(titles=>'User:'.$api->user.'/TemplateSubster force', prop=>'info|links', plnamespace=>10, pllimit=>'max', inprop=>'protection');
    if($res->{'code'} ne 'success'){
        $api->warn("Failed to get force list: ".$res->{'error'}."\n");
        return 60;
    }
    $res=(values %{$res->{'query'}{'pages'}})[0];
    if(exists($res->{'missing'})){
        $self->{'force'}={};
    } else {
        my $ok=0;
        foreach my $prot (@{$res->{'protection'}}) {
            $ok=1 if($prot->{'type'} eq 'edit' && $prot->{'level'} eq 'sysop');
        }
        if($ok){
            $self->{'force'}={ map { $_->{'title'}=>1 } @{$res->{'links'}} };
        } else {
            $api->whine("[[User:".$api->user."/TemplateSubster force]] is unprotected", "In an effort to prevent disruption, I refuse to subst templates that have over $max_transclusions transclusions unless they are listed at [[User:".$api->user."/TemplateSubster force]]. But it defeats the purpose if that page is not fully protected. Please have that page protected.");
            $self->{'force'}={};
        }
    }

    # Start a fresh pass over the category if none is in progress.
    if(!defined($self->{'cm iter'})){
        $self->{'cm iter'}=$api->iterator(
            list        => 'categorymembers',
            cmtitle     => 'Category:Wikipedia templates to be automatically substituted',
            cmtype      => 'page',
            cmlimit     => 'max',
            cmprop      => 'title',
        );
        $self->{'curtitle'}=undef;
        $self->{'ei iter'}=undef;
    }
    unless(defined($self->{'curtitle'})){
        my $r=$self->nexttitle($api);
        return $r if $r;
    }

    while(defined($self->{'curtitle'})){
        # Don't resolve in case only a redirect should be substed for some strange reason.
        my %r=$api->redirects_to($self->{'curtitle'});
        if(exists($r{''})){
            $api->warn("Failed to get redirects to $self->{curtitle}: ".$r{''}{'error'}."\n");
            return 60;
        }
        if(!defined($self->{'ei iter'})){
            $self->{'ei iter'}=$api->iterator(
                generator    => 'embeddedin',
                geititle     => $self->{'curtitle'},
                geilimit     => 'max',
                prop         => 'info',
            );
        }

        # A lexical loop variable is used on purpose: assigning to the global
        # $_ here would leak into (and could overwrite data aliased by) the
        # caller, and could itself be clobbered by any code called below.
        while(my $page=$self->{'ei iter'}->next){
            return 0 if $api->halting;
            if(!$page->{'_ok_'}){
                $api->warn("Failed to retrieve transclusions for $self->{curtitle}: ".$page->{'error'}."\n");
                return 60;
            }

            my $title=$page->{'title'};

            # Can't edit user js or css
            next if($page->{'ns'}==2 && $title=~/\.(?:js|css)$/);

            # Can't edit Mediawiki namespace either
            next if($page->{'ns'}==8);

            # Skip if we checked this revision already
            my $revid=$page->{'lastrevid'};
            my $key=$self->{'curtitle'}."|$title";
            next if ($api->store->{$key}//0)==$revid;

            # Ok, check the page
            my $tok=$api->edittoken($title, EditRedir=>1);
            if($tok->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$tok->{'content'}."\n");
                return 300;
            }
            if($tok->{'code'} eq 'pageprotected'){
                # Don't worry about protected pages, just mark them and continue
                $api->store->{$key}=$revid;
                next;
            }
            if($tok->{'code'} eq 'botexcluded'){
                # Don't retry on bot exclusion either
                $api->warn("TemplateSubster excluded from $title: ".$tok->{'error'}."\n");
                $api->store->{$key}=$revid;
                next;
            }
            if($tok->{'code'} ne 'success'){
                $api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
                next;
            }
            next if exists($tok->{'missing'});

            # Get page text
            my $intxt=$tok->{'revisions'}[0]{'*'};

            # Perform the removal.  The callback is invoked for each template
            # in the page; it returns undef to leave a template untouched or
            # the replacement text from do_subst().  Once one expansion has
            # failed ($fail), every later template is left alone and the
            # whole run is aborted below.
            my %remv=();
            my $fail=0;
            my $outtxt=$api->process_templates($intxt, sub {
                return undef if $fail;
                my $name=shift;
                my $params=shift;
                my $wikitext=shift;
                my $data=shift;
                my $oname=shift;

                # Only the current template and the redirects to it qualify
                return undef unless exists($r{"Template:$name"}) || exists($r{$name});

                # A "nosubst" or "demo" parameter opts the transclusion out
                foreach my $param ($api->process_paramlist(@$params)){
                    return undef if $param->{'name'}=~/^\s*(?:nosubst|demo)\s*$/;
                }
                $remv{"{{$oname}}"}=1;
                my $ret=$self->do_subst($api, $title, $wikitext);
                $fail=1 if !defined($ret);
                return $ret;
            });
            return 60 if $fail;

            # Need to edit?
            if(%remv){
                # Sorted so that the summary and log line are deterministic
                # rather than following Perl's random hash order.
                my @remv=sort keys %remv;
                $remv[-1]='and '.$remv[-1] if @remv>1;
                my $summary="[[$help|Substing templates]]: ".join((@remv>2)?', ':' ', @remv).". See [[$help]] for info.";
                $api->log("Substing templates: ".join((@remv>2)?', ':' ', @remv)." in $title");
                my $r=$api->edit($tok, $outtxt, $summary, 1, 1);
                if($r->{'code'} ne 'success'){
                    $api->warn("Write failed on $title: ".$r->{'error'}."\n");
                    next;
                }
                $revid=$r->{'edit'}{'newrevid'};
            }

            # Mark that we checked it
            $api->store->{$key}=$revid;

            # If we've been at it long enough, let another task have a go.
            return 0 if time()>=$endtime;
        }
        my $r=$self->nexttitle($api);
        return $r if $r;
    }

    # No more pages to check
    return 3600;
}

# Move on to the next template in the category that may be substed.
#
# Clears the current template and its transclusion iterator, then pulls
# members from the category iterator until one is found that has at least one
# transclusion and is either within the $max_transclusions limit or listed in
# $self->{'force'}.  Templates over the limit that are not forced are
# reported with $api->whine and skipped.
#
# Returns undef after storing the chosen title in $self->{'curtitle'},
# 60 when an API request failed, or 3600 when the category is exhausted (in
# which case the category iterator is dropped as well).
sub nexttitle {
    my ($self, $api) = @_;

    # Forget whatever template was being worked on.
    $self->{'curtitle'} = undef;
    $self->{'ei iter'}  = undef;

    while (my $member = $self->{'cm iter'}->next) {
        unless ($member->{'_ok_'}) {
            $api->warn("Failed to retrieve category member list: ".$member->{'error'}."\n");
            return 60;
        }
        my $tmpl = $member->{'title'};

        # Probe the transclusion count: ask for at most $max_transclusions
        # entries; a continuation marker in the reply means there are more.
        my $probe = $api->query(list=>'embeddedin', eititle=>$tmpl, eilimit=>$max_transclusions);
        if ($probe->{'code'} ne 'success') {
            $api->warn("Failed to retrieve $tmpl embeddedin list: ".$probe->{'error'}."\n");
            return 60;
        }

        # No transclusions, nothing to subst
        next unless @{$probe->{'query'}{'embeddedin'}};

        # Too many transclusions and not explicitly allowed: complain, skip
        if (exists($probe->{'query-continue'}) && !($self->{'force'}{$tmpl} // 0)) {
            $api->whine("[[$tmpl]] has too many transclusions", "In an effort to prevent disruption, I refuse to subst templates that have over $max_transclusions transclusions unless they are listed at [[User:".$api->user."/TemplateSubster force]]. Please either edit the template to remove it from [[:Category:Wikipedia templates to be automatically substituted]], manually subst the existing transclusions, or add it to [[User:".$api->user."/TemplateSubster force]] to let me know it is OK to subst them.");
            next;
        }

        # Found one to work on
        $self->{'curtitle'} = $tmpl;
        return undef;
    }

    # Category exhausted; the next run() starts a fresh pass
    $self->{'cm iter'} = undef;
    return 3600;
}

# Expand a single template invocation as though it had been subst'ed by the
# user who added it to the page.
#
# Parameters:
#   $self  - the task object; $self->{'sig'} holds the bot's own signature
#   $api   - the bot's API object
#   $title - title of the page containing the template
#   $txt   - wikitext of the invocation, "{{...}}"
#
# Returns the replacement wikitext, or undef when an API request failed.
#
# The expansion is done with a pre-save-transform parse under the bot's
# account, so signatures and user-name magic come out naming the bot.  Those
# are re-attributed to the editor who introduced the template.  Mentions of
# the bot that were already written out literally in $txt are protected by
# entity-encoding them before the parse and restoring them afterwards.
sub do_subst {
    my ($self,$api,$title,$txt)=@_;
    my $bot=$api->user;
    my $sig=$self->{'sig'};

    # The bot's name with every character written as a numeric entity; used
    # to hide literal mentions from the re-attribution below.
    my $botr=$bot;
    $botr=~s/(.)/ sprintf("&#%d;",ord($1)) /ge;

    # Turn "{{foo|...}}" into "{{subst:foo|...|subst=subst:}}".  The braces
    # are escaped: an unescaped literal "{" in a pattern is deprecated, and
    # fatal in some Perl releases.
    my $itxt=$txt;
    $itxt=~s/^\{\{/{{subst:/;
    $itxt=~s/\}\}$/|subst=subst:}}/;
    $itxt=~s/\Q$bot\E/$botr/g;

    my $res=$api->query(action=>"parse", text=>$itxt, title=>$title, pst=>1, onlypst=>1);
    if($res->{'code'} ne 'success'){
        $api->warn("Failed to expand template: ".$res->{'error'}."\n");
        return undef;
    }
    my $otxt=$res->{'parse'}{'text'}{'*'};
    if($otxt=~/\Q$bot\E/){
        # The expansion names the bot, so find out who to credit instead.
        # Walk the page history from the newest revision backwards, one
        # revision per request.  The first request asks for the user only
        # (the newest revision contains the template by construction); later
        # ones also fetch the content, and the walk stops at the first
        # revision that lacks the invocation.  $u ends up as the author of
        # the oldest revision in the unbroken run that contains it.
        my %q=(
            titles => $title,
            prop => 'revisions',
            rvprop => 'user',
            rvlimit => 1,
        );
        my $u='';
        do {
            $res=$api->query(%q);
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to fetch revisions for $title: ".$res->{'error'}."\n");
                return undef;
            }
            if(exists($res->{'query-continue'}{'revisions'}{'rvcontinue'})){
                $q{'rvcontinue'}=$res->{'query-continue'}{'revisions'}{'rvcontinue'};
                $q{'rvprop'}='user|content';
            } else {
                delete $q{'rvcontinue'};
            }
            $res=(values %{$res->{'query'}{'pages'}})[0]{'revisions'}[0];
            if(!exists($res->{'*'}) || $res->{'*'}=~/\Q$txt\E/){
                $u=$res->{'user'};
            } else {
                delete $q{'rvcontinue'};
            }
        } while(exists($q{'rvcontinue'}));

        # Form of the user name for use inside URLs
        my $eu = $u;
        $eu =~ s/ /_/g;
        $eu =~ s/([%"&])/ sprintf("%%%02X", ord($1)) /ge;

        # Re-attribute signatures, user links inside URLs and other username
        # mentions.  Text that has been inserted is never scanned again:
        # when the editor's name contains the bot's name (e.g. bot "Foo",
        # editor "Foo II") rescanning would corrupt the inserted names, and
        # a repeat-until-stable URL rewrite would never terminate.
        my $siglink='[[User:'.$u.']] ([[User talk:'.$u.'|talk]])';
        my @parts=(defined($sig) && length($sig))
            ? split(/(\Q$sig\E)/, $otxt, -1)
            : ($otxt);
        for my $i (0 .. $#parts){
            # split with a capture group: odd elements are the signatures
            $parts[$i]=($i % 2)
                ? $siglink
                : _attribute_mentions($parts[$i], $bot, $u, $eu);
        }
        $otxt=join('', @parts);
    }

    # Restore the literal mentions that were protected before the parse, both
    # in entity form and in the form the entities take when percent-encoded.
    $otxt=~s/\Q$botr\E/$bot/g;
    $botr=~s/&/%26/g;
    $botr=~s/#/%23/g;
    $botr=~s/;/%3B/g;
    $otxt=~s/\Q$botr\E/$bot/g;
    return $otxt;
}

# Replace every mention of $bot in $text in one left-to-right pass.
# Mentions inside something that looks like a URL ("[//..." or "http(s)://..."
# followed by at least one URL character) become $eu, the URL-safe form; all
# other mentions become $u.  Not perfect, but the best we can do in the
# situation.  Returns the rewritten text.
sub _attribute_mentions {
    my ($text, $bot, $u, $eu)=@_;

    # Characters that may continue a URL
    my $url_char=qr/[^][<>"\x00-\x20\x7F\p{Zs}]/;

    $text=~s{((?:\[|https?:)//)((?:\Q$bot\E|$url_char)+)|\Q$bot\E}{
        my ($head, $body)=($1, $2);
        my $out;
        if(defined $head){
            # A mention directly after the "//" has no URL text in front of
            # it, so it is treated as a plain mention.
            my $lead=($body=~s/^\Q$bot\E//) ? $u : '';
            $body=~s/\Q$bot\E/$eu/g;
            $out=$head.$lead.$body;
        } else {
            $out=$u;
        }
        $out;
    }ge;
    return $text;
}

1;