User:AnomieBOT/source/tasks/TemplateReplacer13.pm: Difference between revisions
Appearance
Content deleted Content added
Updating published sources: TemplateReplacer13, TemplateReplacer14: * Add some cleanups for misnamed "External links" sections. * Recognize more variations, in particular a level-three heading. General: * Bugfix in rawpage(). * Bugfix in process_para |
Updating published sources: TemplateReplacer13: * <nowiki>Don't consider "http://example.com" and "http://example.com/" to be different.</nowiki> |
||
Line 130: | Line 130: | ||
'website' => [ |
'website' => [ |
||
'%X', |
'%X', |
||
'%X/', |
|||
], |
], |
||
'imdb_id' => [ |
'imdb_id' => [ |
||
Line 302: | Line 303: | ||
next unless grep($_ eq $param, @process); |
next unless grep($_ eq $param, @process); |
||
my $id=$infobox_params{$param}; |
my $id=$infobox_params{$param}; |
||
$id=~s{/$}{}; |
|||
foreach (@{$ext_links{$param}}){ |
foreach (@{$ext_links{$param}}){ |
||
my $link=$_; |
my $link=$_; |
Revision as of 01:20, 4 March 2009
Approved 2009-03-01 Wikipedia:Bots/Requests for approval/AnomieBOT 24 |
package tasks::TemplateReplacer13;
=pod
=begin metadata
Task: TemplateReplacer13
BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 24
Status: Approved 2009-03-01
Rate: Max 6 edits/minute
Created: 2009-02-23
Replace the obsolete {{tlx|Infobox Film}} <code>website</code>,
<code>imdb_id</code>, and <code>amg_id</code> parameters with {{tl|official}},
{{tl|imdb title}}, and {{tl|amg movie}} in the External links section,
respectively.
=end metadata
=cut
use utf8;
use strict;
use AnomieBOT::Task;
use Digest::SHA qw/sha256_base64/;
use Data::Dumper;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
# Compiled regex matching the opening of any film external-link template
# call in wikitext. Built on the first call to run() and reused afterwards.
my $film_extlink_templates_re=undef;
# When true, run() does not edit a page if the only change would be the
# removal of the deprecated infobox parameters (i.e. no link has to be added).
my $no_edit_just_to_remove_parameters=1;
# Constructor: builds the object through the parent class constructor and
# re-blesses it into the class this method was invoked on.
sub new {
    my ($class)=@_;
    my $object=$class->SUPER::new();
    return bless($object, $class);
}
=pod
=for info
Approved 2009-03-01<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 24]]
=cut
# Always reports a true value (1).
# NOTE(review): presumably the framework uses this to decide whether the task
# may run; confirm against AnomieBOT::Task.
sub approved {
    my $is_approved=1;
    return $is_approved;
}
# Run one pass of the TemplateReplacer13 task.
#
# For every article transcluding {{Infobox Film}}, the deprecated "website",
# "imdb_id" and "amg_id" infobox parameters are removed and, unless an
# equivalent link is already present in the "External links" section, the
# matching {{official}}, {{imdb title}} or {{amg movie}} template is added
# there.
#
# Parameters:
#   $self - the task object
#   $api  - API object used for queries, edits and persistent storage
#
# Returns a number. Judging by the comments at the return sites this is the
# delay in seconds before the task should run again (0 after the five minute
# time slice is used up, 60 after a failed query, 300 when the task is shut
# off, 600 when the page list is exhausted).
# NOTE(review): confirm the exact meaning against the framework.
#
# Per-page markers written with $api->store($pageid, ...); a page with any
# marker is skipped on later passes:
#   \1       - more than one infobox on the page
#   \2       - no infobox found
#   \1000000 - infobox has none of the deprecated parameters
#   \1000001 - links already present and parameter-only edits are disabled
#   \2000000 - page was edited successfully
sub run {
    my ($self, $api)=@_;
    my $res;

    $api->task('TemplateReplacer13');
    $api->read_throttle(0);
    $api->edit_throttle(10);

    return 60 if(!defined($self->load_interlanguage_map($api)));

    # Build, once per process, the regex recognising existing film external
    # link templates (and their redirects) in wikitext.
    if(!defined($film_extlink_templates_re)){
        my @links=();
        my $cat=$api->query([],
            list        => 'categorymembers',
            cmtitle     => 'Category:Film external link templates',
            cmnamespace => 10,
            cmlimit     => 'max',
        );
        if($cat->{'code'} ne 'success'){
            $self->warn("Failed to retrieve film external link template list: ".$cat->{'error'}."\n");
            return 60;
        }
        unshift @{$cat->{'query'}{'categorymembers'}}, { title=>'Template:Official' };
        foreach my $member (@{$cat->{'query'}{'categorymembers'}}){
            my $t=$member->{'title'};
            next if $t=~m{/(?:doc|sandbox)$}i;
            my $re=_template_name_re($t);
            next unless defined($re);
            push @links, $re;

            my $redir=$api->query([],
                list          => 'backlinks',
                bltitle       => $t,
                blfilterredir => 'redirects',
                bllimit       => 'max',
            );
            if($redir->{'code'} ne 'success'){
                # Name the template that was actually being queried.
                $self->warn("Failed to retrieve redirects for $t: ".$redir->{'error'}."\n");
                return 60;
            }
            foreach my $r (@{$redir->{'query'}{'backlinks'}}){
                my $rre=_template_name_re($r->{'title'});
                push @links, $rre if defined($rre);
            }
        }
        my $alternatives=join('|', @links);
        $film_extlink_templates_re=qr/\{\{\s*(?:$alternatives)\s*(?:\||\}\})/;
    }

    my $req="[[WP:BOTREQ#IMDb links|request]]";

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    $self->_output_log($api);

    # Get a list of templates redirecting to our target
    my %templates=();
    $templates{"Template:Infobox Film"}=1;
    $res=$api->query([],
        list          => 'backlinks',
        bltitle       => "Template:Infobox Film",
        blfilterredir => 'redirects',
        bllimit       => 'max',
    );
    if($res->{'code'} ne 'success'){
        $self->warn("Failed to retrieve redirects for Template:Infobox Film: ".$res->{'error'}."\n");
        return 60;
    }
    $templates{$_->{'title'}}=1 foreach (@{$res->{'query'}{'backlinks'}});

    # Matching external links; "%X" is the text of the template parameter
    # (with any trailing slash removed, see step 3).
    my @to_process=qw/website imdb_id amg_id/;
    my %ext_links=(
        'website' => [
            '%X',
            '%X/',
        ],
        'imdb_id' => [
            'http://www.imdb.com/title/tt%X',
            'http://www.imdb.com/title/tt%X/',
            'http://imdb.com/title/tt%X',
            'http://imdb.com/title/tt%X/',
        ],
        'amg_id' => [
            'http://allmovie.com/cg/avg.dll?p=avg&sql=%X',
            'http://www.allmovie.com/cg/avg.dll?p=avg&sql=%X',
        ],
    );

    # External link generating templates; parameter 1 is the id, and optional
    # parameter 2 is the infobox's "name" parameter.
    my %ext_templates=(
        'website' => 'official',
        'imdb_id' => 'imdb title',
        'amg_id'  => 'amg movie',
    );

    # Get the list of pages to check
    my %q=(
        list        => 'embeddedin',
        eititle     => "Template:Infobox Film",
        einamespace => 0,
        eilimit     => 'max',
    );
    do {
        # NOTE(review): every other call in this sub passes [] as the first
        # argument to query(); confirm that omitting it here is intended.
        $res=$api->query(%q);
        if($res->{'code'} ne 'success'){
            $self->warn("Failed to retrieve transclusion list for Template:Infobox Film: ".$res->{'error'}."\n");
            return 60;
        }
        if(exists($res->{'query-continue'})){
            $q{'eicontinue'}=$res->{'query-continue'}{'embeddedin'}{'eicontinue'};
        } else {
            delete $q{'eicontinue'};
        }

        # Process found pages
        foreach my $page (@{$res->{'query'}{'embeddedin'}}){
            my $pageid=$page->{'pageid'};
            # Any stored marker means the page was already dealt with.
            next if defined($api->fetch($pageid));

            # Cleanup the log: drop stale entries for this page, it is about
            # to be re-examined.
            my $log=$api->fetch('log');
            $log={} unless defined($log);
            delete $log->{$_}{$pageid} foreach (keys %$log);
            $api->store('log', $log);

            my $title=$page->{'title'};
            $self->warn("Processing $title\n");

            # WTF?
            if(exists($page->{'missing'})){
                $self->warn("$title is missing? WTF?\n");
                next;
            }

            # Ok, check the page
            my $tok=$api->edittoken($title);
            if($tok->{'code'} eq 'shutoff'){
                $self->warn("Task disabled: ".$tok->{'content'}."\n");
                return 300;
            }
            if($tok->{'code'} ne 'success'){
                $self->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
                next;
            }
            next if exists($tok->{'missing'});

            # Get page text
            my $intxt=$tok->{'revisions'}[0]{'*'};

            # Step 1: Find the parameters for the infobox. Also, strip the
            # parameters we are intending to process.
            my %infobox_params=();
            my $ct=0;
            my @process=();
            my $outtxt=$self->process_templates($intxt, sub {
                my $name=shift;
                my @params=@{shift()};
                shift; # $wikitext
                shift; # $data
                my $oname=shift;

                return undef unless exists($templates{"Template:$name"});

                if($ct++>0){ # More than one infobox?
                    $self->_log($api, 'Multiple infoboxen', $pageid, $title, "$ct instances of the infobox detected.");
                    $api->store($pageid, \1);
                    return undef;
                }

                my @out=();
                foreach my $p ($self->process_paramlist(@params)){
                    $p->{'value'}=~s/^\s+|\s+$//g;
                    $infobox_params{$p->{'name'}}=$p->{'value'} unless $p->{'value'} eq '';
                    if(exists($ext_links{$p->{'name'}})){
                        # Deprecated parameter: dropped from the infobox, and
                        # queued for step 3 if it actually has a value.
                        push @process, $p->{'name'} unless $p->{'value'} eq '';
                    } else {
                        push @out, $p->{'text'};
                    }
                }
                return "{{$oname|".join("|", @out)."}}";
            });
            next if $ct>1;
            if($ct<1){
                $self->_log($api, 'No infobox', $pageid, $title, "No instance of the infobox was found in the page.");
                $api->store($pageid, \2);
                next;
            }
            unless(@process){
                # Nothing to do here.
                $api->store($pageid, \1000000);
                next;
            }

            # Step 2: Extract the external links section
            my $nowiki;
            ($outtxt,$nowiki)=$self->strip_nowiki($outtxt);
            my @sections=();
            my $extlink_section=undef;
            my $i=0;
            foreach my $piece (split /(?=(?:^|\n)==(=?)[^=](?:.*[^=])?\1==\s*\n)/, $outtxt){
                next if ($i++&1)==1; # Odd-numbered indicies are $1 from the pattern above

                # Normalise misnamed headings BEFORE the section text is
                # copied, so the corrected heading ends up in the output.
                $piece=~s/^(\n?==)(=?)\s*External\s*(\2==(?:\s*<!--.*-->)?\s*(?:\n|$))/$1$2 External links $3/i;
                $piece=~s/^(\n?==)(=?)(.*)External link\(s\)(.*\2==(?:\s*<!--.*-->)?\s*(?:\n|$))/$1$2$3External links$4/i;
                $piece=~s/^(\n?==)(=?)(.*)External link((?!s).*\2==(?:\s*<!--.*-->)?\s*(?:\n|$))/$1$2$3External links$4/i;
                $piece=~s/^(\n?==)(=?)(.*)External references?(.*\2==(?:\s*<!--.*-->)?\s*(?:\n|$))/$1$2$3External links$4/i;

                my $s=$self->replace_nowiki($piece, $nowiki);
                push @sections, \$s;
                $extlink_section=\$s if $piece=~/^\n?==(=?)\s*(?:External links(?: (?:and|&amp;|&) (?:references|sources|resources|further reading))?|(?:References|Sources|Resources) (?:and|&amp;|&) External links)\s*\1==(?:\s*<!--.*-->)?\s*(?:\n|$)/i;
            }
            if(!defined($extlink_section)){
                # Crap, we have to create an external links section.
                $self->_log($api, 'Added "External links"', $pageid, $title, "No \"External links\" section was found in the page. Check if one was added in the right place.");
                # Guard against an empty page text (no sections at all).
                my $last=@sections ? ${pop @sections} : '';
                my ($pre,$post)=$self->extract_end_content($api, $last);
                return 60 if(!defined($pre));
                $pre=~s/\s+$/\n/;
                push @sections, \$pre;
                my $dummy="\n== External links ==\n\n";
                $extlink_section=\$dummy;
                push @sections, $extlink_section;
                push @sections, \$post;
            } elsif($extlink_section==$sections[-1]){
                # Last section, strip off the post-content junk
                my $x=pop @sections;
                my ($pre,$post)=$self->extract_end_content($api, $$x);
                return 60 if(!defined($pre));
                $extlink_section=\$pre;
                push @sections, $extlink_section;
                push @sections, \$post;
            }

            # Step 3: Process our parameters
            my $parsed=$api->query([],
                action => 'parse',
                text   => $$extlink_section,
                prop   => 'externallinks',
            );
            if($parsed->{'code'} ne 'success'){
                $self->warn("Failed to parse external links section for $title: ".$parsed->{'error'}."\n");
                return 60;
            }
            my @el=();
            @el=@{$parsed->{'parse'}{'externallinks'}} if exists($parsed->{'parse'}{'externallinks'});

            my $add='';
            my $has_website=0;
            PARAM:
            foreach my $param (@to_process){
                next unless grep($_ eq $param, @process);
                my $id=$infobox_params{$param};
                # "http://example.com" and "http://example.com/" are the same
                # link; the patterns above cover both forms.
                $id=~s{/$}{};
                foreach my $pattern (@{$ext_links{$param}}){
                    my $link=$pattern;
                    $link=~s/%X/$id/g;
                    next PARAM if grep($_ eq $link, @el);
                }
                my $tmpl=$ext_templates{$param};
                $add.="\n* {{$tmpl|$id";
                $add.='|'.$infobox_params{'name'} if exists($infobox_params{'name'});
                $add.="}}";
                $has_website=1 if $param eq 'website';
            }

            # Step 4: Reassemble the page, if anything changed in step 3
            if($add ne ''){
                if($has_website && $$extlink_section=~s/\n\*/$add\n*/){
                    # Move "website" to the top of the external links
                } elsif($$extlink_section=~s/(\n\*\s*$film_extlink_templates_re.*?)\n/$1$add\n/){
                    # Put it after any other existing film external link templates
                } elsif($$extlink_section=~s/(\s*\n===)/$add$1/){
                    # There is a subsection in there (e.g. "Reviews"), put the
                    # links before it.
                } else {
                    # Just tack it on the end.
                    $$extlink_section=~s/(\s*)$/$add$1/;
                }
                $outtxt=join('', map { $$_ } @sections);
            } elsif($no_edit_just_to_remove_parameters){
                $api->store($pageid, \1000001);
                next;
            } else {
                $outtxt=$self->replace_nowiki($outtxt, $nowiki);
            }

            # Step 5: Perform the edit.
            $process[-1]='and '.$process[-1] if @process>1;
            my $summary='Moving deprecated '.join((@process>2)?', ':' ', @process).' from {{Infobox Film}} to External links per '.$req;
            $self->warn("$summary in $title\n");
            my $r=$api->edit($tok, $outtxt, $summary, 0, 1);
            if($r->{'code'} ne 'success'){
                $self->warn("Write failed on $title: ".$r->{'error'}."\n");
                next;
            }

            # Mark this page as done
            $api->store($pageid, \2000000);

            # If we've been at it long enough, let another task have a go.
            if(time()>=$endtime){
                $self->_output_log($api);
                return 0;
            }
        }
    } while(exists($q{'eicontinue'}));

    # No more pages to check, try again in 10 minutes or so in case of errors.
    $self->_output_log($api);
    return 600;
}

# Convert a full template title ("Template:Imdb title") into a regex fragment
# matching the template name as it can be written in wikitext: the first
# letter is case-insensitive and each space may also be an underscore. All
# other characters are escaped so they match literally.
# Returns nothing (undef in scalar context) for a title outside the
# "Template:" namespace.
sub _template_name_re {
    my ($title)=@_;
    return unless defined($title) && $title=~/^Template:(.+)$/s;
    my $name=$1;
    my $first=substr($name, 0, 1);
    my $rest=substr($name, 1);
    # Escape each space-separated chunk on its own so that the "[ _]" joiner
    # is not itself escaped.
    return '(?i:'.quotemeta($first).')'.join('[ _]', map { quotemeta($_) } split(/ /, $rest, -1));
}
# Record an issue for a page in the persistent log and echo it as a warning.
#
# Parameters:
#   $self    - the task object (its warn() method is used)
#   $api     - API object providing fetch()/store()
#   $section - log section the entry is filed under
#   $pageid  - page id, used as the key within the section
#   $title   - page title
#   $message - description of the issue
#
# The log is kept under the storage key 'log' as
# { section => { pageid => [title, message] } }.
sub _log {
    my ($self, $api, $section, $pageid, $title, $message)=@_;

    my $stored=$api->fetch('log');
    my $log=defined($stored) ? $stored : {};

    # Assigning through the nested key creates the section hash on demand.
    $log->{$section}{$pageid}=[$title, $message];

    $api->store('log', $log);
    $self->warn("LOG: $title: $message\n");
}
# Write the accumulated issue log to the on-wiki log page
# "User:AnomieBOT/TemplateReplacer13 log".
#
# Parameters:
#   $self - the task object (its warn() method is used)
#   $api  - API object providing edittoken()/fetch()/edit()
#
# Only log entries whose page still has a stored marker (a defined
# $api->fetch($pageid)) are listed; sections without any such entry are
# omitted. Entries within a section are sorted so that the generated text is
# stable between runs; otherwise hash ordering alone could make the text
# differ from the current page and cause a pointless edit.
#
# Returns 300 if the task has been shut off, nothing otherwise. Callers in
# this file ignore the return value.
sub _output_log {
    my $self=shift;
    my $api=shift;

    $self->warn("Updating log");
    my $tok=$api->edittoken("User:AnomieBOT/TemplateReplacer13 log");
    if($tok->{'code'} eq 'shutoff'){
        $self->warn("Task disabled: ".$tok->{'content'}."\n");
        return 300;
    }
    if($tok->{'code'} ne 'success'){
        $self->warn("Failed to get edit token for log: ".$tok->{'error'}."\n");
        return;
    }

    my $header="This is a log of issues encountered during the processing of the task TemplateReplacer13. Do not edit this page, the bot will overwrite it.\n";
    # A missing page is treated as containing just the header, so an empty log
    # does not create the page.
    my $intxt=exists($tok->{'missing'})?$header:$tok->{'revisions'}[0]{'*'};
    my $outtxt=$header;

    my $log=$api->fetch('log');
    $log={} unless defined($log);
    foreach my $section (sort keys %$log){
        my @out=();
        foreach my $pageid (keys %{$log->{$section}}){
            next unless defined($api->fetch($pageid));
            my ($title,$message)=@{$log->{$section}{$pageid}};
            push @out, "* [[:$title]]: $message\n";
        }
        next unless @out;
        $outtxt.="\n== $section ==\n".join('', sort @out);
    }

    if($outtxt ne $intxt){
        my $r=$api->edit($tok, $outtxt, 'Updating log', 0, 0);
        if($r->{'code'} ne 'success'){
            $self->warn("Could not write log: ".$r->{'error'}."\n");
            return;
        }
    }
    return;
}
1;