User:Bot1058/goodarticlenominators.php

From Wikipedia, the free encyclopedia
<?php
/** goodarticlenominators.php - Create a list of Good Article nominators
 *  Version 1.0
 *
 *  (c) 2020 WBM - http://en.wikipedia.org/wiki/User:Wbm1058
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *   
 *  Developers (add yourself here if you worked on the code):
 *    WBM - [[User:Wbm1058]] - July 2020
 **/
// Print errors to the output; report every error level except notices.
ini_set("display_errors", 1);
error_reporting(E_ALL ^ E_NOTICE);
require_once 'botclasses2.php';  // Botclasses.php was written by User:Chris_G and is available under the GNU General Public License
// Presumably supplies the $user and $pass values handed to login() further down -- TODO confirm.
include("logininfo.php");
// Regex alternation of namespace names; talkpagename() splices it into a pattern to
// recognise a leading "<namespace>:" prefix on a page title.
const namespaces = "User|Wikipedia|File|MediaWiki|Template|Module|Help|Category|Portal|Book|Draft";

# Hard-coded nomination records, held as parallel arrays: entry k of good_articles (a talk page
# title), good_users and good_times belong together. The main loop consults them before it
# fetches any revision history and, on a match, takes the nominator and timestamp from here.
# good_revids is not referenced anywhere in the code visible in this file.
# NOTE(review): the lines below look like "title used in the nomination ==> current title" -- confirm.
# {{la|Limyra Bridge}} ==> Bridge near Limyra
# {{la|Arniston (ship)}} ==> Arniston (East Indiaman)
# {{la|A.M. (album)}} ==> A.M. (Wilco album)
const good_articles = ["Talk:Bridge near Limyra", "Talk:Arniston (East Indiaman)", "Talk:A.M. (Wilco album)"];
const good_users = ["Gun Powder Ma", "Socrates2008", "Teemu08"];
const good_times = ["2009-03-28T00:03:02Z", "2008-01-16T11:07:56Z", "2007-07-09T00:53:22Z"];
const good_revids = [280114392, 184706636, 143399073];

/**
 * Build the talk page title that corresponds to a page title.
 *
 * A title that starts with one of the prefixes listed in the `namespaces` constant
 * (matched case-insensitively) gets " talk" inserted before the colon, e.g.
 * "User:Example" becomes "User talk:Example". Any other title is treated as an
 * article and gets a "Talk:" prefix.
 *
 * Only the leading prefix is rewritten. A later occurrence of the same text inside
 * the title (for instance "Wikipedia:Foo/Wikipedia:Bar") is left untouched.
 *
 * @param string $pagename Page title, optionally carrying a namespace prefix.
 * @return string The talk page title. A title that already starts with "Talk:" is
 *                reported on standard output and returned unchanged.
 */
function talkpagename ($pagename) {
	if (substr($pagename,0,5) == "Talk:") {
		echo "\n? Error in function talkpagename, page $pagename is a talk page!\n";
		return $pagename;
	}

	// $tpcp[1] is the matched prefix including its colon, $tpcp[2] the bare namespace name.
	if (preg_match("/^((" . namespaces . "):)/i",$pagename,$tpcp)) {
		// Rebuild from the text after the prefix rather than using str_replace(), which
		// would also rewrite every later occurrence of the prefix inside the title.
		return $tpcp[2] . ' talk:' . substr($pagename, strlen($tpcp[1]));
	}

	return "Talk:" . $pagename;
}

# Revision batches: one for the nominations page, one for the talk page being examined.
$GANrevisions = [];
$revisions = [];

# Running extremes. The "oldest" values start at the current time so that any
# earlier timestamp found later replaces them; the "newest" value starts at zero.
$oldest_GANtimestamp = time();
$oldest_timestamp = time();
$newest_GAtime = 0;

# Tally rows of [user name, number of nominations], seeded with one empty row.
$nominators = [[]];
$nbr_nominators = 0;

# Open the wiki session that every later API call goes through.
echo "Logging in...\n";
$objwiki = new wikipedia();
$objwiki->login($user, $pass);
echo "...done.\n";

# Collect revisions of the nominations page into $nomuser / $nomtime / $nomtext, numbered from 1
# in the order received. They are searched later for articles whose talk page history shows
# no nominee template.
$section = "all";
# Starting point for the history walk; the commented-out values are alternative starting points.
# NOTE(review): each value looks like "<timestamp>|<revision id>" in URL-encoded form -- confirm.
#$continue= "&rawcontinue=&rvcontinue=20100918175237%7C385570413";
$continue = "&rawcontinue=&rvcontinue=20080430154059%7C209246686";
#$continue= "&rawcontinue=&rvcontinue=20080105170646%7C182329884";
#$continue= "&rawcontinue=&rvcontinue=20070530230027%7C134680461";
$nomrev = 0;

# A continuation value of exactly '&rawcontinue=' is treated as "nothing left to fetch".
while ($continue != '&rawcontinue=') {
	# Remember where this batch started so a failed request can be repeated from the same point.
	$resume_from = $continue;
	list($GANrevisions,$continue) = $objwiki->getrevisions("Wikipedia:Good article nominations",$section,$resume_from);

	# Same guard as the talk page fetch in the main loop: a non-array result means the request
	# failed, so wait and retry a few times instead of counting and indexing a non-array.
	$failed_attempts = 0;
	while (!is_array($GANrevisions)) {
		echo "\nNot an array: " . $GANrevisions;
		if ($failed_attempts == 5) {
			die("Revisions retrieval failed!");
		}
		sleep(10);
		list($GANrevisions,$continue) = $objwiki->getrevisions("Wikipedia:Good article nominations",$section,$resume_from);
		$failed_attempts += 1;
	}

	$batch_size = count($GANrevisions);
	for ($GR = 0; $GR < $batch_size; $GR++) {
		$nomrev += 1;
		$nomuser[$nomrev] = $GANrevisions[$GR]['user'];
		$nomtime[$nomrev] = $GANrevisions[$GR]['timestamp'];
		$nomtext[$nomrev] = $GANrevisions[$GR]['slots']['main']['*'];
		echo "[" . $nomrev . "] User:" . $nomuser[$nomrev] . "  Timestamp:" . $nomtime[$nomrev] . "\n";
	}
	# Stop once this many revisions have been collected, even if more history remains.
	# NOTE(review): the reason for this particular cap is not visible here -- confirm.
	if ($nomrev > 20980) break;
}

# Counters and lists filled in by the article scan:
#   $WPGANfound              - nominations recovered from the nominations page history
#   $notfound                - number of articles for which no nominator was identified
#   $notfounds/$notfoundtimes - talk page title and {{GA}} time of each such article
$WPGANfound = 0;
$notfound = 0;
$notfounds = [];
$notfoundtimes = [];

# Titles of the members of the good articles category; the scan visits each one.
$goodarticles = $objwiki->categorymembers("Category:Good articles");

# Scan every good article and work out who nominated it, trying in order:
#   1. the hard-coded records in the good_articles / good_users / good_times constants;
#   2. the talk page history, first the top section and then all sections, looking for a
#      nominee template;
#   3. the collected revisions of the nominations page, looking for an article-links
#      template that names the article.
# Each identified nominator is counted in $nominators; articles with no result are listed
# in $notfounds / $notfoundtimes.

# Maps a nominator's user name to the index of that nominator's row in $nominators.
$nominator_slot = array();
$article_count = count($goodarticles);

for ($a = 0; $a < $article_count; $a++) {
	if (substr($goodarticles[$a],0,5) == "Talk:") {
		echo "\n? ERROR, page " . $goodarticles[$a] . " is a talk page!\n";
		die;
	}
	# Skip this category page. The test must be made on the page title itself: once it has
	# been through talkpagename() it reads "Category talk:Lists of good articles" and can
	# never equal the string below.
	if ($goodarticles[$a] == "Category:Lists of good articles") continue;
	$goodtalk = talkpagename($goodarticles[$a]);
	$foundGAnom = false;
	# Start from "no {{GA}} revision seen" so the later checks never read an unset entry.
	$GAtime[$a] = 0;

	# 1. Hard-coded records take precedence over any history lookup.
	$k = array_search($goodtalk, good_articles, true);
	if ($k !== false) {
		$foundGAnom = true;
		$GAnom[$a] = good_users[$k];
		$timestamp[$a] = strtotime(good_times[$k]);
		echo "[" . $a . "] " . $goodarticles[$a] .
		    " ** GA nominee found in const good_articles, User:" . $GAnom[$a] . "  Timestamp:" . $timestamp[$a] . " (" . date("Y-m-d H:i:s", $timestamp[$a]) . ")\n";
		goto foundGAnom;
	}

	# 2. Talk page history. The first pass requests section "top"; if that yields nothing the
	#    code jumps back to searchall with section "all".
	$section = "top";
	$searched_all = false;
	searchall:
	list($revisions,$continue) = $objwiki->getrevisions($goodtalk,$section);
	$breakcounter = 0;
	# A non-array result means the request failed: retry after a pause, then give up.
	while (!is_array($revisions)) {
		echo "\nNot an array: " . $revisions;
		if ($breakcounter == 5) {
			die("Revisions retrieval failed!");
		}
		else {
			sleep(10);
			list($revisions,$continue) = $objwiki->getrevisions($goodtalk,$section);
			$breakcounter += 1;
		}
	}

	if (count($revisions) == 500) {
		echo "[" . $a . "] " . $goodarticles[$a] . " has 500+ revisions";
	}
	else {
		echo "[" . $a . "] " . $goodarticles[$a] . " has " . count($revisions) . " revisions";
	}

	searchloop:
	for ($b = 0; $b < count($revisions); $b++) {
		$content = $revisions[$b]['slots']['main']['*'];

		# Remember when a {{GA}} template was seen; reported if no nominator turns up.
		if (preg_match("/\{\{GA\s*(\||\})/", $content)) {
			$GAtime[$a] = strtotime($revisions[$b]['timestamp']);
		}

		# Every consecutive revision carrying a nominee template overwrites the candidate, so
		# the last one in the run wins; the first revision after the run that lacks the
		# template ends the search.
		if (preg_match("/\{\{(GA nominee|GAnominee|GANominee|GAnom|GAcandidate|GAC|GAN|Good article nominee)/", $content)) {
			$foundGAnom = true;
			$GAnom[$a] = $revisions[$b]['user'];
			$timestamp[$a] = strtotime($revisions[$b]['timestamp']);
			if ($timestamp[$a] < $oldest_GANtimestamp) $oldest_GANtimestamp = $timestamp[$a];
			if ($timestamp[$a] < $oldest_timestamp) $oldest_timestamp = $timestamp[$a];
		}
		else if ($foundGAnom == true) {
			echo " ** [[Template:GA nominee]] found, User:" . $GAnom[$a] . "  Timestamp:" . $timestamp[$a] . " (" . date("Y-m-d H:i:s", $timestamp[$a]) . ")\n";
			goto foundGAnom;
		}
	}
	# NOTE(review): when a run of template revisions reaches the end of a batch, the candidate
	# is accepted without fetching the next batch, so a run that continues there is cut short.
	if ($foundGAnom == false) {
		echo "\n$continue\n";
		if ($continue != '&rawcontinue=') {
			# More history is available: fetch the next batch and scan it as well.
			list($revisions,$continue) = $objwiki->getrevisions($goodtalk,$section,$continue);
			if (!is_array($revisions)) {
				# Abort explicitly rather than counting and indexing a non-array result.
				echo "\nNot an array: " . $revisions;
				die("Revisions retrieval failed!");
			}
			if (count($revisions) == 500) {
				echo "[" . $a . "] " . $goodarticles[$a] . " has 500+ more revisions";
			}
			else {
				echo "[" . $a . "] " . $goodarticles[$a] . " has " . count($revisions) . " more revisions";
			}
			goto searchloop;
		}
		if ($searched_all == false) {
			echo "\n? Did not find {{GA nominee}} template at top of page, searching all sections...\n";
			$section = "all";
			$searched_all = true;
			goto searchall;
		}
		if ($GAtime[$a] > 0) {
			echo "\n? Did not find {{GA nominee}} template, {{GA}} time: " . $GAtime[$a] . " (" . date("Y-m-d H:i:s", $GAtime[$a]) . ")\n";
			if ($GAtime[$a] > $newest_GAtime) $newest_GAtime = $GAtime[$a];
		}
		else {
			echo "\n? Did not find {{GA nominee}} template, {{GA}} not found\n";
		}

		# 3. Nominations page history, with the same "last revision of the run wins" rule.
		for ($b = 1; $b <= $nomrev; $b++) {
			if (preg_match("/\{\{(la|La|Article|Article links|Articlelinks|Article-links|Page links)\|" . preg_quote($goodarticles[$a], '/') . "\}\}.*/", $nomtext[$b], $m)) {
				$foundGAnom = true;
				$GAnom[$a] = $nomuser[$b];
				$timestamp[$a] = strtotime($nomtime[$b]);
				if ($timestamp[$a] < $oldest_timestamp) $oldest_timestamp = $timestamp[$a];
			}
			else if ($foundGAnom == true) {
				echo " ** [[WP:Good article nomination]] found, User:" . $GAnom[$a] . "  Timestamp:" . $timestamp[$a] . " (" . date("Y-m-d H:i:s", $timestamp[$a]) . ")\n";
				$WPGANfound += 1;
				goto foundGAnom;
			}
		}
		if ($foundGAnom == false) {
			$notfound += 1;
			$notfounds[$notfound] = $goodtalk;
			$notfoundtimes[$notfound] = $GAtime[$a];
		}
		else {
			# The matching run extended to the last collected revision.
			echo " ** [[WP:Good article nomination]] found, User:" . $GAnom[$a] . "  Timestamp:" . $timestamp[$a] . " (" . date("Y-m-d H:i:s", $timestamp[$a]) . ")\n";
			$WPGANfound += 1;
		}
	}
	else echo " ** [[Template:GA nominee]] found, User:" . $GAnom[$a] . "  Timestamp:" . $timestamp[$a] . " (" . date("Y-m-d H:i:s", $timestamp[$a]) . ")\n";

	foundGAnom:
	if ($foundGAnom == true) {
		# Count the nomination. Rows are filled from index 0 upward with no gaps, and the
		# lookup is by exact user name (a loose == comparison would treat numeric-looking
		# names such as "007" and "7" as the same user).
		$slot_key = (string) $GAnom[$a];
		if (isset($nominator_slot[$slot_key])) {
			$nominators[$nominator_slot[$slot_key]][1] += 1;
		}
		else {
			$nominator_slot[$slot_key] = $nbr_nominators;
			$nominators[$nbr_nominators] = array($GAnom[$a], 1);
			$nbr_nominators += 1;
		}
	}
}

# Rank the nominators by descending nomination count and print the report.
# $sortAux holds the count column that array_multisort() orders by; $nominators is
# rearranged alongside it and renumbered from 0.
$sortAux = array();
foreach ($nominators as $res) {
	# $nominators is seeded with one empty row; if no nominator was ever recorded that row
	# has no count, so treat it as zero instead of reading a missing element.
	$sortAux[] = isset($res[1]) ? $res[1] : 0;
}
array_multisort($sortAux, SORT_DESC, $nominators);

echo "\n\nNumber of nominations by nominator for $nbr_nominators nominators:\n";
$subtotal = 0;
for ($y = 0; $y < $nbr_nominators; $y++) {
	$subtotal += $nominators[$y][1];
	# Rank right-aligned in a four-character column, then name, count and running total.
	echo str_pad((string) ($y + 1), 4, " ", STR_PAD_LEFT) . "  " . $nominators[$y][0] . " => " . $nominators[$y][1] . " += " . $subtotal;
	echo "\n";
}

echo "\n\nOldest {{GA nominee}} timestamp: " . $oldest_GANtimestamp . " (" . date("Y-m-d H:i:s", $oldest_GANtimestamp) . ")";
echo "\nOldest timestamp: " . $oldest_timestamp . " (" . date("Y-m-d H:i:s", $oldest_timestamp) . ")";
echo "\nNewest unfound {{GA}}: " . $newest_GAtime . " (" . date("Y-m-d H:i:s", $newest_GAtime) . ")";
echo "\n[[WP:Good article nomination]]s found: " . $WPGANfound;

# List the articles for which no nominator was identified (entries are numbered from 1).
echo "\n\n{{GA nominee}} not found: " . $notfound . "\n";
for ($i = 1; $i <= $notfound; $i++) {
	if ($notfoundtimes[$i] > 0) {
		echo "\n[". $i . "] => " . $notfounds[$i] . " {{GA}} " . $notfoundtimes[$i] . " (" . date("Y-m-d H:i:s", $notfoundtimes[$i]) . ")";
	}
	else {
		echo "\n[". $i . "] => " . $notfounds[$i] . " {{GA}} not found";
	}
}
echo "\n\nMission accomplished.\n\n";

?>