User:RFC bot/rfcbot.php
Appearance
<?php
/** rfcbot.php - Automatic update of Wikipedia RFC lists
* STABLE Version 4.2.6
*
* © 2011 James Hare and contributors - http://en.wikipedia.org/wiki/User:Harej
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Developers (add yourself here if you worked on the code):
* James Hare - [[User:Harej]] - Wrote everything
* Terry E. - [[User:TerryE]] - Code recommendations for improved operation
**/
// Display errors in the script output and report everything except notices.
ini_set("display_errors", 1);
error_reporting(E_ALL ^ E_NOTICE);
include("./public_html/botclasses.php"); // Botclasses.php was written by User:Chris_G and is available under the GNU General Public License
// NOTE(review): logininfo.php is not shown here; presumably it defines $botuser and $botpass, which are read at login — confirm.
include("logininfo.php");
// Definitions

// Category abbreviations recognised as parameters of the RFC template.
$RFC_categories = array(
    "bio", "hist", "econ", "sci", "lang", "media", "pol",
    "reli", "soc", "style", "policy", "proj", "unsorted",
);

// Per-category accumulators, all keyed by the abbreviations above:
//   $RFC_submissions   - wikitext of the category's RFC list page (starts with the page header)
//   $RFC_dashboard     - dot-separated links for the dashboard navbox
//   $RFC_listofentries - page titles currently listed in the category
$RFC_list_header = "<noinclude>'''The following discussions are requested to have community-wide attention:'''\n\n</noinclude>";
$RFC_submissions = array_fill_keys($RFC_categories, $RFC_list_header);
$RFC_dashboard = array_fill_keys($RFC_categories, "");
$RFC_listofentries = array_fill_keys($RFC_categories, array());

# This specific exception was provided for upon request of WikiProject Philosophy.
$RFC_submissions["reli"] = "'''The following discussions are requested to have community-wide attention:'''\n{{Philosophy/Nav}}\n\n";

// Wiki page that holds the list for each category. The order of this array
// matters: later steps walk it in sequence.
$RFC_pagetitles = array(
    "bio" => "Wikipedia:Requests for comment/Biographies",
    "econ" => "Wikipedia:Requests for comment/Economy, trade, and companies",
    "hist" => "Wikipedia:Requests for comment/History and geography",
    "lang" => "Wikipedia:Requests for comment/Language and linguistics",
    "sci" => "Wikipedia:Requests for comment/Maths, science, and technology",
    "media" => "Wikipedia:Requests for comment/Art, architecture, literature, and media",
    "pol" => "Wikipedia:Requests for comment/Politics, government, and law",
    "reli" => "Wikipedia:Requests for comment/Religion and philosophy",
    "soc" => "Wikipedia:Requests for comment/Society, sports, and culture",
    "style" => "Wikipedia:Requests for comment/Wikipedia style and naming",
    "policy" => "Wikipedia:Requests for comment/Wikipedia policies and guidelines",
    "proj" => "Wikipedia:Requests for comment/WikiProjects and collaborations",
    "unsorted" => "Wikipedia:Requests for comment/Unsorted",
);
// Log in to the wiki.
// NOTE(review): $botuser and $botpass are not defined in this file; presumably logininfo.php supplies them — confirm.
echo "Logging in...";
$objwiki = new wikipedia();
$objwiki->login($botuser, $botpass);
echo " done.\n";

/* Connect to the database */
echo "Retrieving database login credentials...";
$toolserver_mycnf = parse_ini_file("/home/messedrocker/.my.cnf");
if ($toolserver_mycnf === false) {
    // Without credentials every later database call would fail; stop here with a clear message.
    die("Could not read the database credentials file.\n");
}
$toolserver_username = $toolserver_mycnf['user'];
$toolserver_password = $toolserver_mycnf['password'];
unset($toolserver_mycnf); // the parsed file is no longer needed once the two values are copied out
echo " done.\n";

echo "Logging into database...";
if (!mysql_connect("sql", $toolserver_username, $toolserver_password)) {
    // Fail at the point of connection instead of at the first query.
    die(mysql_error());
}
mysql_select_db('u_messedrocker_reqs') or die(mysql_error());
echo " done.\n";
/**
 * Runs a SQL statement, reconnecting first if the MySQL connection has gone away.
 *
 * The statement is echoed to the output before it is executed. If the
 * reconnect, the database selection or the statement itself fails, the
 * script dies with the MySQL error message.
 *
 * @param string $query The SQL statement to execute.
 * @return mixed The value returned by mysql_query(): a result resource for
 *               row-returning statements, TRUE for other successful statements.
 */
function query($query) {
    global $toolserver_username;
    global $toolserver_password;

    // We need to use this function in case our MySQL connection times out.
    if (!mysql_ping()) {
        if (!mysql_connect("sql", $toolserver_username, $toolserver_password)) {
            die(mysql_error());
        }
        mysql_select_db('u_messedrocker_reqs') or die(mysql_error());
    }

    echo $query . "\n";

    // "return mysql_query(...) or die(...)" would return the boolean value of
    // the whole "or" expression, so the result is captured before returning it.
    $result = mysql_query($query);
    if ($result === false) {
        die(mysql_error());
    }
    return $result;
}
// Step 1: Check for transclusions
echo "Checking for transclusions...";
$transcludes = $objwiki->getTransclusions("Template:Rfc");
echo " done.\n";
#print_r($transcludes);

// $listing is keyed by RFC ID. Each entry holds "title", "description",
// "timestamp" and "category" (a list of category abbreviations).
$listing = array();

for ($pg = 0; $pg < count($transcludes); $pg++) {
    echo "Getting page " . $pg . ": " . $transcludes[$pg] . "\n";
    $contents = $objwiki->getpage($transcludes[$pg]);

    // Syntax Correction. RFC templates with common errors are corrected and then saved on the wiki.
    // A run of two or more adjacent RFC tags is merged into a single {{rfc|...}} tag.
    preg_match_all("/(\{{2}\s?Rfc(tag)?\s?[^}]*\}{2}(\n|,| )*){2,}/i", $contents, $fix);
    for ($j = 0; $j < count($fix[0]); $j++) {
        // Collect the text of every tag in the run, up to (not including) its closing braces.
        preg_match_all("/(?=\{{2}\s?Rfc(tag)?\s?\|\s?)[^}]*/i", $fix[0][$j], $parts);
        $newtag = "";
        for ($k = 0; $k < count($parts[0]); $k++) {
            $newtag .= $parts[0][$k] . "|";
        }
        $newtag = str_replace("{{rfc|", "", $newtag);
        $newtag = str_replace("{{rfctag|", "", $newtag);
        $newtag = str_replace("}}", "", $newtag);
        $newtag = "{{rfc|" . $newtag . "}}\n\n";
        $newtag = str_replace("|}}", "}}", $newtag);
        $contents = str_replace($fix[0][$j], $newtag, $contents);
        // NOTE(review): the last two edit() arguments are presumably minor=false, bot=true — confirm against botclasses.php.
        $objwiki->edit($transcludes[$pg], $contents, "Fixing RFC template syntax", false, true);
    }

    // Step 2: Seeding RFC IDs.
    // Before we read the RFC IDs and match them up to a title, description, etc.,
    // we want to make sure each RFC template has a corresponding RFC ID.
    preg_match_all("/\{{2}\s?Rfc(tag)?\s?[^}]*\}{2}/i", $contents, $match);
    for ($result = 0; $result < count($match[0]); $result++) {
        if (strpos($match[0][$result], "|rfcid=") === false) { // if the rfcid is not found within an RFC template
            $new_id = substr(strtoupper(md5(rand())), 0, 7); # a seven-character random string with capital letters and digits
            // Append the parameter after the tag, then drop the now-misplaced closing braces in front of it.
            $contents = str_replace($match[0][$result], $match[0][$result] . "|rfcid=" . $new_id . "}}", $contents);
            $contents = str_replace("}}|rfcid", "|rfcid", $contents);
            $objwiki->edit($transcludes[$pg], $contents, "Adding RFC ID", false, true);
            // Register the new RFC as not yet having had its first round of feedback requests.
            mysql_query("insert into `frs` (`id`, `initround`) values (\"" . mysql_real_escape_string($new_id) . "\", false)");
        }
    }

    // Step 3: Check for RFC templates
    preg_match_all("/\{{2}\s?Rfc(tag)?\s?[^}]*\}{2}/i", $contents, $match);
    for ($result = 0; $result < count($match[0]); $result++) { # For each result on a page
        // Start every RFC without an ID so that a tag lacking rfcid= can never
        // inherit the ID of the RFC processed before it.
        $id = null;

        // Category
        preg_match_all("/\{{2}\s?Rfc(tag)?[^2]\s?[^}]*\}{2}/i", $contents, $m);
        $tagtext = isset($m[0][$result]) ? $m[0][$result] : "";
        // Strip the opening braces and template name, leaving "cat|cat|param=value...}}".
        $categorymeta = preg_replace("/\{*\s?(Rfc(?!id)(tag)?)\s?\|?\s?(1=)?\s?/i", "", $tagtext);

        # An RFC can be forced to have a certain timestamp with the time= parameter in RFC template.
        // The value runs up to the next "|" or "}" so that following parameters are not swallowed.
        if (preg_match("/\|time=[^|}]*/", $categorymeta, $forcedtimecheck)) {
            $prettytimestamp = trim(str_replace("|time=", "", $forcedtimecheck[0]));
            $timestamp = strtotime($prettytimestamp);
        }

        // Description and Timestamp
        if (!isset($timestamp)) {
            // Everything from the RFC tag up to the first signature timestamp after it.
            preg_match_all("/\{{2}\s?Rfc(tag)?\s?[^}]*\}{2}(.|\n)*?([0-2]\d):([0-5]\d),\s(\d{1,2})\s(\w*)\s(\d{4})\s\(UTC\)/im", $contents, $m);
            print_r($m[0]);
            $description = preg_replace("/\{{2}\s?Rfc(tag)?\s?[^}]*\}{2}\n*/i", "", $m[0][$result]); // get rid of the RFC template
            $description = preg_replace("/={2,}\n+/", "'''\n\n", $description); // replace section headers with boldness
            $description = preg_replace("/\n+={2,}/", "\n\n'''", $description);
            $description = "{{rfcquote|text=\n" . $description . "}}"; // indents the description
            preg_match("/([0-2]\d):([0-5]\d),\s(\d{1,2})\s(\w*)\s(\d{4})\s\(UTC\)/i", $description, $t);
            $timestamp = strtotime($t[0]);
            echo "Timestamp: " . $timestamp . "\n";
        }
        else {
            // With a forced timestamp, the timestamp text itself is used as the description.
            $description = $prettytimestamp;
        }

        // RFC ID
        if (preg_match("/\|rfcid=[^|}]*/", $categorymeta, $rfcidcheck)) {
            $id = trim(str_replace("|rfcid=", "", $rfcidcheck[0]));
            if ($id === "") {
                $id = null;
            }
        }
        echo "RFC ID: " . $id . "\n";

        // Reduce the tag text to the bare "cat|cat|..." list.
        $categorymeta = preg_replace("/\s*\}*/", "", $categorymeta);
        $categorymeta = preg_replace("/=*/", "", $categorymeta);
        $categorymeta = preg_replace("/\|time[^|}]*/", "", $categorymeta);
        $categorymeta = preg_replace("/\|rfcid[^|}]*/", "", $categorymeta);
        echo "Category: " . $categorymeta . "\n";
        $category = explode("|", $categorymeta);

        // Step 4: Inspecting for expiration. Something that's expired gets removed; something that's not expired gets moved up to the big leagues! Whee!
        // 2592000 seconds = 30 days. RFC tags found on archive pages are always treated as expired.
        if (time() - $timestamp > 2592000 && $timestamp != "" || preg_match("/\/Archive \d+/", $transcludes[$pg])) {
            echo "RFC expired. Removing tag.\n";
            // NOTE(review): this pattern strips every {{rfc...}} tag on the page, not only the expired one — confirm that is intended.
            $contents = preg_replace("/\{{2}rfc(tag)?.*\}{2}(\n|\s)?/i", "", $contents);
            $objwiki->edit($transcludes[$pg], $contents, "Removing expired RFC template", false, true);
            if ($id !== null) {
                mysql_query("delete from `frs` where `id` = \"" . mysql_real_escape_string($id) . "\"");
            }
        }
        elseif ($id === null) {
            // The listing is keyed by RFC ID, so a tag without one cannot be listed.
            echo "No RFC ID found for this template. Skipping.\n";
        }
        else {
            $listing[$id]["title"] = $transcludes[$pg];
            $listing[$id]["description"] = $description;
            $listing[$id]["timestamp"] = $timestamp;
            if (!isset($listing[$id]["category"])) {
                $listing[$id]["category"] = array();
            }
            foreach ($category as $candidate) {
                if (in_array($candidate, $RFC_categories, true)) {
                    $listing[$id]["category"][] = $candidate;
                }
            }
            if (count($listing[$id]["category"]) == 0) {
                $listing[$id]["category"][0] = "unsorted";
            }
        }

        // Clear the per-RFC state. $timestamp in particular must be unset,
        // because the description branch above is chosen with isset($timestamp).
        unset($timestamp, $prettytimestamp, $forcedtimecheck, $rfcidcheck);
        unset($categorymeta, $description, $category, $tagtext);
    }
}
// Step 5: Sorting by timestamp.
// $timestamp maps RFC ID => timestamp. It is initialised explicitly so that
// an empty $listing yields an empty array instead of an undefined variable.
$timestamp = array();
foreach ($listing as $id => $arr) {
    $timestamp[$id] = $arr["timestamp"]; # i.e., $timestamp[$id] = $listing[$id]["timestamp"]
}
if (count($timestamp) > 0) {
    // Sort newest first while keeping each ID attached to its timestamp.
    $keys = array_keys($timestamp);
    $values = array_values($timestamp);
    array_multisort($values, SORT_DESC, $keys);
    $timestamp = array_combine($keys, $values);
}

// Step 6: Adding each listing into the submissions.
foreach ($timestamp as $id => $time) {
    $title = $listing[$id]["title"];
    // Link to the RFC's anchor on its page, displayed as the page title.
    $link = "[[" . $title . "#rfc_" . $id . "|" . $title . "]]";
    foreach ($listing[$id]["category"] as $cat_abbr) {
        $RFC_submissions[$cat_abbr] .= "'''" . $link . "'''\n" . $listing[$id]["description"] . "\n";
        $RFC_dashboard[$cat_abbr] .= $link . "{{dot}}";
        $RFC_listofentries[$cat_abbr][] = $title;
    }
}

// Drop the trailing "{{dot}}" (7 characters) from each non-empty dashboard row.
foreach ($RFC_dashboard as $type => $filling) {
    if ($filling !== "") {
        $RFC_dashboard[$type] = substr($filling, 0, -7);
    }
}

// Collapse runs of three or more newlines into a single blank line.
foreach ($RFC_submissions as $type => $filling) {
    $RFC_submissions[$type] = preg_replace("/\n{3,}/", "\n\n", $filling);
}
// Step 7: Creating edit summaries.
// $rfclisting accumulates the wikitext of the dashboard navbox, one group/list pair per category.
$rfclisting = "{{navbox\n| name = {{subst:FULLPAGENAME}}\n| title = Requests for comment\n| basestyle = background: #BDD8FF;\n| liststyle = line-height: 220%;\n| oddstyle = background: #EEEEEE;\n| evenstyle = background: #DEDEDE;\n";
$counter = 0;
foreach ($RFC_pagetitles as $abbreviation => $pagename) {
    $RFC_submissions[$abbreviation] .= "{{RFC list footer|" . $abbreviation . "|hide_instructions={{{hide_instructions}}} }}";
    $counter += 1;
    $rfclisting .= "| group" . $counter . " = [[" . $pagename . "|" . str_replace("Wikipedia:Requests for comment/", "", $pagename) . "]]\n| list" . $counter . " = " . $RFC_dashboard[$abbreviation] . "\n";

    // Retrieving the old list from the database
    $query = mysql_query("SELECT * from `rfc` WHERE `category` = \"" . mysql_real_escape_string($abbreviation) . "\"");
    $row = ($query !== false) ? mysql_fetch_assoc($query) : false;
    $oldlist = is_array($row) ? unserialize($row['pagetitles']) : false;
    if (!is_array($oldlist)) {
        // No stored row (or an unreadable one): treat everything currently
        // listed as newly added so that the row gets created below.
        $oldlist = array();
    }
    $newlist = $RFC_listofentries[$abbreviation];

    // "Added: [[A]], [[B]] " - blank if no pages were added to the list.
    $justadded = array_diff($newlist, $oldlist);
    $added = "";
    if (count($justadded) > 0) {
        print_r($justadded);
        $added = "Added: [[" . implode("]], [[", $justadded) . "]] ";
    }

    // "Removed: [[A]], [[B]]" - blank if no pages were removed from the list.
    $justremoved = array_diff($oldlist, $newlist);
    $removed = "";
    if (count($justremoved) > 0) {
        print_r($justremoved);
        $removed = "Removed: [[" . implode("]], [[", $justremoved) . "]]";
    }

    $summary = $added . $removed;
    if ($summary == "") $summary = "Maintenance";

    // Step 8: Submission.
    // The stored list is only rewritten when it actually changed; the wiki page is saved every run.
    if (count($justadded) > 0 || count($justremoved) > 0) {
        query("delete from `rfc` where `category` = \"" . mysql_real_escape_string($abbreviation) . "\"");
        query("insert into `rfc` (`category`, `pagetitles`) values (\"" . mysql_real_escape_string($abbreviation) . "\", \"" . mysql_real_escape_string(serialize($newlist)) . "\")");
    }
    $objwiki->edit($pagename, $RFC_submissions[$abbreviation], $summary, false, true);
}
$rfclisting .= "}}";
$objwiki->edit("Wikipedia:Dashboard/Requests for comment", $rfclisting, "Updating RFC listings", false, true);
// Step 9: Parsing WP:FRS to create FRS user list arrays.
$RFC_pagetitles["all"] = "Wikipedia:Requests for comment/All RFCs"; # This page does not actually exist. It is a hack to allow people to sign up to receive requests for all RFCs.

// Keep fetching until the page comes back non-empty.
// NOTE(review): this loops forever if the page stays unavailable — consider a retry limit.
do {
    $frs = $objwiki->getpage("Wikipedia:Feedback request service");
} while ($frs == "");

$frs = preg_replace("/\n+/", "", $frs); # Get rid of the newlines. Who needs 'em anyway?
preg_match("/==Requests for comment==.*<!-- END OF RFC SECTION. DO NOT REMOVE THIS COMMENT. -->/i", $frs, $m); # This might be the one time in my life I actually want a greedy regex.
$frs = str_replace("==Requests for comment==", "", $m[0]);
$frs = str_replace("===<!--rfc-->", "", $frs);
$frs = str_replace("<!-- END OF RFC SECTION. DO NOT REMOVE THIS COMMENT. -->", "", $frs);

// Splitting on the remaining "===" leaves the first section title in element 0.
// Every later element is one section's sign-up list followed by the title of the
// next section, which is why section titles show up below looking like user names.
$temppool = explode("===", $frs);
$counter = 0;
$frs_users = array();

// Eligibility depends only on the user, not on the category, so each user is
// checked against the API once and the answer is reused for every later listing.
// Keyed by the underscored user name.
$eligibility = array();

foreach ($RFC_pagetitles as $abbreviation => $pagetitle) {
    echo "Compiling user index for " . $abbreviation . "\n";
    $frs_users[$abbreviation] = array();

    $temppool[$counter] = str_replace(str_replace("Wikipedia:Requests for comment/", "", $pagetitle), "", $temppool[$counter]);
    $temppool[$counter] = str_replace("===", "", $temppool[$counter]);
    $counter += 1;
    $prepool[$abbreviation] = explode("}}", $temppool[$counter]);

    foreach ($prepool[$abbreviation] as $entry) {
        // Reduce "* {{frs user|Name|limit" to "Name|limit".
        $entry = preg_replace("/\* ?\{\{frs user ?\s?\|\s?/i", "", $entry);
        $entry = preg_replace("/\s+\|\s+/", "|", $entry);

        // A hack to deal with blank entries in the array.
        if ($entry === "" || $entry === " ") {
            continue;
        }

        // Finding reasons to disqualify users.
        // User name as the API expects it: numeric limit removed, spaces as underscores.
        $test_value = preg_replace("/\|\d+/", "", $entry);
        $test_value = str_replace(" ", "_", $test_value);

        if (!array_key_exists($test_value, $eligibility)) {
            $eligible = false;
            # The bot thinks the different categories are users (because of how I wrote the bot), and as it were, there actually is a User:Unsorted who didn't sign up but is listed because of the Unsorted category.
            if ($entry != "Unsorted") {
                // The name is URL-encoded so that characters such as "&" or "+" cannot corrupt the query string.
                // Only the most recent contribution is read, so a single row is requested.
                $last_edit_check = $objwiki->query("?action=query&list=usercontribs&uclimit=1&ucuser=" . urlencode($test_value) . "&format=php");
                if (isset($last_edit_check["query"]["usercontribs"][0]["timestamp"])) { # To check if a user exists.
                    $lastedit = time() - strtotime($last_edit_check["query"]["usercontribs"][0]["timestamp"]);
                    if ($lastedit < 2592000) { # To check if a user has edited in the past 30 days.
                        $block_check = $objwiki->query("?action=query&list=users&ususers=" . urlencode($test_value) . "&usprop=blockinfo&format=php");
                        $eligible = !isset($block_check["query"]["users"][0]["blockedby"]); # To check if a user has an extant block.
                    }
                }
            }
            if (!$eligible) {
                echo $test_value . " is not eligible.\n";
            }
            $eligibility[$test_value] = $eligible;
        }

        if ($eligibility[$test_value]) {
            $frs_users[$abbreviation][] = $entry;
        }
    }

    // Split every "Name|limit" entry into array(name, limit); a missing or
    // placeholder limit defaults to 1.
    foreach ($frs_users[$abbreviation] as $i => $entry) {
        $fields = explode("|", $entry);
        if (!isset($fields[1]) || $fields[1] === "" || $fields[1] === "limit") {
            $fields[1] = 1;
        }
        $frs_users[$abbreviation][$i] = $fields;
    }
}
unset($temppool);
unset($prepool);
unset($eligibility);
// Step 10: Determining exemption on the basis of reaching user-defined request limit in a given month.
// A separate script resets the number of requests on record at the beginning of each month.
// Layout of the arrays involved:
//   $frs_users[category abbreviation][n] = array(0 => username, 1 => limit)
//   $pool[category abbreviation][n]      = username still below their limit
foreach ($frs_users as $abbreviation => $members) {
    echo "Creating pool for " . $abbreviation . "\n";
    foreach ($members as $member) {
        $username = $member[0];
        $limit = $member[1];

        $query = mysql_query("SELECT * from `frsuser` WHERE `username` = \"" . mysql_real_escape_string($username) . "\"") or die(mysql_error());
        $row = mysql_fetch_assoc($query);

        if (!isset($row['username'])) {
            // First time this user is seen: start their request count at zero and admit them.
            echo "Creating FRS user row for " . $username . "\n";
            query("insert into `frsuser` (`username`, `reqcount`) values (\"" . mysql_real_escape_string($username) . "\", 0)");
            $pool[$abbreviation][] = $username;
        }
        elseif ($row['reqcount'] < $limit) {
            // Known user who has not yet reached their limit.
            $pool[$abbreviation][] = $username;
        }
    }
}
unset($frs_users);
print_r($pool["all"]);
// Step 11: Feedback requests sent out.
// Everything's been building to this, kids. Every variable is defined. All's in the system.
// Only RFCs that have not yet had their first round of requests are selected.
$query = mysql_query("SELECT * from `frs` WHERE `initround` = false") or die(mysql_error());
while ($row = mysql_fetch_assoc($query)) {
    // RFCs that are in the table but were not listed during this run are left alone.
    if (!isset($listing[$row['id']])) {
        continue;
    }
    $rfc_id = $row['id'];
    $rfc_title = $listing[$rfc_id]['title'];
    echo "Processing article: " . $rfc_title . " with RFC ID: " . $rfc_id . "\n";

    // Creating a list specific to the RFC. For interdisciplinary RFCs, the contribution base is wider,
    // so an array for 'candidates' is made based on merging these arrays together and then weeding out
    // the redundant ones.
    $candidates = array();
    foreach ($listing[$rfc_id]["category"] as $cat_abbr) {
        if (isset($pool[$cat_abbr]) && is_array($pool[$cat_abbr])) {
            $candidates = array_merge($candidates, $pool[$cat_abbr]);
        }
    }
    if (isset($pool["all"]) && count($pool["all"]) > 0) {
        $candidates = array_merge($candidates, $pool["all"]);
    }
    $candidates = array_unique($candidates);

    // Specific exemption for those who already participated.
    // This is anticipating that an RFC may go through more than one request round.
    // The stored value is a comma-separated list with a comma at the beginning; it is
    // kept in that form in $contacted so that it can be written back unchanged.
    $contacted = (string) $row['contacted'];
    $already_did = explode(",", ltrim($contacted, ","));

    // array_unique() and array_diff() preserve keys, so the result is re-indexed
    // to guarantee a gap-free list before users are drawn from it.
    $candidates = array_values(array_diff($candidates, $already_did));
    $candidate_count = count($candidates);

    // No users available for comment. Womp womp. Other RFCs may still have candidates, so move on to the next one.
    if ($candidate_count == 0) {
        continue;
    }

    // $random_count is the number of people whose talk pages will be edited by the bot.
    if ($candidate_count <= 5) {
        $random_count = 1;
    }
    elseif ($candidate_count <= 15) {
        $random_count = 2;
    }
    else {
        $random_count = 3;
    }

    // Finally, the bot randomly selects who will be contacted.
    // Shuffling and taking the first few guarantees nobody is selected twice.
    shuffle($candidates);
    $randomuser = array_slice($candidates, 0, $random_count);

    foreach ($randomuser as $selected) {
        echo $selected . " will be contacted for an RFC at " . $rfc_title . "\n";

        // Append the request to the end of the user's talk page.
        $randomuser_talkpage = $objwiki->getpage("User talk:" . $selected);
        $objwiki->edit("User talk:" . $selected, $randomuser_talkpage . "\n\n{{subst:FRS message|title=" . $rfc_title . "|rfcid=" . $rfc_id . "}} ~~~~", "Please comment on [[" . $rfc_title . "]]", false, false);

        // Record the contact and mark the RFC as having had its first round.
        $contacted .= "," . $selected;
        query("delete from `frs` where `id` = \"" . mysql_real_escape_string($rfc_id) . "\"");
        query("insert into `frs` (`id`, `contacted`, `initround`) values (\"" . mysql_real_escape_string($rfc_id) . "\", \"" . mysql_real_escape_string($contacted) . "\", 1)");

        // Increase the user's request count. It starts from 1 so that a missing
        // row can never reuse the count of the previously contacted user.
        $currentcount = 1;
        $innerquery = mysql_query("select * from `frsuser` where `username` = \"" . mysql_real_escape_string($selected) . "\"") or die(mysql_error());
        while ($innerrow = mysql_fetch_assoc($innerquery)) {
            $currentcount = $innerrow['reqcount'] + 1;
        }
        query("delete from `frsuser` where `username` = \"" . mysql_real_escape_string($selected) . "\"");
        query("insert into `frsuser` (`username`, `reqcount`) values (\"" . mysql_real_escape_string($selected) . "\", " . $currentcount . ")");
    }
}
// And that's it!
?>