User:DYKUpdateBot/Code: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
bugfix; {{Article history}} support for DYK nomination pages shouldn't override the hook
Line 1: Line 1:
Below is the code for [[User:DYKUpdateBot|DYKUpdateBot]]. The bot runs on [[WP:Pywikibot]].
Below is the code for [[User:DYKUpdateBot|DYKUpdateBot]]. Many thanks to the developers of the [http://jwbf.sourceforge.net/ JavaWikiBotFramework] (JWBF), which made this possible. The bot runs on [http://jwbf.svn.sourceforge.net/viewvc/jwbf/trunk/jwbf/src/net/sourceforge/jwbf/?pathrev=178 revision 178] of the JWBF and [http://commons.apache.org/lang/api-2.5/index.html version 2.5] of Apache's Commons Lang library.


<source lang="java">
<source lang="python">
import java.io.BufferedReader;
import os
import java.io.File;
import pathlib
import java.io.FileReader;
import pywikibot
import java.text.ParseException;
import mwparserfromhell
import java.text.SimpleDateFormat;
import html
from datetime import datetime, timedelta, timezone
import java.util.Arrays;
from functools import partial
import java.util.Calendar;
import java.util.Date;
from re import search
import java.util.GregorianCalendar;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Scanner;


import org.apache.commons.lang.StringEscapeUtils;
import org.jdom.Document;
import org.jdom.Element;


class DYKUpdateBot():
import net.sourceforge.jwbf.actions.mw.MediaWiki;
TDYK_LOC = 'Template:Did you know'
import net.sourceforge.jwbf.bots.MediaWikiBot;
NEXT_UPDATE_QUEUE_LOC = 'Template:Did you know/Queue/Next'
import net.sourceforge.jwbf.contentRep.mw.SimpleArticle;
LAST_UPDATE_TIME_LOC = 'Template:Did you know/Next update/Time'
TIME_BETWEEN_UPDATES_LOC = 'User:DYKUpdateBot/Time Between Updates'
QUEUE_ROOT_LOC = 'Template:Did you know/Queue/'
WTDYK_LOC = 'Wikipedia talk:Did you know'
ARCHIVE_LOC = 'Wikipedia:Recent additions'
ERROR_OUTPUT_LOC = 'User:DYKUpdateBot/Errors'
DRIFT_LOC = 'User:DYKUpdateBot/ResyncDrift'
SECONDS_BETWEEN_STATUS_CHECKS = 600
NUM_QUEUES = 7


def run(self) -> None:
public class DYKUpdateBot extends EnWikiBot {
DYKUpdateBotUtils.log('PID: {0}'.format(os.getpid()))


pywikibot.Site().login()
private static final String TDYKLoc = "Template:Did you know";
while self._is_on() and pywikibot.Site().logged_in():
private static final String QueueRootLoc = "Template:Did you know/Queue/";
DYKUpdateBotUtils.log(datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S %Z'))
private static final String TimeLoc = "Template:Did you know/Next update/Time";
private static final String NextUpdateQueueLoc = "Template:Did you know/Queue/Next";
private static final String ClearTemplate = "{{User:DYKUpdateBot/REMOVE THIS LINE}}";
private static final String TimeBetweenUpdatesLoc = "User:DYKUpdateBot/Time Between Updates";
private static final String ArchiveLoc = "Wikipedia:Recent additions";
private static final String WTDYKLoc = "Wikipedia talk:Did you know";
private static final String ErrorOutputLoc = "User:DYKUpdateBot/Errors";
private static final String DriftLoc = "User:DYKUpdateBot/ResyncDrift";
private static final String BaseCommonsAPIURL = "https://commons.wikimedia.org/w/";
private static final int TimeBetweenEdits = 5; // in seconds
private static final int TimeBetweenStatusChecks = 600; // in seconds
private static final int NumQueues = 7;
private static final int NumExceptionsBeforeAttemptedReset = 55;
private static final String[] shipTemplates = {"ship", "sclass", "Jsub",
"barge", "GTS", "HSC", "MS", "MV", "PS", "SS", "tugboat", "HMAS",
"HMCS", "HMNZS", "HMS", "RMS", "USAT", "USCGC", "USNS", "USRC", "USS",
"SMS", "SMU", "GS", "HNLMS", "HNoMS" };
private StringBuilder errorLog;
private int nextQueue;
/**
 * Constructs the bot, delegating all configuration to the EnWikiBot base class.
 * @param timeBetweenEdits seconds to wait between consecutive edits (see TimeBetweenEdits above)
 * @param numExceptionsBeforeAttemptedReset number of exceptions tolerated before a reset is attempted
 * @param purgeLoc page used for purge requests — NOTE(review): semantics inferred from name, confirm in EnWikiBot
 * @param userName bot account username
 * @param password bot account password
 */
public DYKUpdateBot(int timeBetweenEdits, int numExceptionsBeforeAttemptedReset,
String purgeLoc, String userName, String password) {
super(timeBetweenEdits, numExceptionsBeforeAttemptedReset, purgeLoc, userName,
password);
}
/**
 * Main bot loop: every TimeBetweenStatusChecks seconds, reads the last-update
 * timestamp and the configured interval from the wiki, computes the time of the
 * next update, and either performs the update (updateDYK) or pre-checks the
 * queue formatting (checkFormatting) when less than two hours remain.
 * A DYKResetException restarts the whole loop (re-login included); any other
 * exception terminates the bot.
 */
public void run() {
boolean dykResetExceptionThrown = false;
do {
dykResetExceptionThrown = false;
try {
login();
nextQueue = findNextQueueNumber();
errorLog = new StringBuilder();
lastDelId = getLastDelId();
while (isOn()) {
checkifLoggedIn();
log(new Date().toString()); // output the date and time
// figure out when next update is
SimpleArticle dykTimePage = readContent(TimeLoc);
String dykTime = dykTimePage.getText().trim();
if (dykTime.lastIndexOf("\n") != -1) { // if there are multiple lines, get the last line
dykTime = dykTime.substring(dykTime.lastIndexOf("\n")).trim();
}
GregorianCalendar nextUpdateTime = new GregorianCalendar(BotLocale);
// first set it to the last update time
try {
nextUpdateTime.setTime(APITimestampFormat.parse(dykTime));
} catch (ParseException e) {
// bad timestamp on the wiki page: report, wait a cycle, re-read
logError("Time at [[" + TimeLoc + "]] is not formatted correctly");
postErrors();
sleep(TimeBetweenStatusChecks * 1000);
continue;
}
// then get the number of seconds between updates, and add it
int timeBetweenUpdates;
try {
timeBetweenUpdates = Integer.parseInt(
readContent(TimeBetweenUpdatesLoc).getText().trim());
} catch (Exception e) {
logError("Time between updates at [[" + TimeBetweenUpdatesLoc +
"]] is not formatted correctly");
postErrors();
sleep(TimeBetweenStatusChecks * 1000);
continue;
}
// add the correct number of seconds to show the time for the next update
nextUpdateTime.add(Calendar.SECOND, timeBetweenUpdates);
// figure out what the current time is
GregorianCalendar currentTime = new GregorianCalendar(BotLocale);
// update DYK if it's time
long secondsUntilUpdate = (nextUpdateTime.getTimeInMillis() -
currentTime.getTimeInMillis())/1000;
log("Seconds left until next update: " + secondsUntilUpdate);
GregorianCalendar nextNextUpdateTime = new GregorianCalendar(BotLocale);
// calendar for checking if image is protected the whole time it's on the Main Page
nextNextUpdateTime.setTimeInMillis(nextUpdateTime.getTimeInMillis());
nextNextUpdateTime.add(Calendar.SECOND, timeBetweenUpdates);
if (secondsUntilUpdate <= 0) {
updateDYK(dykTimePage, timeBetweenUpdates, nextNextUpdateTime);
} else if (secondsUntilUpdate < 7200) {
// less than two hours left: pre-validate the queue so humans can fix it in time
checkFormatting(secondsUntilUpdate, nextNextUpdateTime);
}
postErrors();
if (secondsUntilUpdate < TimeBetweenStatusChecks && secondsUntilUpdate > 0) {
// update is due before the next regular status check: sleep exactly until it
currentTime.setTime(new Date());
sleep(Math.abs(nextUpdateTime.getTimeInMillis() - currentTime.getTimeInMillis()));
} else {
sleep(TimeBetweenStatusChecks * 1000);
}
}
} catch (DYKResetException e) {
log("Reset exception caught, resetting...");
dykResetExceptionThrown = true;
} catch (Exception e) {
e.printStackTrace(System.out);
log("Exception occurred; exiting at " + new Date().toString());
}
} while (dykResetExceptionThrown);
}
/**
 * Checks if all pages are formatted correctly for the next update
 * If something's wrong, the bot will post to WT:DYK 2 hours before the update
 * Most of this code is copied from updateDYK()
 * @param secondsUntilUpdate number of seconds until the next update
 * @param nextNextUpdateTime time when the update after next will go live;
 * aka when the set for the next update will be taken off
 */
private void checkFormatting(long secondsUntilUpdate, GregorianCalendar nextNextUpdateTime) {
// figure out which queue is next
nextQueue = findNextQueueNumber();
if (nextQueue == 0) return; // couldn't parse
String wikilinkToQueue = "[[" + QueueRootLoc + nextQueue + "|Queue " + nextQueue + "]]";
// get the wikitext of the queue
String queueText = removeUnnecessarySpaces(readContent(QueueRootLoc + nextQueue).getText());
// make sure the queue has {{DYKbotdo}}
if (!queueText.contains("{{DYKbotdo")) {
logError(wikilinkToQueue + " is not tagged with {{tl|DYKbotdo}}");
if (secondsUntilUpdate < 7200) {
// post to WT:DYK if less than two hours left
try {
// get the text of the message and update it
// NOTE(review): message template read from a local file, not the wiki
Scanner in = new Scanner(new File("almostLate.txt"));
StringBuilder errorBuilder = new StringBuilder();
while (in.hasNext()) {
errorBuilder.append(in.nextLine()).append("\n");
}
in.close();
String errorMessage = errorBuilder.toString().trim();
// substitute placeholders in the message template
while (errorMessage.contains("queueNum")) {
errorMessage = errorMessage.replace("queueNum", "" + nextQueue);
}
if (errorMessage.contains("hoursLeft")) {
errorMessage = errorMessage.replace("hoursLeft", "two hours");
}
String setIdentifier = APITimestampFormat.format(nextNextUpdateTime.getTime());
if (errorMessage.contains("uniqueSetIdentifier")) {
// if Template:Did you know/Next update/Time changes, the
// set identifier will also change
errorMessage = errorMessage.replace("uniqueSetIdentifier", setIdentifier);
}
do {
try {
SimpleArticle WTDYK = readContent(WTDYKLoc);
// edit WT:DYK if an alert isn't already posted for this set
if (!WTDYK.getText().contains(setIdentifier)) {
WTDYK.addText("\n\n" + errorMessage);
WTDYK.setEditSummary("DYK is almost late");
writeContent(WTDYK);
}
return;
} catch (EditConflictException e) {
log("Edit conflict caught");
// will try again because of while(true)
}
} while (true);
} catch (DYKResetException e) {
throw e;
} catch (Exception e) {
logError("Error occurred while posting 'dyk is late' message");
}
}
return; // don't continue checking for formatting errors, as the queue may be empty
}


// NOTE(review): the indented Python lines below are artifacts of the revision
// diff this text was extracted from (a newer Python port), not part of this
// Java method — they are preserved verbatim.
results = ValidationResults()
// make sure the queue has <!--Hooks--> and <!--HooksEnd-->
seconds_until_next_update = DYKUpdateBot.SECONDS_BETWEEN_STATUS_CHECKS # placeholder
int indexOfHooksinQueue = queueText.indexOf("<!--Hooks-->");
time_next_update, time_next_update_leaving = self._calculate_next_update_time(results.rgstr_errors)
int indexOfHooksEndinQueue = queueText.indexOf("<!--HooksEnd-->", indexOfHooksinQueue);
if not results.rgstr_errors:
if (indexOfHooksinQueue == -1 || indexOfHooksEndinQueue == -1) {
time_now = pywikibot.Site().server_time().replace(tzinfo=timezone.utc)
logError(wikilinkToQueue + " is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>");
seconds_until_next_update = int((time_next_update - time_now).total_seconds())
return; // can't find hooks, bail out
DYKUpdateBotUtils.log('Seconds left until next update: {0}'.format(seconds_until_next_update))
}
String newHooks = queueText.substring(indexOfHooksinQueue + 12, indexOfHooksEndinQueue);
// make sure image doesn't have |right and is set to 100x100px
String newHooksLowerCase = newHooks.toLowerCase();
if (newHooksLowerCase.contains("[[file:") ||
newHooksLowerCase.contains("[[image:")) { // image file
int startIndex = Math.max(newHooksLowerCase.lastIndexOf("[[file:") + 7,
newHooksLowerCase.lastIndexOf("[[image:") + 8);
int endIndex = startIndex;
// extend endIndex until the "]]" that balances any nested "[[...]]" links
for (int i=1; newHooks.indexOf("]]", endIndex + 2) != -1; i++) {
endIndex = newHooks.indexOf("]]", endIndex + 2);
if (newHooks.substring(startIndex, endIndex).split("\\[\\[").length == i) {
break;
}
}
String imageWikitext = newHooks.substring(startIndex, endIndex);
if (imageWikitext.contains("|right")) {
logError("Warning: File formatting contains |right in " + wikilinkToQueue);
}
if (!imageWikitext.contains("100x100px")) {
logError("Warning: File size is not set to 100x100px in " + wikilinkToQueue);
}
}
// make sure all curly braces are matched
if (queueText.split("\\{\\{").length != queueText.split("\\}\\}").length) {
logError("Unmatched left <nowiki>(\"{{\") and right (\"}}\")</nowiki> curly braces in " + wikilinkToQueue);
}
// make sure file is protected
DYKFile incomingFile = findFile(newHooks);
if (incomingFile != null) checkIfProtected(incomingFile.getFilename(), nextNextUpdateTime, true);
// fetch T:DYK
String dykMainText = readContent(TDYKLoc).getText();
// make sure T:DYK has <!--Hooks--> and <!--HooksEnd-->
int indexOfHooksonTDYK = dykMainText.indexOf("<!--Hooks-->");
int indexOfHooksEndonTDYK = dykMainText.indexOf("<!--HooksEnd-->", indexOfHooksonTDYK);
if (indexOfHooksonTDYK == -1 || indexOfHooksEndonTDYK == -1) {
logError("[[" + TDYKLoc + "]] is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>");
}
}
/**
 * Performs the DYK update: copies the hooks from the next queue onto T:DYK,
 * resets the update clock, archives the outgoing hooks, credits articles and
 * users, clears the queue, and advances the next-queue pointer.
 * @param dykTimePage the page indicating the time of the last update
 * @param timeBetweenUpdates seconds between updates (used for drift calculation)
 * @param nextNextUpdateTime time when the update after next will go live;
 * aka when the set for the next update will be taken off
 */
private void updateDYK(SimpleArticle dykTimePage, final int timeBetweenUpdates,
GregorianCalendar nextNextUpdateTime) {
// figure out which queue to update from
nextQueue = findNextQueueNumber();
if (nextQueue == 0) return; // couldn't parse
// get the wikitext of the queue
SimpleArticle queue = new SimpleArticle(readContent(QueueRootLoc + nextQueue));
String queueText = removeUnnecessarySpaces(queue.getText());
// make sure the queue has {{DYKbotdo}}
int dykbotdoIndex = queueText.indexOf("{{DYKbotdo");
String wikilinkToQueue = "[[" + QueueRootLoc + nextQueue + "|Queue " + nextQueue + "]]";
if (dykbotdoIndex == -1) {
logError(wikilinkToQueue + " is not tagged with {{tl|DYKbotdo}}");
return;
}
// keep the {{DYKbotdo}} line itself; it records the approving admin for credits
String dykbotdo = queueText.substring(dykbotdoIndex, queueText.indexOf("\n", dykbotdoIndex)).trim();
// make sure the queue has <!--Hooks--> and <!--HooksEnd-->, then find hooks
int indexOfHooksinQueue = queueText.indexOf("<!--Hooks-->");
int indexOfHooksEndinQueue = queueText.indexOf("<!--HooksEnd-->", indexOfHooksinQueue);
if (indexOfHooksinQueue == -1 || indexOfHooksEndinQueue == -1) {
logError(wikilinkToQueue + " is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>");
return;
}
queueText = checkIfEachHookOnNewLine(queueText, indexOfHooksinQueue, indexOfHooksEndinQueue);
indexOfHooksEndinQueue = queueText.indexOf("<!--HooksEnd-->", indexOfHooksinQueue); // this may have changed from above line
String newHooks = queueText.substring(indexOfHooksinQueue + 12, indexOfHooksEndinQueue);
// make sure all curly braces are matched
if (queueText.split("\\{\\{").length != queueText.split("\\}\\}").length) {
logError("Unmatched left <nowiki>(\"{{\") and right (\"}}\")</nowiki> curly braces in " + wikilinkToQueue);
return;
}
// make sure the image/file is protected
DYKFile incomingFile = findFile(newHooks);
if (incomingFile != null && !checkIfProtected(incomingFile.getFilename(), nextNextUpdateTime, true)) {
return;
}
// fetch T:DYK
SimpleArticle dykMain = new SimpleArticle(readContent(TDYKLoc));
String dykMainText = dykMain.getText();
// make sure T:DYK has <!--Hooks--> and <!--HooksEnd-->, then find hooks
int indexOfHooksonTDYK = dykMainText.indexOf("<!--Hooks-->");
int indexOfHooksEndonTDYK = dykMainText.indexOf("<!--HooksEnd-->", indexOfHooksonTDYK);
if (indexOfHooksonTDYK == -1 || indexOfHooksEndonTDYK == -1) {
logError("[[" + TDYKLoc + "]] is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>");
return;
}
// replace old hooks with new hooks
String oldHooks = dykMainText.substring(indexOfHooksonTDYK + 12, indexOfHooksEndonTDYK).trim();
dykMainText = dykMainText.substring(0, indexOfHooksonTDYK + 12) + newHooks +
dykMainText.substring(indexOfHooksEndonTDYK, dykMainText.length());
GregorianCalendar time = new GregorianCalendar(BotLocale);
// edit T:DYK
dykMain.setText(dykMainText);
dykMain.setEditSummary("Bot automatically updating DYK template with hooks copied from " +
"[[" + QueueRootLoc + nextQueue + "|" + "queue " + nextQueue + "]]");
try {
dykMain.setEditTimestamp(OverrideEditConflicts);
} catch (ParseException e) {} // impossible
writeContent(dykMain);
// purge the main page
purge("Main Page", true);
// reset DYK time
String dykTimePageText = dykTimePage.getText();
String dykTime = dykTimePageText.trim();
if (dykTime.lastIndexOf("\n") != -1) { // if there are multiple lines, get the last line
dykTime = dykTime.substring(dykTime.lastIndexOf("\n")).trim();
}
String timeEditSummary = "Resetting the clock";
GregorianCalendar writeTime = new GregorianCalendar(BotLocale);
writeTime.setTimeInMillis(time.getTimeInMillis());
// zero the seconds so the recorded update time is on a whole minute
writeTime.set(Calendar.SECOND, 0);
writeTime.set(Calendar.MILLISECOND, 0);
int drift = calculateDrift(writeTime, timeBetweenUpdates);
if (drift != 0) {
writeTime.add(Calendar.MINUTE, drift);
timeEditSummary += ", with drift";
}
String wikiTimeString = APITimestampFormat.format(new Date(writeTime.getTimeInMillis()));
dykTimePage.setText(dykTimePageText.substring(0, dykTimePageText.indexOf(dykTime)) +
wikiTimeString);
dykTimePage.setEditSummary(timeEditSummary);
try {
dykTimePage.setEditTimestamp(OverrideEditConflicts);
} catch (ParseException e) {} // impossible
writeContent(dykTimePage);
// find old file and associated tags
DYKFile file = findFile(oldHooks);
checkFileTags(file);
// archive old hooks
archive(oldHooks, time, file);
// remove any commented-out wikitext from queueText
while (queueText.indexOf("<!--") != -1) {
// NOTE(review): indexOf returns -1 when "-->" is absent, so endCommentIndex
// becomes 2 and the == -1 check below can never fire; an unterminated
// comment would mis-slice the text (and may loop) — likely a latent bug.
int endCommentIndex = queueText.indexOf("-->") + 3;
if (endCommentIndex == -1) {
endCommentIndex = queueText.length();
}
queueText = queueText.substring(0, queueText.indexOf("<!--")) +
queueText.substring(endCommentIndex);
}
// parse the credits
LinkedList<DYKCredit> credits = parseCredits(queueText, newHooks);
// tag article talk pages
tagArticles(time, credits);
// tag user talk pages
giveUserCredits(credits, dykbotdo);
// clear queue
queue.setText(ClearTemplate);
queue.setEditSummary("Update is done, removing the hooks");
try {
queue.setEditTimestamp(OverrideEditConflicts);
} catch (ParseException e) {} // impossible
writeContent(queue);
// update next queue number
int updatedNextQueue = (nextQueue % NumQueues) + 1;
SimpleArticle nextQueuePage = new SimpleArticle("" + updatedNextQueue, NextUpdateQueueLoc);
nextQueuePage.setEditSummary("Next queue is [[" + QueueRootLoc + updatedNextQueue + "|" +
"queue " + updatedNextQueue + "]]");
try {
nextQueuePage.setEditTimestamp(OverrideEditConflicts);
} catch (ParseException e) {} // impossible
writeContent(nextQueuePage);
// delete/unprotect and tag outgoing file
//boolean fileDeleted = deleteFile(file); // Jan 2017 - delete and unprotect don't work,
//if (!fileDeleted) unprotectFile(file); // likely due to authentication issues
tagFile(file, time);


// NOTE(review): the two indented Python lines below are diff-extraction
// artifacts from a newer Python port, preserved verbatim.
if seconds_until_next_update < 7200:
nextQueue = updatedNextQueue;
self.validate_before_update(results, time_next_update_leaving)
}
if seconds_until_next_update <= 0:
results.timedelta_between_updates = time_next_update_leaving - time_next_update
/**
self.update_dyk(time_now, results)
* Reads the next queue number from NextUpdateQueueLoc
* @return next queue number, or 0 if there was an error parsing
*/
// Reads the next queue number from NextUpdateQueueLoc.
// Returns the queue number (1..NumQueues), or 0 if the page could not be
// parsed or holds an out-of-range value — callers treat 0 as "couldn't parse".
private int findNextQueueNumber() {
SimpleArticle nextQueuePage = new SimpleArticle(readContent(NextUpdateQueueLoc));
int nextQueue = 0;
try {
// trim() guards against trailing whitespace/newlines left by manual edits,
// which Integer.parseInt would otherwise reject
nextQueue = Integer.parseInt(nextQueuePage.getText().trim());
} catch (NumberFormatException e) {
// fall through with nextQueue == 0; error reported below
}
// parseInt accepts any integer, so also reject values outside 1..NumQueues
// (the error message has always promised this range)
if (nextQueue < 1 || nextQueue > NumQueues) {
logError("Could not parse [[" + NextUpdateQueueLoc + "]]; check if it's a number 1-" + NumQueues);
return 0;
}
return nextQueue;
}
/**
 * Computes how many minutes to shift the recorded update time so that, over the
 * cycle of upcoming updates, one update lands as close to midnight as possible.
 * The maximum allowed advance/delay (in minutes) is read from DriftLoc.
 * @param updateTime the calendar corresponding to the time of the current update
 * @param timeBetweenUpdates in seconds
 * @return drift in minutes; negative is advance, positive is delay
 */
private int calculateDrift(GregorianCalendar updateTime, final int timeBetweenUpdates) {
final long millisecondsPerMinute = 60 * 1000;
final long millisecondsPerDay = 24 * 60 * millisecondsPerMinute; //86400000
long leastDifferenceFrom0000 = Long.MAX_VALUE;
HashSet<Long> differences = new HashSet<Long>();
GregorianCalendar updateIter = new GregorianCalendar(BotLocale);
updateIter.setTimeInMillis(updateTime.getTimeInMillis());
// step forward one update interval at a time, tracking each update's signed
// offset from midnight; stop when an offset repeats (full cycle) or after 24 samples
while (true) {
long currentDifferenceFrom0000 = updateIter.getTimeInMillis()%millisecondsPerDay;
if (currentDifferenceFrom0000 > millisecondsPerDay/2) {
// closer to the following midnight: express as a negative offset
currentDifferenceFrom0000 = -(millisecondsPerDay - currentDifferenceFrom0000);
}
if (Math.abs(leastDifferenceFrom0000) > Math.abs(currentDifferenceFrom0000)) {
leastDifferenceFrom0000 = currentDifferenceFrom0000;
}
if (differences.contains(currentDifferenceFrom0000) || differences.size() >= 24) {
break;
}
differences.add(currentDifferenceFrom0000);
updateIter.add(Calendar.SECOND, timeBetweenUpdates);
}
// read the allowed drift bounds (in minutes) from the on-wiki config page;
// expected format: two lines, each "label: <minutes>"
String driftText = readContent(DriftLoc).getText();
int maxAdvance = 0;
int maxDelay = 0;
try {
String[] driftLines = driftText.split("\n");
maxAdvance = Integer.parseInt(driftLines[0].split(":")[1].trim()); // in minutes
maxDelay = Integer.parseInt(driftLines[1].split(":")[1].trim()); // in minutes
} catch (Exception e) {
log("Couldn't parse drift");
return 0;
}
if (leastDifferenceFrom0000 > 0) {
// closest update falls after midnight: advance (negative) to pull it back, capped
return -Math.min(maxAdvance, (int)(leastDifferenceFrom0000/millisecondsPerMinute));
} else if (leastDifferenceFrom0000 < 0) {
// closest update falls before midnight: delay (positive) to push it later, capped
return Math.min(maxDelay, (int)(-leastDifferenceFrom0000/millisecondsPerMinute));
} else {
return 0;
}
}
/**
 * Archives the latest set to ArchiveLoc
 * @param hooks the hooks to be archived
 * @param updateTime a Calendar object containing the time that DYK was updated
 * @param file the file that ran with this set, or null; if it was a crop,
 * the archive links the original instead
 */
private void archive(String hooks, Calendar updateTime, DYKFile file) {
do {
try {
if (file != null) {
// if the file was cropped, point to the original file in the archives
String originalFile = file.getCroppedFrom();
if (originalFile != null) {
int fileStartIndex = hooks.indexOf(file.getFilename());
hooks = hooks.substring(0, fileStartIndex) + originalFile + hooks.substring(fileStartIndex + file.getFilename().length());
}
}
SimpleArticle archivePage = new SimpleArticle(readContent(ArchiveLoc));
// the runs of '' in the pattern are literal wikitext bold/italic markup
String timeHeading = new SimpleDateFormat("'*'''''''''''HH:mm, d MMMM yyyy '(UTC)'''''''''''", BotLocale).
format(updateTime.getTime());
String sectionHeading = new SimpleDateFormat("'==='d MMMM yyyy'==='", BotLocale).format(updateTime.getTime());
String archiveText = archivePage.getText();
// check if there is a section heading already for today
int thisDateIndex = archiveText.indexOf(sectionHeading);
if (thisDateIndex == -1) { // if there isn't, create a new section heading and add the new set
// insert after the <!--BOTPOINTER--> marker, before the first existing section
int firstSectionIndex = archiveText.indexOf("===", archiveText.indexOf("<!--BOTPOINTER-->"));
if (firstSectionIndex == -1) { // if no archive sections exist (ie at the very beginning of a month)
firstSectionIndex = archiveText.indexOf("\n", archiveText.indexOf("<!--BOTPOINTER-->")) + 1;
}
archiveText = archiveText.substring(0, firstSectionIndex) +
sectionHeading + "\n" + timeHeading + "\n" + hooks + "\n\n" +
archiveText.substring(firstSectionIndex);
} else { // otherwise add the set under the section heading for today
int writeIndex = thisDateIndex + sectionHeading.length();
archiveText = archiveText.substring(0, writeIndex) + "\n" + timeHeading + "\n" +
hooks + "\n" + archiveText.substring(writeIndex);
}
archivePage.setText(archiveText);
archivePage.setEditSummary("Archiving latest set");
writeContent(archivePage);
return;
} catch (EditConflictException e) {
log("Edit conflict caught");
// will try again because of while(true)
} catch (DYKResetException e) {
throw e;
} catch (Exception e) {
logError("Error occurred while archiving");
return;
}
} while(true);
}
/**
 * Parses the credits; associates each article title with the user to be credited
 * and the hook
 * @param queueText the wikitext of the queue
 * @param hooks the hooks in the queue
 * @return parsed credits
 */
private LinkedList<DYKCredit> parseCredits(String queueText, String hooks) {
LinkedList<DYKCredit> credits = new LinkedList<DYKCredit>();
// unescape all html encoding in the hooks; for example, "M&amp;M" will become "M&M"
hooks = StringEscapeUtils.unescapeHtml(hooks);
// find all credit templates and parse article titles, users, and hooks
int dykMakeIndex = queueText.indexOf("{{DYKmake");
int dykNomIndex = queueText.indexOf("{{DYKnom");
while (dykMakeIndex != -1 || dykNomIndex != -1) {
// process whichever credit template comes first in the text
int nextCreditIndex;
if (dykMakeIndex == -1) {
nextCreditIndex = dykNomIndex;
} else if (dykNomIndex == -1) {
nextCreditIndex = dykMakeIndex;
} else {
nextCreditIndex = Math.min(dykMakeIndex, dykNomIndex);
}
// scan forward for the "}}" that balances any templates nested inside the credit
int closeTemplateIndex = queueText.indexOf("}}", nextCreditIndex);
int closeTemplatesEncountered = 1;
while (queueText.substring(nextCreditIndex + 2, closeTemplateIndex)
.split("\\{\\{").length > closeTemplatesEncountered) {
closeTemplateIndex = queueText.indexOf("}}", closeTemplateIndex+2);
++closeTemplatesEncountered;
}
String creditTemplate = queueText.substring(nextCreditIndex, closeTemplateIndex + 2);
boolean dykMake = (nextCreditIndex == dykMakeIndex);


// NOTE(review): the two indented Python lines below are diff-extraction
// artifacts from a newer Python port, preserved verbatim.
self._post_errors(results.rgstr_warnings, results.rgstr_errors)
// these next two lines are the "increment" part of the while loop
results = None
dykMakeIndex = queueText.indexOf("{{DYKmake", nextCreditIndex + 1);
dykNomIndex = queueText.indexOf("{{DYKnom", nextCreditIndex + 1);
// end increment
LinkedList<String> creditTemplatePieces = new LinkedList<String>(Arrays.asList(
creditTemplate.substring(2, creditTemplate.length() - 2).split("\\|")));
// re-join pieces that were split on a "|" belonging to a nested template
int numContinuing = 0;
for (int i=0; i < creditTemplatePieces.size(); ) {
boolean continuation = numContinuing > 0;
int numOpenTemplates = creditTemplatePieces.get(i).split("\\{\\{").length - 1;
int numCloseTemplates = creditTemplatePieces.get(i).split("\\}\\}").length - 1;
numContinuing = numContinuing + numOpenTemplates - numCloseTemplates;
if (continuation) {
creditTemplatePieces.set(i - 1, creditTemplatePieces.get(i - 1) + "|" + creditTemplatePieces.get(i));
creditTemplatePieces.remove(i);
} else {
++i;
}
}
// extract title (param 1), user (param 2), and optional subpage parameter
String title = null;
String user = null;
String subpage = null;
boolean firstPiece = true;
boolean invalidCreditTemplate = false;
int unnamedParamsSeen = 0;
for (String piece : creditTemplatePieces) {
String trimmedPiece = piece.trim();
if (firstPiece) {
if (!(trimmedPiece.equals("DYKmake") || trimmedPiece.equals("DYKnom"))) {
invalidCreditTemplate = true;
break;
}
firstPiece = false;
} else {
int firstEqualsIndex = piece.indexOf('=');
if (firstEqualsIndex != -1) {
String paramName = piece.substring(0, firstEqualsIndex).trim();
String paramValue = piece.substring(firstEqualsIndex + 1).trim();
paramValue = StringEscapeUtils.unescapeHtml(paramValue);
if (paramName.equals("1")) title = paramValue;
else if (paramName.equals("2")) user = paramValue;
else if (paramName.equals("subpage")) subpage = paramValue;
else {
logError("Invalid credit template: <nowiki>" + creditTemplate + "</nowiki>");
invalidCreditTemplate = true;
break;
}
} else {
trimmedPiece = StringEscapeUtils.unescapeHtml(trimmedPiece);
if (unnamedParamsSeen == 0) { // first unnamed param is title
title = trimmedPiece;
} else if (unnamedParamsSeen == 1) { // second is user
user = trimmedPiece;
} else {
logError("Invalid credit template: <nowiki>" + creditTemplate + "</nowiki>");
invalidCreditTemplate = true;
break;
}
++unnamedParamsSeen;
}
}
}
if (title == null || user == null) {
logError("Invalid credit template: <nowiki>" + creditTemplate + "</nowiki>");
invalidCreditTemplate = true;
}
if (invalidCreditTemplate) continue;
// check for common formatting errors
if (title.startsWith("[[")) {
title = title.substring(2);
}
if (title.endsWith("]]")) {
title = title.substring(0, title.length() - 2);
}
// skip placeholder credits left over from the preloaded queue template
if (title.equals("Example") || title.isEmpty()) {
continue;
}
boolean errorInArticleTitle = false;
String hook = null;
// make sure the title corresponds to a real article
title = title.substring(0, 1).toUpperCase() + title.substring(1); // capitalize first letter
SimpleArticle article = new SimpleArticle(readContent(title));
if (article.getText().isEmpty()) { // if the article's been deleted, or otherwise nonexistent
logError("Article [[" + title + "]] does not exist");
errorInArticleTitle = true;
} else {
hook = findHook(hooks, title);
}
if (!errorInArticleTitle) {
// follow a redirect (one level) and retry the hook search on the target
String redirectTo = checkForPageRedirect(article.getText());
if (redirectTo != null) {
article = new SimpleArticle(readContent(redirectTo));
if (article.getText().isEmpty()) {
logError("Article [[" + title + "]] is a redirect to a deleted article");
errorInArticleTitle = true;
} else if (hook == null) { // if there was no matching hook before, try again
hook = findHook(hooks, article.getLabel());
}
}
}
if (!errorInArticleTitle && hook == null) {
// if we couldn't find the hook before, let's try other options
// check for redirects to the given page
LinkedList<String> otherPossibleTitles = findRedirectsToPage(article.getLabel(), 50);
// check for odd characters (like &nbsp;)
String normalizedTitle = normalizeTitle(article.getLabel());
if (!article.getLabel().equals(normalizedTitle)) {
otherPossibleTitles.add(normalizedTitle);
}
for (String possibility : otherPossibleTitles) {
hook = findHook(hooks, possibility);
if (hook != null) break;
}
if (hook == null) {
hook = findHook(hooks.replaceAll(StringEscapeUtils.unescapeHtml("&nbsp;"), " "),
article.getLabel());
}
}
if (!errorInArticleTitle && hook == null) {
logError("Couldn't find hook for [[" + title + "]]");
}
if (user.contains("}}")) user = expandTemplates(user);
String userTalkPage = validateUserTalkPage(user); // make sure this is a valid user talk page
credits.add(new DYKCredit(article.getLabel(), userTalkPage, hook, errorInArticleTitle, dykMake, subpage));
}
return credits;
}
/**
 * Finds the hook of the title given in the hooks given
 * @param hooks in the set
 * @param title of the article
 * @return the article's hook, or null if none found
 */
private String findHook(String hooks, String title) {
String hook = null;
// convert to lower case and underscores to spaces
String normalizedTitle = title.replaceAll("_", " ").toLowerCase();
int titleIndex = hooks.toLowerCase().indexOf(normalizedTitle);
while (titleIndex != -1 && (hook == null || hook.contains("px|") ||
hook.contains("100x100px") || hook.toLowerCase().contains("{{dyk listen")
|| hook.toLowerCase().contains("{{main page image"))) {
// "px" parts are in case the image caption or filename has the title
int startOfHook = hooks.lastIndexOf("\n", titleIndex);
if (startOfHook == -1) startOfHook = 0;
int endOfHook = hooks.indexOf("\n", titleIndex);
if (endOfHook == -1) endOfHook = hooks.length();
hook = hooks.substring(startOfHook, endOfHook).trim();
titleIndex = hooks.toLowerCase().indexOf(normalizedTitle,
titleIndex + normalizedTitle.length());
}
if (hook == null || hook.contains("px|") || hook.contains("100x100px")
|| hook.toLowerCase().contains("{{dyk listen")
|| hook.toLowerCase().contains("{{main page image")) {
hook = findShipHook(hooks, title);
if (hook == null) return null;
}
// hook formatting; regionMatches/startsWith return false for hooks shorter than
// the pattern, where the previous substring() calls threw StringIndexOutOfBoundsException
while (hook.regionMatches(true, hook.length() - 4, "<br>", 0, 4)) {
// http://en.wikipedia.org/w/index.php?title=Template:Did_you_know&oldid=2521104
hook = hook.substring(0, hook.length() - 4).trim();
}
if (hook.regionMatches(true, hook.length() - 5, "</li>", 0, 5)) {
// http://en.wikipedia.org/w/index.php?title=Template:Did_you_know&oldid=9218861
hook = hook.substring(0, hook.length() - 5).trim();
}
if (hook.regionMatches(true, 0, "<li>", 0, 4)) {
hook = hook.substring(4).trim();
}
// strip the leading list bullet ("*" or {{*mp}})
if (!hook.isEmpty() && hook.charAt(0) == '*') {
hook = hook.substring(1);
}
if (hook.startsWith("{{*mp}}")) {
hook = hook.substring(7);
}
if (hook.contains("{{*mp}}")) {
log("Hook for [[" + title + "]] has an extra {{*mp}}; hook mashup?");
}
return hook;
}
/**
 * Finds the hook of the title given if a ship template is used (like {{SS}})
 * @param hooks in the set
 * @param title of the article
 * @return the article's hook, or null if none found
 */
private String findShipHook(String hooks, String title) {
// start at index 3 ("barge"): indices 0-2 ("ship", "sclass", "Jsub") are
// generic/fallback templates matched separately below
int i = 3;
// figure out which template matches the title
for (; i < shipTemplates.length; ++i) {
if (title.toLowerCase().startsWith(shipTemplates[i].toLowerCase())) break;
}
if (i == shipTemplates.length) {
if (title.toLowerCase().contains(" class ") && hooks.toLowerCase().contains("{{sclass")) {
// looks like there's a possible match with {{sclass}} or {{sclass2}}
i = 1;
} else if (title.toLowerCase().startsWith("japanese submarine") && hooks.toLowerCase().contains("{{jsub")) {
// match with {{Jsub}}
i = 2;
} else if (hooks.toLowerCase().contains("{{ship")) {
// if none of the specific templates match, maybe {{ship}} will
i = 0;
} else {
return null;
}
}
// examine every occurrence of the chosen template in the hooks
for (int templateIndex = hooks.toLowerCase().indexOf("{{" + shipTemplates[i].toLowerCase());
templateIndex != -1;
templateIndex = hooks.toLowerCase().indexOf("{{" + shipTemplates[i].toLowerCase(), templateIndex + 2)) {
// find the ship template
// extend endIndex until the "}}" that balances any nested templates
int endIndex = templateIndex;
for (int j=2; hooks.indexOf("}}", endIndex + 2) != -1; j++) {
endIndex = hooks.indexOf("}}", endIndex + 2);
if (hooks.substring(templateIndex, endIndex).split("\\{\\{").length == j) {
break;
}
}
String template = hooks.substring(templateIndex + 2, endIndex);
// parse the ship template and assemble it into a title
String[] cutup = template.split("\\|");
// for {{ship}} and {{sclass}} (i < 2) the first "|" piece is the template name,
// so the title parts start at index 1
int base = 0;
if (i < 2) base = 1;
String titleFromTemplate;
if (i != 1) {
titleFromTemplate = cutup[base].trim() + " " + cutup[base+1].trim();
if (cutup.length >= base+3 && !cutup[base+2].isEmpty() &&
!cutup[base+2].trim().equals("3=2")) {
titleFromTemplate += " (" + cutup[base+2].trim() + ")";
}
} else { // {{sclass}} and {{sclass2}}
titleFromTemplate = cutup[base].trim() + " class " + cutup[base+1].trim();
}
if (i == 2) {
titleFromTemplate = "Japanese submarine " + titleFromTemplate.substring(5);
}
// if the title from the credits and the title assembled from the template
// match, we've found the correct hook
if (titleFromTemplate.equalsIgnoreCase(title)) {
return findHook(hooks, template);
}
}
return null;
}


seconds_to_sleep = DYKUpdateBot.SECONDS_BETWEEN_STATUS_CHECKS
/**
if seconds_until_next_update > 0:
* Tag article talk pages
seconds_to_sleep = min(seconds_to_sleep, seconds_until_next_update)
* If {{Article history}} exists on the talk page, the bot will add the DYK credit there instead of
pywikibot.sleep(seconds_to_sleep)
* adding a new {{DYK talk}}
* @param a Calendar containing the time that DYK was last updated
* @param the credits (contains article title, username, and hook)
*/
private void tagArticles(Calendar time, LinkedList<DYKCredit> credits) {
// make the start of a DYK talk tag without the hook
String tag = new SimpleDateFormat("'{{DYK talk|'d MMMM'|'yyyy", BotLocale).format(time.getTime());
String editSummaryTimestamp = new SimpleDateFormat("d MMMM yyyy", BotLocale).format(time.getTime());
HashSet<String> taggedArticles = new HashSet<String>();


def _calculate_next_update_time(self, rgstr_errors) -> (pywikibot.Timestamp, pywikibot.Timestamp):
// tag articles
page_last_update_time = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.LAST_UPDATE_TIME_LOC)
for (DYKCredit credit: credits) {
time_next_update = datetime.now(timezone.utc) # placeholder
if (credit.errorInArticleTitle) continue;
try:
if (taggedArticles.contains(credit.articleTitle)) continue;
time_next_update = pywikibot.Timestamp.fromISOformat(page_last_update_time.text.strip()).replace(tzinfo=timezone.utc)
boolean editConflicted = false;
except:
do {
self._log_error(rgstr_errors, 'Time at [[' + DYKUpdateBot.LAST_UPDATE_TIME_LOC +
editConflicted = false;
']] is not formatted correctly')
try {
return time_next_update, time_next_update
// build up the tag
String tagWithHook = tag;
if (credit.hook != null) {
tagWithHook += "|entry=" + credit.hook;
}
if (credit.nompage != null) {
tagWithHook += "|nompage=" + credit.nompage;
}
tagWithHook += "}}";


page_time_between_updates = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.TIME_BETWEEN_UPDATES_LOC)
// get the talk page
seconds_between_updates = 0 # placeholder
SimpleArticle talkPage = new SimpleArticle(readContent("Talk:" + credit.articleTitle));
try:
String talkContent = talkPage.getText();
seconds_between_updates = int(page_time_between_updates.text)
if (talkContent.isEmpty()) talkPage.setEditTimestamp(OverrideEditConflicts);
except ValueError:
String talkContentLowerCase = talkContent.toLowerCase();
self._log_error(rgstr_errors, 'Time between updates at [[' + DYKUpdateBot.TIME_BETWEEN_UPDATES_LOC +
int articleHistoryIndex = Math.max(talkContentLowerCase.indexOf("{{article history"),
']] is not formatted correctly')
talkContentLowerCase.indexOf("{{articlehistory"));
return time_next_update, time_next_update
if (articleHistoryIndex != -1) {
// if it has {{Article history}}
String articleHistory = talkContent.substring(articleHistoryIndex,
talkContent.indexOf("}}", articleHistoryIndex));
String articleHistoryNew = new String(articleHistory);
int currentStatusIndex = articleHistoryNew.indexOf("currentstatus");
while (currentStatusIndex != -1 &&
!articleHistoryNew.substring(0, currentStatusIndex).trim().endsWith("|")) {
currentStatusIndex = articleHistoryNew.indexOf("currentstatus", currentStatusIndex + 13);
}
String currentStatusString = "|currentstatus"; //default
if (currentStatusIndex != -1) {
int temp = currentStatusIndex;
currentStatusIndex = articleHistoryNew.lastIndexOf("|", currentStatusIndex);
currentStatusString = articleHistoryNew.substring(currentStatusIndex, temp + 13);
}
int dykdateIndex = articleHistoryNew.indexOf("dykdate");
while (dykdateIndex != -1 &&
!articleHistoryNew.substring(0, dykdateIndex).trim().endsWith("|")) {
dykdateIndex = articleHistoryNew.indexOf("dykdate", dykdateIndex + 7);
}
int pipeAfterIndex = articleHistoryNew.indexOf("|", dykdateIndex + 7);
if (pipeAfterIndex == -1) {
pipeAfterIndex = articleHistoryNew.length();
}
boolean blankDYKdate = articleHistoryNew.substring(dykdateIndex + 7,
pipeAfterIndex).trim().equals("=");
String dykDateParam = editSummaryTimestamp;
String dykEntryAndNom = "";
if (credit.hook != null) dykEntryAndNom += "\n|dykentry=" + credit.hook;
if (credit.nompage != null) dykEntryAndNom += "\n|dyknom=" + credit.nompage;
if (currentStatusIndex == -1 && dykdateIndex == -1) {
// if there's no currentStatus or dykdate
logError("Could not tag [[" + credit.articleTitle +
"]] by {{tl|Article history}}; please tag article manually");
} else if (dykdateIndex == -1 || blankDYKdate) {
if (dykdateIndex == -1) {
articleHistoryNew = articleHistoryNew.replace(currentStatusString, "|dykdate=" +
dykDateParam + dykEntryAndNom + "\n" + currentStatusString);
} else if (blankDYKdate) {
String dykdateOld = articleHistoryNew.substring(dykdateIndex,
articleHistoryNew.indexOf("=", dykdateIndex) + 1);
// remove old |dykentry if it exists
int dykentryIndex = articleHistoryNew.indexOf("dykentry", dykdateIndex);
if (dykentryIndex != -1) {
int onePastPipeAfterEntryIndex = articleHistoryNew.indexOf("|", dykentryIndex);
if (onePastPipeAfterEntryIndex == -1) {
onePastPipeAfterEntryIndex = articleHistoryNew.length();
} else {
++onePastPipeAfterEntryIndex;
}
articleHistoryNew = articleHistoryNew.substring(0, dykentryIndex) +
articleHistoryNew.substring(onePastPipeAfterEntryIndex);
}
// remove old |dyknom if it exists
int dyknomIndex = articleHistoryNew.indexOf("dyknom", dykdateIndex);
if (dyknomIndex != -1) {
int onePastPipeAfterNomIndex = articleHistoryNew.indexOf("|", dyknomIndex);
if (onePastPipeAfterNomIndex == -1) {
onePastPipeAfterNomIndex = articleHistoryNew.length();
} else {
++onePastPipeAfterNomIndex;
}
articleHistoryNew = articleHistoryNew.substring(0, dyknomIndex) +
articleHistoryNew.substring(onePastPipeAfterNomIndex);
}
articleHistoryNew = articleHistoryNew.replace(dykdateOld, dykdateOld +
dykDateParam + dykEntryAndNom);
}
talkContent = talkContent.replace(articleHistory, articleHistoryNew);
talkPage.setText(talkContent.trim());
talkPage.setEditSummary("Article appeared on [[WP:Did you know|DYK]] on " +
editSummaryTimestamp + ", adding to " +
"{{[[Template:Article history|Article history]]}}");
writeContent(talkPage);
} else {
log("{{Article history}} up to date for article " + credit.articleTitle);
}
} else { // if it doesn't have {{Article history}}, add a new tag
int indexOfFirstSection = talkContent.indexOf("==");
if (indexOfFirstSection == -1) indexOfFirstSection = talkContent.length();
String zeroSection = talkContent.substring(0, indexOfFirstSection);
String theRest = talkContent.substring(indexOfFirstSection);
int lastTemplateIndex = findLastTemplateIndex(zeroSection);
String zeroSectionA = zeroSection.substring(0, lastTemplateIndex);
String zeroSectionB = zeroSection.substring(lastTemplateIndex);
talkContent = zeroSectionA.trim() + "\n" + tagWithHook + "\n\n" + zeroSectionB + theRest;
talkPage.setText(talkContent.trim());
talkPage.setEditSummary("Article appeared on [[WP:Did you know|DYK]] on " +
editSummaryTimestamp + ", adding {{[[Template:DYK talk|DYK talk]]}}");
writeContent(talkPage);
}
taggedArticles.add(credit.articleTitle);
} catch (EditConflictException e) {
log("Edit conflict caught");
editConflicted = true;
} catch (DYKResetException e) {
throw e;
} catch (Exception e) {
logError("Error occurred when attempting to tag [[" + credit.articleTitle + "]]");
}
} while (editConflicted);
}
}
/**
 * Posts a DYK credit (a substituted {{DYKmake}}/{{DYKnom}} credit template) to
 * each credited user's talk page, signed on behalf of the admin named in
 * {{DYKbotdo}} when a signature is present.
 * Edit conflicts are retried; any other error is logged and the credit skipped.
 * @param credits the credits (contains article title, username, and hook)
 * @param dykbotdo the {{DYKbotdo}} template wikitext
 */
private void giveUserCredits(LinkedList<DYKCredit> credits, String dykbotdo) {
for (DYKCredit credit : credits) {
// skip credits whose user talk page could not be resolved
if (credit.userTalkPage == null) continue;
boolean editConflicted = false;
do {
editConflicted = false;
try {
// tag user talk page
SimpleArticle userTalk = readContent(credit.userTalkPage);
// for a nonexistent/blank page, force the write through conflict detection
// (OverrideEditConflicts appears to be a sentinel timestamp -- TODO confirm)
if (userTalk.getText().isEmpty()) userTalk.setEditTimestamp(OverrideEditConflicts);
userTalk.addText("\n\n==DYK for " + credit.articleTitle + "==");
String creditTemplate;
if (credit.dykMake) { // if it's {{DYKmake}}
creditTemplate = "\n{{subst:Template:DYKmake/DYKmakecredit";
} else { // if it's {{DYKNom}}
creditTemplate = "\n{{subst:Template:DYKnom/DYKnomcredit";
}
creditTemplate += " |article=" + credit.articleTitle;
if (credit.hook != null) creditTemplate += " |hook=" + credit.hook;
if (credit.nompage != null) creditTemplate += " |nompage=" + credit.nompage;
creditTemplate += " |optional= }} ";
userTalk.addText(creditTemplate);
// sign with the admin's signature from {{DYKbotdo|...}} plus a timestamp,
// or with a plain bot signature when the template has no pipe
int dykBotDoPipeIndex = dykbotdo.indexOf("|");
if (dykBotDoPipeIndex == -1) {
userTalk.addText("~~~~");
} else {
userTalk.addText(dykbotdo.substring(dykBotDoPipeIndex + 1,
dykbotdo.lastIndexOf("}}")));
userTalk.addText(" ~~~~~");
}
// form edit summary
String adminUsername = findUserLink(dykbotdo);
String editSummary = "Giving DYK credit for [[" + credit.articleTitle + "]]";
if (adminUsername != null) {
editSummary += " on behalf of [[User:" + adminUsername + "|" +
adminUsername + "]]";
}
userTalk.setEditSummary(editSummary);
// edit talk page
writeContent(userTalk);
} catch (EditConflictException e) {
// someone edited the page while we composed; reread and retry
editConflicted = true;
log("Edit conflict caught");
} catch (DYKResetException e) {
throw e; // manual queue reset; let the caller restart the bot
} catch (Exception e) {
logError("Error occurred while distributing user credits");
}
} while (editConflicted);
}
}
/**
* Checks if a user exists or has been renamed
* If the user talk page redirects to another user talk, this method will return the target username
* Otherwise, if the username is not registered and not an IP address, null is returned
* @param username to check
* @return a valid username, or null if none
*/
private String validateUserTalkPage(String username) {
// example credits aren't valid
if (username.equals("Editor") || username.equals("Nominator") || username.isEmpty()) {
return null;
}
String userTalkPage = "User talk:" + username;
// check if the talk page redirects to another page (if the user's been renamed)
SimpleArticle talkPage = new SimpleArticle(readContent(userTalkPage));
String redirectTo = checkForPageRedirect(talkPage.getText());
if (redirectTo != null) {
int userTalkIndex = redirectTo.toLowerCase().indexOf("user talk:");
if (userTalkIndex != -1) {
userTalkPage = redirectTo.substring(userTalkIndex);
username = userTalkPage.substring(10);


time_next_update = time_next_update + timedelta(seconds=seconds_between_updates)
// support redirects to talk page archives
return time_next_update, time_next_update + timedelta(seconds=seconds_between_updates)
// for example User talk:Djembayz -> User talk:Djembayz/Archive July 2014
int slashIndex = username.indexOf("/");
if (slashIndex != -1) username = username.substring(0, slashIndex);
}
}
// check if the username is registered
String apiURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query&list=users&ususers=" +
MediaWiki.encode(username);
Document doc = fetchUsingSAXBuilder(apiURL);
Element userInfo = doc.getRootElement().getChild("query", ns).getChild("users", ns).getChild("user", ns);
if (userInfo.getAttribute("missing") == null && userInfo.getAttribute("invalid") == null
&& !username.contains("|")) {
return userTalkPage;
}
// check if the user made edits (for IP addresses)
String apiURL2 = BaseEnWikiAPIURL + "api.php?format=xml&action=query&list=usercontribs&uclimit=1&ucprop=ids&ucuser=" +
MediaWiki.encode(username);
Document doc2 = fetchUsingSAXBuilder(apiURL2);
Element userContribs = doc2.getRootElement().getChild("query").getChild("usercontribs");
if ((userContribs.getChildren().size() > 0) && !username.contains("|")) {
return userTalkPage;
}
// the username isn't registered or technically impossible
logError("The username '" + username + "' is invalid");
return null;
}
/**
 * Finds the link to the admin's userpage in {{DYKbotdo}}.
 * Handles both [[User:Name|...]] and [[User talk:Name|...]] links; for a
 * pipeless link (e.g. [[User:Name]]) the name runs to the closing "]]".
 * @param dykbotdo the {{DYKbotdo}} tag wikitext
 * @return the admin's username, or null if no user link could be parsed
 */
private String findUserLink(String dykbotdo) {
    // Math.max doubles as the presence test: -1 from both searches means no
    // user link at all. "User talk:" does not contain the substring "User:",
    // so at most one of the two searches matches any given link.
    int userLinkIndex = Math.max(dykbotdo.indexOf("User:"), dykbotdo.indexOf("User talk:"));
    if (userLinkIndex == -1) {
        return null;
    }
    int nameStart = dykbotdo.indexOf(":", userLinkIndex) + 1;
    // the username ends at the link's pipe or its closing brackets, whichever
    // comes first; the old code only looked for "|", so a pipeless link either
    // threw (no later pipe) or swallowed "]]" and trailing text (later pipe)
    int pipeIndex = dykbotdo.indexOf("|", nameStart);
    int closeIndex = dykbotdo.indexOf("]]", nameStart);
    int nameEnd;
    if (pipeIndex == -1) {
        nameEnd = closeIndex;
    } else if (closeIndex == -1) {
        nameEnd = pipeIndex;
    } else {
        nameEnd = Math.min(pipeIndex, closeIndex);
    }
    if (nameEnd == -1) {
        return null; // malformed link with no terminator at all
    }
    return dykbotdo.substring(nameStart, nameEnd);
}
/**
 * Finds the DYK sound/video/image from hooks wikitext.
 * Recognizes, in priority order: {{DYK listen}} (sound), {{tall image}},
 * {{main page image}}, and a plain [[File:]]/[[Image:]] link.
 * If a sound or video file (.ogg) is used without the proper template,
 * the bot will assume it's a video; these exceptions should be checked manually.
 * @param hooks wikitext of the hook set
 * @return the file and its type ("sound", "image", "video"), or null if none found
 */
private DYKFile findFile(String hooks) {
    String hooksLowerCase = hooks.toLowerCase();
    if (hooksLowerCase.contains("{{dyk listen")) { // sound file
        // the filename is the first parameter of {{DYK listen}}
        int startIndex = hooks.indexOf("|", hooksLowerCase.indexOf("{{dyk listen")) + 1;
        int fileEndIndex = hooks.indexOf("|", startIndex);
        String filename = hooks.substring(startIndex, fileEndIndex);
        return new DYKFile(filename, "sound");
    } else if (hooksLowerCase.contains("{{tall image")) {
        int startIndex = hooks.indexOf("|", hooksLowerCase.indexOf("{{tall image")) + 1;
        int fileEndIndex = hooks.indexOf("|", startIndex);
        String filename = hooks.substring(startIndex, fileEndIndex);
        return new DYKFile(filename, "image");
    } else if (hooksLowerCase.contains("{{main page image")) {
        // test cases:
        // {{main page image|image=Carrot soup.jpg|caption=A cream of carrot soup with bread|width=120x133}}
        // {{main page image |image=Carrot soup.jpg|caption=A cream of carrot soup with bread|width=120x133}}
        // {{main page image | image=Carrot soup.jpg|caption=A cream of carrot soup with bread|width=120x133}}
        // {{main page image | image = Carrot soup.jpg |caption=A cream of carrot soup with bread|width=120x133}}
        // {{main page image|image=image:Carrot soup.jpg|caption=A cream of carrot soup with bread|width=120x133}}
        // {{main page image|File:Carrot soup.jpg}}
        // {{main page image|Carrot soup.jpg}}
        // {{main page image|Carrot soup.jpg|A cream of carrot soup with bread}}
        int fileStartIndex = hooksLowerCase.indexOf("|", hooksLowerCase.indexOf("{{main page image")) + 1;
        // the first parameter ends at the next pipe or at the closing braces,
        // whichever comes first; a one-parameter call has no second pipe, so a
        // -1 ("not found") must not win the comparison -- the old
        // Math.min(indexOf("|"), indexOf("}}")) produced -1 and threw on
        // {{main page image|Carrot soup.jpg}}
        int pipeIndex = hooks.indexOf("|", fileStartIndex);
        int braceIndex = hooks.indexOf("}}", fileStartIndex);
        int fileEndIndex = minIgnoringNotFound(pipeIndex, braceIndex);
        if (fileEndIndex == -1) { // unterminated template
            logError("Can't find an image, sound, or video file");
            return null;
        }
        String filename = hooks.substring(fileStartIndex, fileEndIndex).trim();
        // unwrap the named-parameter form |image=...
        int equalsIndex = filename.indexOf('=');
        if (equalsIndex != -1) {
            String paramNameLowerCase = filename.substring(0, equalsIndex).trim();
            if (paramNameLowerCase.equals("image")) {
                filename = filename.substring(equalsIndex + 1).trim();
            }
        }
        // strip an optional File:/Image: namespace prefix
        int colonIndex = filename.indexOf(':');
        if (colonIndex != -1) {
            String prefixLowerCase = filename.substring(0, colonIndex).toLowerCase().trim();
            if (prefixLowerCase.equals("image") || prefixLowerCase.equals("file")) {
                filename = filename.substring(colonIndex + 1).trim();
            }
        }
        return new DYKFile(filename, "image");
    } else if (hooksLowerCase.contains("[[file:") ||
            hooksLowerCase.contains("[[image:")) { // image file
        // start of the filename, just past the namespace prefix of the last link
        int startIndex = Math.max(hooksLowerCase.lastIndexOf("[[file:") + 7,
                hooksLowerCase.lastIndexOf("[[image:") + 8);
        // advance endIndex to the "]]" that balances any nested "[[...]]"
        // inside the link (e.g. wikilinks in the caption)
        int endIndex = startIndex;
        for (int i = 1; hooks.indexOf("]]", endIndex + 2) != -1; i++) {
            endIndex = hooks.indexOf("]]", endIndex + 2);
            if (hooks.substring(startIndex, endIndex).split("\\[\\[").length == i) {
                break;
            }
        }
        // the filename ends at the link's first pipe; for a pipeless link (or
        // when the next pipe belongs to later wikitext) it ends at the link's
        // "]]" instead -- the old code threw on midIndex == -1
        int midIndex = hooks.indexOf("|", startIndex);
        if (midIndex == -1 || midIndex > endIndex) {
            midIndex = endIndex;
        }
        String type = "image";
        String filename = hooks.substring(startIndex, midIndex).trim();
        if (filename.substring(filename.length() - 4).equals(".ogg")) {
            // http://en.wikipedia.org/w/index.php?diff=next&oldid=273311345
            type = "video";
            logError("Check if [[:File:" + filename + "]] is a sound or video file");
        }
        return new DYKFile(filename, type);
    }
    logError("Can't find an image, sound, or video file");
    return null;
}

/**
 * Returns the smaller of two indexOf results, treating -1 ("not found") as
 * larger than any real index; returns -1 only when both are -1.
 */
private static int minIgnoringNotFound(int a, int b) {
    if (a == -1) return b;
    if (b == -1) return a;
    return Math.min(a, b);
}
/**
 * Records Main-Page-related tags found on the file's description page:
 * whether {{c-uploaded}} is present, and the source image named by an
 * {{m-cropped}} tag (with any File:/Image: prefix stripped).
 * @param file the DYKFile to inspect; no-op when null
 */
private void checkFileTags(DYKFile file) {
    if (file == null) {
        return;
    }
    SimpleArticle descriptionPage = new SimpleArticle(readContent("File:" + file.getFilename()));
    String pageText = descriptionPage.getText();
    boolean hasCuploaded = pageText.contains("{{c-uploaded}}") || pageText.contains("{{C-uploaded}}");
    file.setCuploaded(hasCuploaded);
    int croppedTagIndex = Math.max(pageText.indexOf("{{m-cropped"), pageText.indexOf("{{M-cropped"));
    if (croppedTagIndex == -1) {
        return; // no crop tag; leave croppedFrom unset
    }
    // the tag's first parameter names the source image
    int sourceStart = pageText.indexOf('|', croppedTagIndex) + 1;
    int sourceEnd = pageText.indexOf("}}", croppedTagIndex);
    String sourceName = pageText.substring(sourceStart, sourceEnd).trim();
    // strip an optional namespace prefix from the source filename
    if (sourceName.toLowerCase().startsWith("file:")) {
        sourceName = sourceName.substring(5).trim();
    }
    if (sourceName.toLowerCase().startsWith("image:")) {
        sourceName = sourceName.substring(6).trim();
    }
    file.setCroppedFrom(sourceName);
}
/**
 * Checks if the file specified is protected either locally or on Commons.
 * The bot will detect both cascade-protection and normal protection.
 * If you pass in a salted file (for example Capture.JPG), the function will return false.
 * @param fileName without "File:" in front, for example "Andrey Alexandrovich Popov.jpg"
 * @param nextNextUpdateTime when the image will leave the Main Page; null skips
 *        the expiry check
 * @param logging whether problems should be reported via logError
 * @return true if the file is fully protected for long enough, false otherwise
 *         (but see above note on salting)
 */
@SuppressWarnings("unchecked")
private boolean checkIfProtected(String fileName, GregorianCalendar nextNextUpdateTime,
boolean logging) {
// ask enwiki which repository (local or Commons) hosts the file
String imageInfoURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query" +
"&prop=imageinfo&iilimit=1&iiprop=&titles=File:" + MediaWiki.encode(fileName);
Document imageInfo = fetchUsingSAXBuilder(imageInfoURL);
Element pageInfo = imageInfo.getRootElement().getChild("query", ns).getChild("pages", ns).getChild("page", ns);
String rootAPIurl;
if (pageInfo.getAttributeValue("imagerepository").equals("shared")) { // at Commons
rootAPIurl = BaseCommonsAPIURL + "api.php";
} else if (pageInfo.getAttributeValue("imagerepository").equals("local")) { // at Enwiki
rootAPIurl = BaseEnWikiAPIURL + "api.php";
} else { // the file doesn't exist; this should never happen
if (logging) logError("[[:File:" + fileName + "]] does not exist");
return false;
}
// query the hosting wiki for the file's protection entries
String protectionInfoURL = rootAPIurl + "?format=xml&action=query&prop=info" +
"&inprop=protection&titles=File:" + MediaWiki.encode(fileName);
Document protectionInfo = fetchUsingSAXBuilder(protectionInfoURL);
List<Element> protectionNodes = protectionInfo.getRootElement().getChild("query", ns).getChild("pages", ns)
.getChild("page", ns).getChild("protection", ns).getChildren("pr", ns);
if (protectionNodes.isEmpty()) { // isn't protected or (checked above) doesn't exist
String logMessage = "[[:File:" + fileName + "]] is not protected";
if (rootAPIurl.contains("commons")) {
logMessage += "; either 1) Upload the file to en.wiki, or 2) protect the file at Commons";
}
if (logging) logError(logMessage);
return false;
}
// look for a sysop edit-protection entry that lasts past the Main Page stint
boolean notFullyProtected = false;
boolean protectionExpireEarly = false;
for (Element protectionNode : protectionNodes) {
if (!(protectionNode.getAttributeValue("type").equals("edit") &&
protectionNode.getAttributeValue("level").equals("sysop"))) {
notFullyProtected = true;
continue;
}
String protectionExpiryTime = protectionNode.getAttributeValue("expiry");
if (protectionExpiryTime.equals("infinity") || nextNextUpdateTime == null) {
return true;
}
try {
if (convertWikiTimestamp(protectionExpiryTime).before(nextNextUpdateTime)) {
protectionExpireEarly = true;
continue;
} else {
return true; // protection doesn't expire early, so we're good
}
} catch (ParseException e) {} // impossible: the API returns parseable expiries
}
// reaching here means every entry either set a flag or returned true above,
// so at least one of the two flags is set
if (protectionExpireEarly) {
if (logging) {
logError("The protection for [[:File:" + fileName + "]] " +
"will expire while or before it's on the Main Page");
}
return false;
}
if (notFullyProtected) {
if (logging) logError("[[:File:" + fileName + "]] is not fully protected");
return false;
}
return false; // unreachable code
}
/**
 * Checks if the file should be deleted, then deletes it.
 * The file will be deleted if it's a cropped version made just for DYK.
 * Otherwise, the file won't be deleted if:
 * 1. It doesn't exist at Commons and/or Enwiki under the same filename
 * 2. It isn't tagged with {{c-uploaded}}
 * 3. The first revision in the file's history is before the first upload
 *    (the local page predates the Main-Page copy, so it wasn't made for DYK)
 * @param file the file to be deleted; no-op (false) when null
 * @return true if the file was deleted, false otherwise
 */
@SuppressWarnings("unchecked")
private boolean deleteFile(DYKFile file) {
if (file == null) return false;
String filename = file.getFilename();
try {
if (file.getCroppedFrom() == null) { // always delete if this is a cropped image
if (!file.getCuploaded()) {
// if it's not tagged with c-uploaded on enwiki, don't delete
return false;
}
// if it doesn't exist at Commons, don't delete
MediaWikiBot commonsBot = new MediaWikiBot(BaseCommonsAPIURL);
if (readContent(commonsBot, "File:" + filename).getText().isEmpty()) {
logError("[[:File:" + filename + "]] is tagged with c-uploaded but does not exist at Commons");
return false;
}
// figure out when the image was uploaded (oldest of up to 10 revisions)
int revs = 10;
String imageInfoURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query" +
"&iiprop=timestamp&prop=imageinfo&iilimit=" + revs + "&titles=File:" +
MediaWiki.encode(filename);
Document imageInfo = fetchUsingSAXBuilder(imageInfoURL);
Element pageInfo = imageInfo.getRootElement().getChild("query", ns).getChild("pages", ns).getChild("page", ns);
if (pageInfo.getAttributeValue("imagerepository").equals("shared")) {
return false; // no information on enwiki's copy
}
List<Element> timestamps = pageInfo.getChild("imageinfo", ns).getChildren("ii", ns);
// hitting the fetch limit means the true first upload may be even older
if (timestamps.size() == revs) log("Fetching " + revs + "/" + revs + " revisions");
Calendar uploadTime = convertWikiTimestamp(timestamps.get
(timestamps.size() - 1).getAttributeValue("timestamp"));
// figure out the date of the first revision
String revInfoURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query" +
"&prop=revisions&rvlimit=1&rvdir=newer&rvprop=timestamp" +
"&titles=File:" + MediaWiki.encode(filename);
Document revisionInfo = fetchUsingSAXBuilder(revInfoURL);
Calendar firstRevTime = convertWikiTimestamp(revisionInfo.getRootElement()
.getChild("query", ns).getChild("pages", ns).getChild("page", ns).getChild("revisions", ns)
.getChild("rev", ns).getAttributeValue("timestamp"));
if (firstRevTime.before(uploadTime)) {
// if the first revision was before the upload, don't delete
return false;
}
}
// otherwise, delete
String deleteReason = "{{[[Template:c-uploaded|c-uploaded]]}} file off the " +
"[[T:DYK|DYK]] section of the Main Page";
deleteContent("File:" + filename, deleteReason);
return true;
} catch (DYKResetException e) {
throw e; // manual queue reset; let the caller restart the bot
} catch (Exception e) {
logError("Error occurred while deleting [[:File:" + filename + "]]");
return false;
}
}
/**
 * Removes full protection from a file that has left the Main Page, but only when:
 * 1. It exists on English Wikipedia and is fully protected
 * 2. The string "Main Page" is in the reason for the most recent protection
 * @param file the file to unprotect; no-op when null
 */
private void unprotectFile(DYKFile file) {
    if (file == null) return;
    String name = file.getFilename();
    // bail out unless the file has a local (enwiki) description page
    SimpleArticle localPage = new SimpleArticle(readContent("File:" + name));
    if (localPage.getText().isEmpty()) {
        return;
    }
    // bail out unless the file is currently fully protected
    if (!checkIfProtected(name, null, false)) {
        return;
    }
    // fetch the most recent protection log entry for the file
    String logQueryURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query" +
        "&list=logevents&letype=protect&leprop=parsedcomment" +
        "&letitle=File:" + MediaWiki.encode(name);
    Document logDoc = fetchUsingSAXBuilder(logQueryURL);
    Element latestEntry = logDoc.getRootElement().getChild("query", ns)
        .getChild("logevents", ns).getChild("item", ns);
    if (latestEntry == null) {
        return; // never manually protected (e.g. only cascade-protected)
    }
    // only lift protection that was applied for a Main Page appearance
    String reason = latestEntry.getAttributeValue("parsedcomment");
    if (!reason.contains("Main Page")) {
        return;
    }
    unprotectContent("File:" + name, "File off the [[T:DYK|DYK]] section of the Main Page");
}


# Returns:
/**
# * Int of the next queue number, parsed from NEXT_UPDATE_QUEUE_LOC
* Checks if the file exists at Commons or English Wikipedia,
# * 0 if NEXT_UPDATE_QUEUE_LOC doesn't parse to an int
* then tags the file on English Wikipedia if it does exist
def _find_next_queue_number(self) -> int:
*/
page = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.NEXT_UPDATE_QUEUE_LOC)
private void tagFile(DYKFile file, Calendar time) {
num_next_queue = 0
if (file == null) return;
try:
String filename = file.getCroppedFrom(); // tag the original file if the image was cropped
num_next_queue = int(page.text)
if (filename == null) filename = file.getFilename();
except ValueError:
do {
pass
try {
return num_next_queue
SimpleArticle filePage = new SimpleArticle(readContent("File:" + filename));
MediaWikiBot commonsBot = new MediaWikiBot(BaseCommonsAPIURL);
if (!filePage.getText().isEmpty() ||
!readContent(commonsBot, "File:" + filename).getText().isEmpty()) {
if (filePage.getText().contains("{{DYKfile")) {
log("The file " + filename + " has already been tagged");
return;
}
String fileTag = "{{DYKfile|" +
new SimpleDateFormat("d MMMM'|'yyyy", BotLocale).format(time.getTime()) +
"|type=" + file.getType() + "}}"; //create DYKfile tag
String fileContent = filePage.getText();
if (fileContent.isEmpty()) filePage.setEditTimestamp(OverrideEditConflicts);
int indexOfFirstSection = fileContent.indexOf("==");
if (indexOfFirstSection == -1) indexOfFirstSection = fileContent.length();
fileContent = fileContent.substring(0, indexOfFirstSection).trim() +
"\n" + fileTag + "\n" + fileContent.substring(indexOfFirstSection).trim();
filePage.setText(fileContent);
filePage.setEditSummary("File appeared on [[WP:Did you know|DYK]] on " +
new SimpleDateFormat("d MMMM yyyy", BotLocale).format(time.getTime()));
writeContent(filePage);
} else {
logError("[[:File:" + filename + "]] does not exist at Commons or English Wikipedia");
}
return;
} catch (EditConflictException e) {
log("Edit conflict caught");
// will try again because of while(true)
} catch (DYKResetException e) {
throw e;
} catch (Exception e) {
logError("Error occurred while tagging [[:File:" + filename + "]]");
return;
}
} while (true);
}
/**
 * Makes sure that each {{*mp}} hook bullet is on its own line.
 * @param queueText queue wikitext
 * @param indexOfHooksinQueue index of <!--Hooks--> in the queue
 * @param indexOfHooksEndinQueue index of <!--HooksEnd--> in the queue
 * @return queue wikitext with each hook on its own line
 */
private String checkIfEachHookOnNewLine(String queueText, int indexOfHooksinQueue,
int indexOfHooksEndinQueue) {
// walk every {{*mp}} between the two markers ({{*mp}} is 7 characters long)
for (int hookIndex = queueText.indexOf("{{*mp}}", indexOfHooksinQueue);
hookIndex != -1 && hookIndex < indexOfHooksEndinQueue;
hookIndex = queueText.indexOf("{{*mp}}", hookIndex + 7)) {
if (hookIndex != 0 && queueText.charAt(hookIndex - 1) != '\n') {
log("Multiple hooks detected on one line, fixing");
// insert a newline before the bullet; everything after it (including
// the HooksEnd marker) shifts right by one character, hence the ++
queueText = queueText.substring(0, hookIndex) + "\n" + queueText.substring(hookIndex);
indexOfHooksEndinQueue++;
}
}
return queueText;
}
/**
 * Checks if the DYK queue pointer has been changed manually on-wiki.
 * If so, the bot attempts to reset itself by throwing a DYKResetException,
 * which propagates up to the run() method.
 */
protected void checkIfReset() {
    int onWikiQueueNumber = findNextQueueNumber();
    if (onWikiQueueNumber == nextQueue) {
        return; // still in sync with the bot's own pointer
    }
    log("DYK next queue number has been changed manually, attempting reset");
    throw new DYKResetException();
}
/**
 * Finds the character offset of the first line after the banner-template
 * cluster at the top of an article talk page; used to insert a new
 * {{DYK talk}} after the existing templates and before any conversations.
 * A line qualifies when it has at least two non-whitespace-trimmed characters,
 * all braces seen so far are balanced, and the line either contains no braces
 * at all or is a {{Talk:...}} transclusion.
 * @param text the talk page wikitext
 * @return offset of the qualifying line, or text.length() if none qualifies
 */
private int findLastTemplateIndex(String text) {
    int totalOpens = 0;
    int totalCloses = 0;
    int offset = 0;
    for (String line : text.split("\n")) {
        int opensHere = countMarker(line, "{{");
        int closesHere = countMarker(line, "}}");
        totalOpens += opensHere;
        totalCloses += closesHere;
        boolean braceFree = (opensHere == 0 && closesHere == 0);
        boolean talkTransclusion = line.matches("^[\\s]*\\{\\{[\\s]*[Tt]alk[\\s]*\\:.*");
        if (line.trim().length() >= 2 && totalOpens == totalCloses
                && (braceFree || talkTransclusion)) {
            return offset;
        }
        offset += line.length() + 1; // +1 for the newline consumed by split
    }
    return text.length();
}

/** Counts non-overlapping occurrences of marker in line. */
private static int countMarker(String line, String marker) {
    int count = 0;
    for (int at = line.indexOf(marker); at != -1; at = line.indexOf(marker, at + marker.length())) {
        count++;
    }
    return count;
}
/**
 * Records a message in the error log buffer and echoes it to the console.
 * The buffered errors are posted to ErrorOutputLoc at the end of every run
 * by postErrors().
 * @param message the error text to record
 */
private void logError(String message) {
    errorLog.append(message + "\n\n");
    System.out.println("Error: " + message);
}
/**
 * Posts the accumulated error log to the page named by ErrorOutputLoc at the
 * end of each run, clearing the page after a clean run. The local buffer is
 * always reset, and an edit is skipped when the page already shows the same
 * errors.
 */
private void postErrors() {
    SimpleArticle errorsPage = new SimpleArticle(readContent(ErrorOutputLoc));
    String pendingErrors = errorLog.toString().trim();
    errorLog = new StringBuilder(); // reset the buffer for the next run
    if (errorsPage.getText().trim().equals(pendingErrors)) {
        return; // identical content already posted; avoid a redundant edit
    }
    errorsPage.setText(pendingErrors);
    errorsPage.setEditSummary(pendingErrors.isEmpty() ? "No errors; clear" : "Posting latest errors");
    try {
        // always overwrite; stale error reports shouldn't block on conflicts
        errorsPage.setEditTimestamp(OverrideEditConflicts);
    } catch (ParseException e) {
        // OverrideEditConflicts is a known-good constant; cannot happen
    }
    writeContent(errorsPage);
}
/**
 * Collapses runs of space characters to a single space and drops leading and
 * trailing spaces (other whitespace such as tabs is left untouched).
 * @param text the text with unnecessary spaces
 * @return text without unnecessary spaces
 */
private String removeUnnecessarySpaces(String text) {
    StringBuilder collapsed = new StringBuilder();
    for (String token : text.split(" ")) {
        if (token.isEmpty()) {
            continue; // an empty token marks a run of consecutive spaces
        }
        if (collapsed.length() > 0) {
            collapsed.append(' ');
        }
        collapsed.append(token);
    }
    return collapsed.toString();
}
/**
 * Checks if the local switch file "UpdateBotSwitch.txt" is set to "on".
 * Read failures are logged and retried every 5 seconds until the file can be
 * read.
 * @return true if the file's first line is "on" (case-insensitive), false
 *         otherwise (including an empty file)
 */
protected boolean isOn() {
    do {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader("UpdateBotSwitch.txt"));
            String status = reader.readLine();
            // readLine() returns null for an empty file; the old
            // status.equalsIgnoreCase("on") NPE'd there and retried forever,
            // so compare null-safely and treat empty as "off"
            return "on".equalsIgnoreCase(status);
        } catch (Exception e) {
            log("File read exception caught");
            sleep(5000);
        } finally {
            // always release the file handle; the old code leaked it whenever
            // readLine() threw
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception ignored) {
                    // nothing useful to do if close fails
                }
            }
        }
    } while (true);
}
/**
 * Entry point: configures logging, constructs the bot from the locally
 * configured constants and credentials, and starts it.
 * run() is invoked while holding the bot's monitor -- presumably so run()
 * can wait() on itself between update checks; TODO confirm in run(), which
 * is outside this chunk.
 */
public static void main(String[] args) {
DYKUpdateBot.initializeLoggers();
DYKUpdateBot updateBot = new DYKUpdateBot(TimeBetweenEdits, NumExceptionsBeforeAttemptedReset,
NextUpdateQueueLoc, UserInfo.getUser(), UserInfo.getPassword());
synchronized (updateBot) {
updateBot.run();
}
}
/**
 * Data holder for one DYK credit: which article, which user to thank, and the
 * hook/nomination details used when tagging article and user talk pages.
 */
class DYKCredit {
// the credited article's title
String articleTitle;
// full "User talk:..." title, or null if the username couldn't be validated
String userTalkPage;
// the article's hook wikitext, or null if it couldn't be found
String hook;
// set when the article title in the credit is suspect -- TODO confirm the
// exact condition against the credit-parsing code, which is outside this chunk
boolean errorInArticleTitle;
// true for a {{DYKmake}} credit, false for a {{DYKnom}} credit
boolean dykMake;
// full nomination page title in template space, or null if none was given
String nompage;
DYKCredit(String articleTitle, String userTalkPage, String hook, boolean errorInArticleTitle,
boolean dykMake, String nompage) {
this.articleTitle = articleTitle;
this.userTalkPage = userTalkPage;
this.hook = hook;
this.errorInArticleTitle = errorInArticleTitle;
this.dykMake = dykMake;
// callers pass just the subpage name; store the full template-space title
if (nompage != null) this.nompage = "Template:Did you know nominations/" + nompage;
}
}
}
</source>


def validate_before_update(self, results_val, time_set_leaving):
# figure out which queue to update from
results_val.num_queue = self._find_next_queue_number()
if results_val.num_queue == 0:
self._log_error(results_val.rgstr_errors, 'Could not parse [[{0}]]; check if it\'s a number 1-{1}'
.format(DYKUpdateBot.NEXT_UPDATE_QUEUE_LOC, DYKUpdateBot.NUM_QUEUES))
return results_val


# get the wikitext of the queue
<source lang="java">
results_val.page_queue = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.QUEUE_ROOT_LOC + str(results_val.num_queue))
import java.lang.management.ManagementFactory;
str_queue = results_val.page_queue.text
import java.net.MalformedURLException;
str_link_to_queue = DYKUpdateBotUtils.wikilink_to_queue(results_val.num_queue, True)
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.TimeZone;


# make sure all curly braces are matched
import net.sourceforge.jwbf.actions.mw.MediaWiki;
if str_queue.count('{{') != str_queue.count('}}'):
import net.sourceforge.jwbf.actions.mw.editing.GetRevision;
self._log_error(results_val.rgstr_errors, 'Unmatched left <nowiki>("{{") and right ("}}")</nowiki> curly braces in ' + str_link_to_queue)
import net.sourceforge.jwbf.actions.mw.util.ActionException;
return results_val
import net.sourceforge.jwbf.bots.MediaWikiBot;
import net.sourceforge.jwbf.bots.util.LoginData;
import net.sourceforge.jwbf.contentRep.mw.Article;
import net.sourceforge.jwbf.contentRep.mw.SimpleArticle;


# make sure the queue has {{DYKbotdo}}
import org.apache.log4j.BasicConfigurator;
has_dykbotdo, results_val.str_dykbotdo_signature = DYKUpdateBotUtils.parse_dykbotdo(str_queue)
import org.apache.log4j.Level;
if not has_dykbotdo:
import org.apache.log4j.Logger;
self._post_almost_late_message_to_WTDYK(time_set_leaving, results_val.num_queue)
import org.jdom.Document;
self._log_error(results_val.rgstr_errors, str_link_to_queue + ' is not tagged with {{tl|DYKbotdo}}')
import org.jdom.Element;
return results_val
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;


# make sure the queue has <!--Hooks--> and <!--HooksEnd--> and find hooks
public abstract class EnWikiBot {
results_val.hooks_incoming = DYKUpdateBotUtils.extract_hooks(str_queue)
static {
if results_val.hooks_incoming is None:
TimeZone.setDefault(TimeZone.getTimeZone("Coordinated Universal Time"));
self._log_error(results_val.rgstr_errors, str_link_to_queue + ' is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>')
}
return results_val
public static final Namespace ns = Namespace.NO_NAMESPACE;
protected static final Locale BotLocale = Locale.forLanguageTag("en-US");
protected static final SimpleDateFormat APITimestampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", BotLocale);
protected static final String OverrideEditConflicts = "9999-12-31T23:59:59Z";
protected static final String BaseEnWikiAPIURL = "https://en.wikipedia.org/w/";
final String purgeLoc;
protected final int timeBetweenEdits;
protected final int numExceptionsBeforeAttemptedReset;
protected final String userName;
private final String password;
protected long lastRevId;
protected long lastDelId;
private MediaWikiBot enBot;
abstract protected boolean isOn();
abstract protected void checkIfReset();
/**
 * Creates a bot bound to the English Wikipedia API
 * @param timeBetweenEdits seconds to pause after each successful write
 * @param numExceptionsBeforeAttemptedReset consecutive failures tolerated before checkIfReset() is consulted
 * @param purgeLoc page location stored for purging
 * @param userName bot account name
 * @param password bot account password
 */
public EnWikiBot(int timeBetweenEdits, int numExceptionsBeforeAttemptedReset,
String purgeLoc, String userName, String password) {
this.timeBetweenEdits = timeBetweenEdits;
this.numExceptionsBeforeAttemptedReset = numExceptionsBeforeAttemptedReset;
this.purgeLoc = purgeLoc;
this.userName = userName;
this.password = password;
// RuntimeMXBean name is conventionally "pid@host"; log just the pid part
// NOTE(review): assumes '@' is present — indexOf() returning -1 would break substring()
String processInfo = ManagementFactory.getRuntimeMXBean().getName();
log("PID: " + processInfo.substring(0, processInfo.indexOf('@')));
log(Locale.getDefault().toLanguageTag());
try {
enBot = new MediaWikiBot(BaseEnWikiAPIURL);
} catch (MalformedURLException e) {
e.printStackTrace(); // BaseEnWikiAPIURL is a constant, so this should be unreachable
}
// seed the last-revision marker so later writes can verify they actually saved
lastRevId = getLastRevId();
}
/**
 * Gets the revision ID of the last edit made by the bot (on any page)
 * This function is used to make sure that the bot really has edited when it thinks it has
 * This function is affected by server lag
 * @return revision ID of the bot's most recent contribution
 */
protected long getLastRevId() {
    // URL-encode the username (it may contain spaces), matching how the other
    // API helpers in this class encode their query parameters
    String apiURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query&list=usercontribs" +
            "&uclimit=1&ucprop=ids&ucuser=" + MediaWiki.encode(userName);
    Document doc = fetchUsingSAXBuilder(apiURL);
    Element editInfo = doc.getRootElement().getChild("query", ns).getChild("usercontribs", ns).getChild("item", ns);
    return Long.parseLong(editInfo.getAttributeValue("revid"));
}
/**
 * Gets the revision ID of the last edit made by the bot at the given page
 * This function is used to make sure that the bot really has edited when it thinks it has
 * This function is not affected by server lag
 * @param title page whose history is inspected
 * @return revision ID of the bot's last edit there, or 0 if the bot has never edited the page
 */
protected long getLastRevId(String title) {
    title = MediaWiki.encode(title);
    // URL-encode the username as well (it may contain spaces), consistent with the title above
    String apiURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query&prop=revisions" +
            "&rvprop=ids&rvlimit=1&rvuser=" + MediaWiki.encode(userName) + "&titles=" + title;
    Document doc = fetchUsingSAXBuilder(apiURL);
    Element pageInfo = doc.getRootElement().getChild("query", ns).getChild("pages", ns).getChild("page", ns);
    if (pageInfo.getChildren().size() == 0) return 0; // the page has never been edited by the bot
    Element editInfo = pageInfo.getChild("revisions", ns).getChild("rev", ns);
    return Long.parseLong(editInfo.getAttributeValue("revid"));
}
/**
 * Finds the mainspace redirects pointing to the given page
 * @param title page whose redirects are wanted
 * @param limit maximum number of redirects to fetch
 * @return list of page titles that redirect to the given page
 */
@SuppressWarnings("unchecked")
protected LinkedList<String> findRedirectsToPage(String title, int limit) {
LinkedList<String> redirects = new LinkedList<String>();
String getRedirectsURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query" +
"&list=backlinks&blfilterredir=redirects&blnamespace=0&bllimit=" + limit + "&bltitle=" +
MediaWiki.encode(title);
Document redirectsInfo = fetchUsingSAXBuilder(getRedirectsURL);
Element backlinks = redirectsInfo.getRootElement().getChild("query", ns).getChild("backlinks", ns);
// NOTE(review): assumes every descendant of <backlinks> is an Element carrying a
// "title" attribute — verify against the API's XML; mixed content would fail the cast
Iterator<Element> redirectIter = backlinks.getDescendants();
while (redirectIter.hasNext()) {
redirects.add(redirectIter.next().getAttributeValue("title"));
}
return redirects;
}
/**
 * Checks if the given wikitext is a redirect by parsing the page
 * @param pageText text on the page
 * @return null if the page isn't a (well-formed) redirect, name of the "redirect to" page if it is
 */
protected String checkForPageRedirect(String pageText)
{
    String redirectTo = null;
    if (pageText.toLowerCase().trim().startsWith("#redirect")) {
        int linkStartIndex = pageText.indexOf("[[") + 2;
        int linkPipeIndex = pageText.indexOf("|", linkStartIndex);
        int linkEndIndex = pageText.indexOf("]]", linkStartIndex);
        // guard against malformed redirects ("#REDIRECT" without a [[...]] link),
        // which previously threw StringIndexOutOfBoundsException:
        // linkStartIndex < 2 means indexOf("[[") returned -1
        if (linkStartIndex < 2 || linkEndIndex == -1) return null;
        if (linkStartIndex < linkPipeIndex && linkPipeIndex < linkEndIndex) {
            // piped link: keep only the target before the pipe
            linkEndIndex = linkPipeIndex;
        }
        redirectTo = pageText.substring(linkStartIndex, linkEndIndex);
        // drop any section anchor ("Target#Section" -> "Target")
        if (redirectTo.indexOf("#") != -1) {
            redirectTo = redirectTo.substring(0, redirectTo.indexOf("#"));
        }
    }
    return redirectTo;
}
/**
 * Converts a wiki timestamp (like "2009-01-17T23:45:32Z") to a Java Calendar
 * @param wikiTimestamp timestamp in the API's ISO-8601-style format
 * @return Calendar set to the specified time
 * @throws ParseException if the string doesn't match the expected format
 */
public Calendar convertWikiTimestamp(String wikiTimestamp) throws ParseException {
GregorianCalendar time = new GregorianCalendar(BotLocale);
// APITimestampFormat parses in the JVM's default time zone — presumably set to
// UTC elsewhere at startup; verify before relying on the zone of the result
// NOTE(review): the shared SimpleDateFormat is not thread-safe — fine only single-threaded
time.setTime(APITimestampFormat.parse(wikiTimestamp));
return time;
}


# make sure the image/file is protected
/**
results_val.file_incoming = DYKUpdateBotUtils.find_file(results_val.hooks_incoming)
* Fetches the URL using SAXBuilder
if results_val.file_incoming:
* If an exception is thrown, the bot will wait at least 5 seconds before attempting again
str_protection_error = DYKUpdateBotUtils.check_if_protected(results_val.file_incoming, time_set_leaving)
* @param url you want to fetch (should be formatted in XML)
if str_protection_error:
* @return the XML tree in the form of a Document
self._log_error(results_val.rgstr_errors, str_protection_error)
*/
else:
protected Document fetchUsingSAXBuilder(String url) {
self._log_warning(results_val.rgstr_warnings, 'Can\'t find the image / file for incoming DYK set\n')
int exceptionCounter = 0;
do {
try {
return new SAXBuilder().build(url);
} catch (Exception e) {
exceptionCounter++;
log("SAXbuilder exception caught, #" + exceptionCounter);
if (exceptionCounter > numExceptionsBeforeAttemptedReset) {
checkIfReset();
}
// wait at least 5 seconds and at most an hour before attempting another read
sleep(Math.min(5000 + (1000 * exceptionCounter), 3600000));
}
} while (true);
}
/**
 * Checks if the bot is logged in, and logs in if not
 * There's no easy way to tell if the bot's logged in, so the bot null edits its userpage
 * and checks if its username shows up in the correct variables in the returned HTML
 * Wikipedia automatically logs out a user one month after login
 */
protected void checkifLoggedIn() {
SimpleArticle userpage = readContent("User:" + userName);
userpage.setEditSummary("");
try {
String userpageHTML = enBot.performAction(new PostModifyContentWithEditConflicts(userpage));
// the served page embeds the session's username in its JS config ("wgUserName")
if (!userpageHTML.contains("\"wgUserName\":\"" + userName + "\"")) {
// the bot got logged off somehow
log("Logging in");
login();
}
} catch (Exception e) {
// deliberately best-effort: a failed null edit is only logged;
// the next real write will retry and re-check the login state
log("Caught exception during null edit on login check");
}
}
/**
 * Logs in to the wiki, retrying until it succeeds
 * Waits at least 5 seconds between attempts (growing by 1 second per failure,
 * capped at an hour) and consults checkIfReset() once the failure count passes
 * the configured threshold
 */
protected void login() {
int exceptionCounter = 0;
LoginData login = new LoginData();
do {
try {
enBot.performAction(new PostLoginNew(userName, password, login));
return;
} catch (Exception e) {
exceptionCounter++;
log("Exception caught while logging in");
if (exceptionCounter > numExceptionsBeforeAttemptedReset) {
checkIfReset();
}
// wait at least 5 seconds and at most an hour before attempting another login
sleep(Math.min(5000 + (1000 * exceptionCounter), 3600000));
}
} while (true);
}
/**
 * Purges the given page's server-side cache, retrying until the API answers
 * @param page title of the page to purge
 * @param sleep whether to pause for timeBetweenEdits seconds after a successful purge
 * @return the API's XML reply
 */
protected String purge(String page, boolean sleep) {
int loopCounter = 0;
do {
try {
String xmlReply = enBot.performAction(new PostPurge(page));
// a null reply means the purge didn't happen; treat it like a failure
if (xmlReply == null) throw new ActionException();
if (sleep) sleep(timeBetweenEdits * 1000);
return xmlReply;
} catch (Exception e) {
loopCounter++;
log("Purge exception caught, #" + loopCounter);
if (loopCounter > numExceptionsBeforeAttemptedReset) {
checkIfReset();
}
// wait at least 5 seconds and at most an hour before attempting another purge
sleep(Math.min(5000 + (1000 * loopCounter), 3600000));
}
} while (true);
}
/**
 * Reads a page from the English Wikipedia
 * See documentation for readContent(MediaWikiBot, String) below
 * @param pageName page to read
 * @return the article
 */
protected Article readContent(String pageName) {
return readContent(enBot, pageName);
}
/**
 * Reads a Wikipedia page
 * If an exception is thrown (most likely because of server connection issues), the bot will wait
 * at least 5 seconds until attempting again
 * The time between attempts increases by 1 second each attempt, up to a maximum of 1 hour
 * @param bot bot that specifies which wiki you're reading from
 * @param pageName page to read
 * @return the article, with content and revision timestamp populated
 */
protected Article readContent(MediaWikiBot bot, String pageName) {
int loopCounter = 0;
do {
try {
// fetch the wikitext plus its timestamp (needed later for edit-conflict detection)
return bot.readContent(pageName, GetRevision.CONTENT | GetRevision.TIMESTAMP);
} catch (Exception e) {
loopCounter++;
log("Read exception caught, #" + loopCounter);
if (loopCounter > numExceptionsBeforeAttemptedReset) {
checkIfReset();
}
// wait at least 5 seconds and at most an hour before attempting another read
sleep(Math.min(5000 + (1000 * loopCounter), 3600000));
}
} while (true);
}
/**
 * Edits a page on the English Wikipedia
 * See documentation for writeContent(MediaWikiBot, SimpleArticle) below
 * @param page page you want to edit
 */
protected void writeContent(SimpleArticle page) {
writeContent(enBot, page);
}
/**
 * Edits a Wikipedia page
 * If an exception is thrown (most likely because of server connection issues), the bot will wait
 * at least 5 seconds until attempting again
 * The time between attempts increases by 1 second each attempt, up to a maximum of 1 hour
 * After every attempt the revision IDs are compared to confirm whether the edit really saved,
 * so a dropped HTTP response doesn't lead to a double edit
 * @param bot logged-in bot
 * @param page page you want to edit
 */
protected void writeContent(MediaWikiBot bot, SimpleArticle page) {
int loopCounter = 0;
// MediaWiki may normalize the title (e.g. HTML entities); use the normalized form throughout
String normalizedTitle = normalizeTitle(page.getLabel());
if (!normalizedTitle.equals(page.getLabel())) {
log("Title normalized from " + page.getLabel() + " to " + normalizedTitle);
page.setLabel(normalizedTitle);
}
do {
try {
bot.performAction(new PostModifyContentWithEditConflicts(page));
log("Editing " + page.getLabel());
sleep(timeBetweenEdits * 1000);
long latestRevId = getLastRevId(page.getLabel());
if (latestRevId <= lastRevId) { // the edit didn't go through
log("Edit didn't process correctly, attempting again");
throw new ActionException();
} else {
lastRevId = latestRevId;
}
return;
} catch (EditConflictException e) {
if (loopCounter > 0) {
// an earlier attempt may actually have saved; don't write again on a stale base revision
log("Newer page available, but skipping to avoid " +
"double-editing (check for edit conflicts)");
lastRevId = getLastRevId(page.getLabel());
return;
} else {
// a conflict on the very first attempt is propagated for the caller to handle
throw e;
}
} catch (Exception e) {
// wait at least 5 seconds and at most an hour
sleep(Math.min(5000 + (1000 * loopCounter), 3600000));
long latestRevId = getLastRevId(page.getLabel());
if (latestRevId > lastRevId) { // the edit did go through
log("Edit processed correctly, continuing");
lastRevId = latestRevId;
sleep(timeBetweenEdits * 1000);
return;
} // else
loopCounter++;
log("Write exception caught, #" + loopCounter);
if (loopCounter > numExceptionsBeforeAttemptedReset) {
checkIfReset();
}
checkifLoggedIn(); //make sure we're logged in
}
} while (true);
}


# fetch T:DYK
/**
results_val.page_TDYK = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.TDYK_LOC)
* Deletes a Wikipedia page
str_tdyk = results_val.page_TDYK.text
* If an exception is thrown (most likely because of server connection issues), the bot will wait
* at least 5 seconds before attempting again
* The time between attempts increases by 1 second each attempt, up to a maximum of 10 attempts
* @param page to delete
* @param reason for deletion
*/
/**
 * Deletes a Wikipedia page
 * Verifies via the deletion log that the delete actually happened and retries on failure,
 * giving up after 10 failed attempts
 * @param pageName page to delete
 * @param reason for deletion
 */
protected void deleteContent(String pageName, String reason) {
boolean errorThrown = false;
int loopCounter = 0;
do {
try {
enBot.performAction(new PostDeleteWithReason(pageName,
reason, enBot.getSiteinfo(), enBot.getUserinfo()));
log("Deleting " + pageName);
errorThrown = false;
sleep(timeBetweenEdits * 1000);
long latestDelId = getLastDelId();
if (latestDelId <= lastDelId) { // the delete didn't go through
log("Delete didn't process correctly, attempting again");
throw new ActionException();
} else {
lastDelId = latestDelId;
}
return;
} catch (Exception e) {
// before retrying, check the deletion log: the delete may have
// succeeded even though the HTTP response was lost
sleep(timeBetweenEdits * 1000);
long latestDelId = getLastDelId();
if (latestDelId > lastDelId) { // the delete did go through
log("Delete processed correctly, continuing");
lastDelId = latestDelId;
errorThrown = false;
return;
} // else
errorThrown = true;
loopCounter++;
log("Delete exception caught, #" + loopCounter);
checkifLoggedIn(); //make sure we're logged in
// wait at least 5 seconds and at most an hour before attempting another delete
sleep(Math.min(5000 + (1000 * loopCounter), 3600000));
}
} while (errorThrown && loopCounter < 10);
}


# make sure T:DYK has <!--Hooks--> and <!--HooksEnd--> and find hooks
/**
results_val.hooks_outgoing = DYKUpdateBotUtils.extract_hooks(str_tdyk)
* Unprotects a Wikipedia page
if results_val.hooks_outgoing is None:
* If an exception is thrown (most likely because of server connection issues), the bot will wait
self._log_error(results_val.rgstr_errors, '[[' + DYKUpdateBot.TDYK_LOC + ']] is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>')
* at least 5 seconds before attempting again
return results_val
* The time between attempts increases by 1 second each attempt, up to a maximum of 10 attempts
* @param page to unprotect
* @param reason for unprotection
*/
/**
 * Unprotects a Wikipedia page
 * Retries on failure, giving up after 10 failed attempts
 * @param pageName page to unprotect
 * @param reason for unprotection
 */
protected void unprotectContent(String pageName, String reason) {
boolean errorThrown = false;
int loopCounter = 0;
do {
try {
enBot.performAction(new PostUnprotectWithReason(pageName, reason));
log("Unprotecting " + pageName);
errorThrown = false;
sleep(timeBetweenEdits * 1000);
return;
} catch (Exception e) {
errorThrown = true;
loopCounter++;
log("Unprotect exception caught, #" + loopCounter);
checkifLoggedIn(); //make sure we're logged in
// wait at least 5 seconds and at most an hour before attempting another unprotect
sleep(Math.min(5000 + (1000 * loopCounter), 3600000));
}
} while (errorThrown && loopCounter < 10);
}
/**
 * Gets the log ID of the last delete action by the bot
 * This function is used to make sure that the bot really has deleted when it thinks it has
 * @return last deletion log ID
 */
protected long getLastDelId() {
    // URL-encode the username (it may contain spaces), matching the other API helpers
    String apiURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query&list=logevents" +
            "&letype=delete&leprop=ids&lelimit=1&leuser=" + MediaWiki.encode(userName);
    Document doc = fetchUsingSAXBuilder(apiURL);
    Element itemInfo = doc.getRootElement().getChild("query", ns).getChild("logevents", ns).getChild("item", ns);
    return Long.parseLong(itemInfo.getAttributeValue("logid"));
}
/**
 * Normalizes a page title so Mediawiki will like it
 * @param pageName title to be normalized, e.g. "1922&ndash;23 Nelson F.C. season"
 * @return normalized title, e.g. "1922–23 Nelson F.C. season"
 */
protected String normalizeTitle(String pageName) {
// un-escape "&amp;" first so the API sees a literal ampersand in the title
pageName = pageName.replaceAll("&amp;", "&");
String apiURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query&titles=" +
MediaWiki.encode(pageName);
Document doc = fetchUsingSAXBuilder(apiURL);
Element normalized = doc.getRootElement().getChild("query", ns).getChild("normalized", ns);
// no <normalized> element in the reply means the title was already canonical
if (normalized == null) return pageName;
return normalized.getChild("n", ns).getAttributeValue("to");
}


return results_val
/**
 * Expands templates in the given wikitext via the API
 * @param wikitext the wikitext to be expanded
 * @return expanded wikitext
 */
protected String expandTemplates(String wikitext) {
String apiURL = BaseEnWikiAPIURL + "api.php?format=xml&action=expandtemplates&text=" +
MediaWiki.encode(wikitext);
Document doc = fetchUsingSAXBuilder(apiURL);
return doc.getRootElement().getChildText("expandtemplates", ns);
}
/**
 * Pauses for (at least) the given time by waiting on this object's monitor
 * The calling thread must hold the lock on this instance (see the synchronized
 * block around run() in main), otherwise wait() throws IllegalMonitorStateException
 * @param milliseconds time to wait, in milliseconds; returns immediately if not positive
 */
protected void sleep(long milliseconds) {
    // loop on a deadline: Object.wait() can return early (spurious wakeup or
    // interrupt), and wait(0) would block forever, so never call it with <= 0
    long deadline = System.currentTimeMillis() + milliseconds;
    long remaining = milliseconds;
    while (remaining > 0) {
        try {
            this.wait(remaining);
        } catch (InterruptedException e1) {
            log ("Interrupted exception caught");
        }
        remaining = deadline - System.currentTimeMillis();
    }
}
/**
 * Logs a message to standard output
 * @param message text to log
 */
protected void log(String message) {
System.out.println(message);
}
/**
 * Initializes the various loggers that the JWBF uses
 * Quiets the HTTP-client and framework loggers so only FATAL messages appear
 */
protected static void initializeLoggers() {
BasicConfigurator.configure();
Logger.getLogger("org.apache.commons.httpclient").setLevel(Level.FATAL);
Logger.getLogger("httpclient.wire").setLevel(Level.FATAL);
Logger.getLogger("net.sourceforge.jwbf").setLevel(Level.FATAL);
Logger.getLogger(PostModifyContentWithEditConflicts.class).setLevel(Level.FATAL);
}
}
</source>


def update_dyk(self, time_update, results) -> None:
if results.rgstr_errors:
return
str_link_to_queue = DYKUpdateBotUtils.wikilink_to_queue(results.num_queue, False)


# replace old hooks with new hooks
<source lang="java">
results.page_TDYK.text = results.page_TDYK.text.replace(results.hooks_outgoing, results.hooks_incoming)
import java.util.Date;
self._edit(results.page_TDYK, 'Bot automatically updating DYK template with hooks copied from ' + str_link_to_queue)


# purge the Main Page
public class DYKFile {
pywikibot.Page(pywikibot.Site(), 'Main Page').purge()
private final String filename;
private final String type;
private Date dykDate;
private final String rolloverText;
private boolean cuploaded;
private String croppedFrom;
// for backwards-compatibility with other packages
public DYKFile(String filename, String type, String rolloverText) {
this.filename = filename;
this.type = type;
this.rolloverText = rolloverText;
this.cuploaded = false;
}
public DYKFile(String filename, String type) {
this.filename = filename;
this.type = type;
this.rolloverText = null;
this.cuploaded = false;
}
public String getFilename() {
return filename;
}
public String getType() {
return type;
}
public void setDYKDate(Date dykDate) {
this.dykDate = dykDate;
}
public Date getDYKDate() {
return dykDate;
}
public void setCuploaded(boolean cuploaded) {
this.cuploaded = cuploaded;
}
public boolean getCuploaded() {
return cuploaded;
}
public void setCroppedFrom(String filename) {
croppedFrom = filename;
}
public String getCroppedFrom() {
return croppedFrom;
}
public String toStatsString() {
if (type.equals("sound")) {
return "{{DYK Listen|" + filename + "|" + rolloverText + "}}";
} else if (type.equals("video")) {
return "{{DYK Watch|" + filename + "|" + rolloverText + "}}";
} else {
return "[[File:" + filename + "|100x100px|" + rolloverText + "]]";
}
}
public String toString() {
return null; // unused code
}
}
</source>


# set last update time
time_update = time_update.replace(second=0, microsecond=0)
num_minutes_drift = self._calculate_drift(time_update, results.timedelta_between_updates)
time_update_with_drift = time_update + timedelta(minutes=num_minutes_drift)
page_last_update_time = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.LAST_UPDATE_TIME_LOC)
page_last_update_time.text = time_update_with_drift.isoformat()
self._edit(page_last_update_time, 'Resetting the clock' + (', with drift' if num_minutes_drift != 0 else ''))


# archive outgoing hooks
<source lang="java">
page_archive = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.ARCHIVE_LOC)
import net.sourceforge.jwbf.actions.Post;
page_archive.text = DYKUpdateBotUtils.archive(page_archive.text, time_update, results.hooks_outgoing)
import net.sourceforge.jwbf.actions.mw.HttpAction;
self._edit(page_archive, 'Archiving latest set')
import net.sourceforge.jwbf.actions.mw.util.MWAction;
import net.sourceforge.jwbf.actions.mw.util.ProcessException;


# credits - article talk, user talk
public class PostPurge extends MWAction {
rgcredits = self._parse_and_populate_credits(results.page_queue, results.hooks_incoming, results.file_incoming, results.rgstr_warnings)
private final Post msg;
self._tag_articles(rgcredits, time_update)
self._give_user_credits(rgcredits, results.str_dykbotdo_signature)
public PostPurge(final String title) {
super();
Post pm = new Post("/api.php?action=purge&format=xml");
pm.addParam("titles", title);
msg = pm;
}
public String processAllReturningText(final String s) throws ProcessException {
return s;
}
public HttpAction getNextMessage() {
return msg;
}


# clear queue
}
results.page_queue.text = '{{User:DYKUpdateBot/REMOVE THIS LINE}}'
</source>
self._edit(results.page_queue, 'Update is done, removing the hooks')


# update next queue number
num_next_queue = (results.num_queue % DYKUpdateBot.NUM_QUEUES) + 1
page_next_queue_num = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.NEXT_UPDATE_QUEUE_LOC)
page_next_queue_num.text = str(num_next_queue)
self._edit(page_next_queue_num, 'Next queue is ' + DYKUpdateBotUtils.wikilink_to_queue(num_next_queue, False))


# tag outgoing file
<source lang="java">
self._tag_outgoing_file(results.hooks_outgoing, time_update)
/**
 * Unchecked exception used to abort and restart the bot
 * NOTE(review): purpose inferred from the name and the checkIfReset() hooks — confirm
 */
public class DYKResetException extends RuntimeException {
private static final long serialVersionUID = 6465485908664532508L;
}
</source>


def _post_almost_late_message_to_WTDYK(self, time_set_leaving, num_next_queue) -> None:
str_timestamp = time_set_leaving.isoformat()
page_wtdyk = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.WTDYK_LOC)
if str_timestamp in page_wtdyk.text:
return # bot already posted an "almost late" message for this update, don't post again


with open(str(pathlib.Path(__file__).parent / 'almostLate.txt'), 'r', encoding='utf-8') as f:
<source lang="java">
str_almost_late = f.read()
/**
 * Unchecked exception signaling that a page save hit an edit conflict
 * (caught and handled in writeContent's retry logic)
 */
public class EditConflictException extends RuntimeException {
private static final long serialVersionUID = 7595756569739191727L;
}
</source>


str_almost_late = str_almost_late.replace('queueNum', str(num_next_queue))
str_almost_late = str_almost_late.replace('hoursLeft', 'two hours')
str_almost_late = str_almost_late.replace('uniqueSetIdentifier', str_timestamp)


self._append_and_edit(DYKUpdateBot.WTDYK_LOC, str_almost_late, 'DYK is almost late')
<source lang="java">
import org.apache.log4j.Logger;


def _calculate_drift(self, time_update, timedelta_between_updates) -> int:
import net.sourceforge.jwbf.actions.Post;
num_max_advance_minutes = 0
import net.sourceforge.jwbf.actions.mw.HttpAction;
num_max_delay_minutes = 0
import net.sourceforge.jwbf.actions.mw.MediaWiki;
page_drift = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.DRIFT_LOC)
import net.sourceforge.jwbf.actions.mw.editing.PostDelete;
for str_line in page_drift.text.split('\n'):
import net.sourceforge.jwbf.actions.mw.util.ProcessException;
try:
import net.sourceforge.jwbf.contentRep.mw.Siteinfo;
num_minutes_parsed = int(str_line[str_line.find(':') + 1:])
import net.sourceforge.jwbf.contentRep.mw.Userinfo;
if 'advance' in str_line:
num_max_advance_minutes = num_minutes_parsed
elif 'delay' in str_line:
num_max_delay_minutes = num_minutes_parsed
except:
DYKUpdateBotUtils.log('Couldn\'t parse drift')
return 0
return DYKUpdateBotUtils.calculate_drift_core(time_update,
timedelta_between_updates,
num_max_advance_minutes,
num_max_delay_minutes)


def _parse_and_populate_credits(self, page_queue, hooks_incoming, file_incoming, rgstr_warnings) -> []:
public class PostDeleteWithReason extends PostDelete {
rgcredits = DYKUpdateBotUtils.parse_credits(page_queue.text)
protected static final Logger LOG = Logger.getLogger(PostDelete.class);
fn_log_warning = partial(self._log_warning, self, rgstr_warnings)
protected final String reason;
DYKUpdateBotUtils.validate_credits_articles(rgcredits, fn_log_warning)
protected final String title;
DYKUpdateBotUtils.validate_credits_users(rgcredits, fn_log_warning)
DYKUpdateBotUtils.populate_hooks_and_file(rgcredits, hooks_incoming, file_incoming.title(with_ns=False))
for credit in rgcredits:
if credit.str_hook is None:
self._log_warning(rgstr_warnings, 'Couldn\'t find hook for [[{{0}}]], was the hook pulled or moved to a different set?'.format(credit.str_article))
return rgcredits


def _tag_articles(self, rgcredits, time_update) -> None:
set_tagged = set()
for credit in rgcredits:
if credit.str_article in set_tagged:
continue


str_edit_summary = None
public PostDeleteWithReason(String title, String reason, Siteinfo si, Userinfo ui)
page_talk = pywikibot.Page(pywikibot.Site(), 'Talk:' + credit.str_article)
throws ProcessException {
page_talk.text, str_edit_summary = DYKUpdateBotUtils.tag_article_history(page_talk.text, credit, time_update)
super(title, si, ui);
if not str_edit_summary:
this.reason = reason;
str_dyktalk_tag, str_edit_summary = DYKUpdateBotUtils.build_dyktalk_tag(credit, time_update)
this.title = title;
page_talk.text = DYKUpdateBotUtils.add_template_to_talk(page_talk.text, str_dyktalk_tag)
}
self._edit(page_talk, str_edit_summary)
/**
* This method is copied from PostDelete, with the reason added into the URL
*/
@Override
protected HttpAction getSecondRequest() {
HttpAction msg = null;
if (getToken() == null || getToken().length() == 0) {
throw new IllegalArgumentException(
"The argument 'token' must not be \""
+ String.valueOf(getToken()) + "\"");
}
if (LOG.isTraceEnabled()) {
LOG.trace("enter PostDelete.generateDeleteRequest(String)");
}


set_tagged.add(credit.str_article)
String uS = "/api.php" + "?action=delete" + "&title=" + MediaWiki.encode(title) +
"&token=" + MediaWiki.encode(getToken()) +
"&reason=" + MediaWiki.encode(reason) + "&format=xml";
if (LOG.isDebugEnabled()) {
LOG.debug("delete url: \"" + uS + "\"");
}
Post pm = new Post(uS);
msg = pm;


def _give_user_credits(self, rgcredits, str_dykbotdo_signature) -> None:
return msg;
str_promoting_admin = DYKUpdateBotUtils.find_user_link(str_dykbotdo_signature)
}
for credit in rgcredits:
}
if not credit.str_user_talk:
</source>
continue
str_message, str_edit_summary = DYKUpdateBotUtils.build_user_talk_credit(credit, str_dykbotdo_signature, str_promoting_admin)
self._append_and_edit(credit.str_user_talk, str_message, str_edit_summary)


def _tag_outgoing_file(self, hooks_outgoing, time_update) -> None:
file_outgoing = DYKUpdateBotUtils.find_file(hooks_outgoing)
if file_outgoing:
file_outgoing_commons = pywikibot.FilePage(pywikibot.Site().image_repository(), file_outgoing.title())
if file_outgoing.exists() or file_outgoing_commons.exists():
str_dykfile_tag = '{{{{DYKfile|{d.day} {d:%B}|{d.year}}}}}'.format(d=time_update)
file_outgoing.text = DYKUpdateBotUtils.add_template_to_talk(file_outgoing.text, str_dykfile_tag)
self._edit(file_outgoing, 'File appeared on [[WP:Did you know|DYK]] on {d.day} {d:%B} {d.year}'.format(d=time_update))
if ('m-cropped' in file_outgoing.text.lower()) or ('c-uploaded' in file_outgoing.text.lower()):
DYKUpdateBotUtils.log('Outgoing file "{0}" tagged with {{m-cropped}} or {{c-uploaded}}'.format(file_outgoing.title()))
else:
DYKUpdateBotUtils.log('Special case (possible bug?): Outgoing file "{0}" doesn\'t exist'.format(file_outgoing.title()))


def _post_errors(self, rgstr_warnings, rgstr_errors) -> None:
<source lang="java">
str_output = ''
//this is almost a straight copy & paste of revision 260 of JWBF's PostLogin
str_edit_summary = 'No errors or warnings; clear'
/*
* Copyright 2007 Thomas Stock.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*
* Contributors:
* Philipp Kohl
* Carlos Valenzuela
*/


if rgstr_warnings:
import java.io.IOException;
str_warnings = 'Bot warnings:\n'
import java.io.Reader;
str_warnings += '\n'.join('* {0}'.format(str_warning) for str_warning in rgstr_warnings)
import java.io.StringReader;
str_output = str_warnings + '\n\n' + str_output
str_edit_summary = 'Posting latest warnings'


if rgstr_errors:
import net.sourceforge.jwbf.actions.mw.login.PostLogin;
str_errors = 'Errors blocking the bot from updating DYK:\n'
import net.sourceforge.jwbf.actions.mw.util.ProcessException;
str_errors += '\n'.join('* {0}'.format(str_error) for str_error in rgstr_errors)
import net.sourceforge.jwbf.actions.mw.util.MWAction;
str_output = str_errors + '\n\n' + str_output
str_edit_summary = 'Bot is blocked from updating DYK, posting latest errors'


page_errors = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.ERROR_OUTPUT_LOC)
import org.apache.log4j.Logger;
if page_errors.text.strip() == str_output.strip():
import org.jdom.DataConversionException;
return # if the errors are already on the page, don't post again
import org.jdom.Document;
page_errors.text = str_output.strip()
import org.jdom.Element;
self._edit(page_errors, str_edit_summary)
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.xml.sax.InputSource;
/**
*
* @author Thomas Stock
*/
public class PostLoginNew extends MWAction {
private final Logger log = Logger.getLogger(PostLogin.class);
private net.sourceforge.jwbf.actions.Post msg;


# ---------------------------------------------
# Core editing
# ---------------------------------------------


# Edge cases we're handling:
private final String success = "Success";
# * {{nobots}}
private final String wrongPass = "WrongPass";
# * Redirects
private final String notExists = "NotExists";
# * Page doesn't exist
private final String needToken = "NeedToken";
# * Edit conflicts
private net.sourceforge.jwbf.bots.util.LoginData login = null;
# * Protected page
private boolean reTry = false;
def _append_and_edit(self, str_title, str_message, str_edit_summary) -> None:
    """Append str_message to the page at str_title and save it.

    Handles the edge cases listed above: follows redirects, honors
    {{bots}}/{{nobots}}, and retries after an edit conflict by re-fetching
    the page after a short sleep.
    """
    page_to_edit = pywikibot.Page(pywikibot.Site(), str_title)
    if page_to_edit.isRedirectPage():
        page_to_edit = page_to_edit.getRedirectTarget()
    if not page_to_edit.botMayEdit():
        # Attempting to save the page when botMayEdit() is False will throw an OtherPageSaveError
        DYKUpdateBotUtils.log('Couldn\'t edit ' + page_to_edit.title() + ' due to {{bots}} or {{nobots}}')
        return

    retry = True
    while retry:
        retry = False
        try:
            # Separate the appended message from existing content (if any)
            if not page_to_edit.text.isspace():
                page_to_edit.text += '\n\n'
            page_to_edit.text += str_message
            self._edit(page_to_edit, str_edit_summary)
        except pywikibot.EditConflict:
            retry = True
            DYKUpdateBotUtils.log('Edit conflicted on ' + page_to_edit.title() + ' will retry after a short nap')
            pywikibot.sleep(10)  # sleep for 10 seconds
            # Re-fetch a fresh copy of the page before retrying
            page_to_edit = pywikibot.Page(pywikibot.Site(), page_to_edit.title())
msg = getLoginMsg(username, pw, null);


def _is_on(self) -> bool:
    """Return True iff the on-disk kill switch file reads 'on' (case-insensitive).

    The switch lives in UpdateBotSwitch.txt next to this script; anything other
    than 'on' tells the bot to exit its main loop.
    """
    with open(str(pathlib.Path(__file__).parent / 'UpdateBotSwitch.txt'), 'r', encoding='utf-8') as f:
        str_file_switch = f.read()
    is_file_switch_on = str_file_switch.strip().lower() == 'on'
    if not is_file_switch_on:
        DYKUpdateBotUtils.log('Text file switch is not "on", exiting...')
    return is_file_switch_on


def _edit(self, page_to_edit, str_edit_summary) -> None:
    """Save page_to_edit with str_edit_summary, skipping salted and protected pages."""
    DYKUpdateBotUtils.log('Editing ' + page_to_edit.title())
    # A nonexistent, creation-protected ("salted") page can't be created by the bot
    if (not page_to_edit.exists()) and DYKUpdateBotUtils.check_if_salted(page_to_edit):
        DYKUpdateBotUtils.log('Special case: ' + page_to_edit.title() + ' is salted, skipping...')
        return
    try:
        page_to_edit.save(str_edit_summary, minor=False)
        # For a dry run where the bot outputs to local files, comment out the above line and uncomment the lines below
        # DYKUpdateBotUtils.log('Edit summary: ' + str_edit_summary)
        # filename = ''.join(character for character in page_to_edit.title() if character not in '\/:*?<>|"') + '.txt'
        # with open(str(pathlib.Path(__file__).parent / 'TestResources' / filename), 'w', encoding='utf-8') as file_write:
        #     file_write.write(page_to_edit.text)
    except pywikibot.exceptions.LockedPage:  # I'm not sure it's possible to hit this with an adminbot...
        DYKUpdateBotUtils.log('Special case: ' + page_to_edit.title() + ' is protected, skipping...')


def _log_error(self, rgstr_errors, str_error) -> None:
    """Record str_error in the rgstr_errors accumulator and echo it to the console log."""
    rgstr_errors.append(str_error)
    DYKUpdateBotUtils.log('Error: ' + str_error)
*/
@Override
public String processAllReturningText(final String s) throws ProcessException {
SAXBuilder builder = new SAXBuilder();
Element root = null;
try {
Reader i = new StringReader(s);
Document doc = builder.build(new InputSource(i));


def _log_warning(self, rgstr_warnings, str_warning) -> None:
    """Record str_warning in the rgstr_warnings accumulator and echo it to the console log."""
    rgstr_warnings.append(str_warning)
    DYKUpdateBotUtils.log('Warning: ' + str_warning)
} catch (JDOMException e) {
log.error(e.getClass().getName() + e.getLocalizedMessage());
} catch (IOException e) {
log.error(e.getClass().getName() + e.getLocalizedMessage());
} catch (NullPointerException e) {
log.error(e.getClass().getName() + e.getLocalizedMessage());
throw new ProcessException("No regular content was found, check your api\n::" + s);
} catch (Exception e) {
log.error(e.getClass().getName() + e.getLocalizedMessage());
throw new ProcessException(e.getLocalizedMessage());
}


# Set of methods broken out for easier unit testability
# Unless otherwise noted, these methods don't make network calls
# Do Not edit the wiki from within these methods, otherwise unit tests will edit the wiki!


return s;
}
/**
*
* @param startElement the, where the search begins
* @throws ProcessException if problems with login
*/
private void findContent(final Element startElement) throws ProcessException {


class DYKUpdateBotUtils():
Element loginEl = startElement.getChild("login", EnWikiBot.ns);
@staticmethod
def wikilink_to_queue(num_queue, capitalize) -> str:
    """Return a wikilink to queue num_queue, with 'Queue'/'queue' per capitalize."""
    return '[[{0}{1}|{2}ueue {1}]]'.format(DYKUpdateBot.QUEUE_ROOT_LOC,
                                           num_queue,
                                           'Q' if capitalize else 'q')
, loginEl.getAttributeValue("lgusername"), "0", true);
} catch (DataConversionException e) {
e.printStackTrace();
}
} else if (result.equalsIgnoreCase(needToken) && reTryLimit ) {
msg = getLoginMsg(username, pw, loginEl.getAttributeValue("token"));
reTry = true;
reTryLimit = false;
} else if (result.equalsIgnoreCase(wrongPass)) {
throw new ProcessException("Wrong Password");
} else if (result.equalsIgnoreCase(notExists)) {
throw new ProcessException("No sutch User");
}


# Returns a tuple:
# * First value is True if dykbotdo was found, False if not
# * Second value is the admin signature in dykbotdo, or None if not found
@staticmethod
def parse_dykbotdo(str_queue) -> (bool, str):
    """Scan queue wikitext for the {{DYKbotdo}} approval template."""
    templates_in_queue = mwparserfromhell.parse(str_queue, skip_style_tags=True).filter_templates()
    for template in templates_in_queue:
        if template.name.matches('DYKbotdo'):
            # First positional parameter (if present) is the approving admin's signature
            return True, str(template.get(1)) if template.has(1) else None
    return False, None


/* (non-Javadoc)
# Returns:
# * Hooks if <!--Hooks--> and <!--HooksEnd--> tags are in order
* @see net.sourceforge.jwbf.mediawiki.actions.util.MWAction#hasMoreMessages()
*/
# * None if not
@Override
@staticmethod
def extract_hooks(str_queue_or_tdyk) -> str:
public boolean hasMoreMessages() {
idx_hooks_tag = str_queue_or_tdyk.find('<!--Hooks-->')
boolean temp = super.hasMoreMessages() || reTry;
idx_hooksend_tag = str_queue_or_tdyk.find('<!--HooksEnd-->', max(idx_hooks_tag, 0))
reTry = false;
if min(idx_hooks_tag, idx_hooksend_tag) == -1:
return temp;
return None
}
return str_queue_or_tdyk[idx_hooks_tag + 12:idx_hooksend_tag].strip()
}
</source>


# Returns:
# * pywikibot.FilePage of the file in the DYK set if detected
# * None if not
@staticmethod
def find_file(str_hooks) -> pywikibot.FilePage:
    """Locate the set's image via the {{Main page image/DYK}} template, or None."""
    for template in mwparserfromhell.parse(str_hooks, skip_style_tags=True).filter_templates():
        if not template.name.matches('Main page image/DYK'):
            continue
        # Note it's fine whether the parameter is File:XYZ.jpg, Image:XYZ.jpg, or XYZ.jpg
        # all three formats will create the same FilePage object returning File:XYZ.jpg from title()
        str_file = str(template.get('image').value)
        idx_pipe = str_file.find('{{!}}')
        if idx_pipe != -1:
            DYKUpdateBotUtils.log('Special case: Stripping everything after pipe from filename "{0}"'.format(str_file))
            str_file = str_file[:idx_pipe]
        return pywikibot.FilePage(pywikibot.Site(), str_file)
    return None


# This method makes network calls to the Wikipedia API (read-only)
<source lang="java">
# Returns:
// this is JWBF's PostModifyContent (rev 178) modified for edit conflicts and new(er) edit token requirements
# * None if protection looks good
/*
# * A string describing the issue if not
* Copyright 2007 Thomas Stock.
# Cases to validate if changing this function (leverage the unit tests!):
*
# * File that doesn't exist
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
# * File:Nlksjdkfjskdljflkdsjfame.jpg
* use this file except in compliance with the License. You may obtain a copy of
# * Fully not-protected file
* the License at
# * en:File:Emmelie de Forest Hunter & Prey.png and commons:File:Novo Selo TE 01.JPG
*
# * Fully not-protected file on Commons with an enwiki description page
* http://www.apache.org/licenses/LICENSE-2.0
# * en:File:MET Breuer (48377070386).jpg
*
# * Semi-protected file
* Unless required by applicable law or agreed to in writing, software
# * en:File:Amy Barlow.jpg and commons:File:Flag of Palestine.svg
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# * Fully protected file indefinitely protected
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# * en:File:George Floyd neck knelt on by police officer.png and commons:File:Name.jpg
* License for the specific language governing permissions and limitations under
# * Fully protected file via cascading protection
* the License.
# * en:File:WPVG icon 2016.svg and commons:File:Wikitech-2020-logo.svg
*
# * Fully protected file with protection expiring before set leaves the Main Page
* Contributors:
# * Use the API to find examples:
*
# * https://commons.wikimedia.org/w/api.php?action=query&list=allpages&apnamespace=6&apprtype=edit&apprexpiry=definite&apprlevel=sysop&aplimit=500
*/
# * Fully protected file with protection expiring after set leaves the Main Page
import java.text.ParseException;
# * see URL above
import java.text.SimpleDateFormat;
@staticmethod
def check_if_protected(filepage, time_set_leaving) -> str:
    """Check the set image is fully edit-protected while on the Main Page.

    Returns None if protection looks good, otherwise a string describing the
    issue. Makes read-only network calls to the Wikipedia API.
    """
    str_file_for_output = filepage.title(as_link=True, textlink=True)
    # Also look for the file on the shared repo (Commons)
    filepage_commons = pywikibot.FilePage(pywikibot.Site().image_repository(), filepage.title())
    if not (filepage.exists() or filepage_commons.exists()):
        return str_file_for_output + ' does not exist'
    on_commons = filepage.file_is_shared()
    if on_commons:
        filepage = filepage_commons  # check protection where the file actually lives
    edit_protections = filepage.protection().get('edit')
    if edit_protections is None:
        if on_commons:
            return str_file_for_output + ' is not protected; either 1) Upload the file to en.wiki, or 2) protect the file at Commons'
        else:  # on enwiki
            return str_file_for_output + ' is not protected'
    if edit_protections[0] != 'sysop':
        return str_file_for_output + ' is not fully protected'
    str_prot_end = edit_protections[1]
    if str_prot_end == 'infinity':
        return None
    time_prot_end = pywikibot.Timestamp.fromISOformat(str_prot_end).replace(tzinfo=timezone.utc)
    if time_prot_end < time_set_leaving:
        return 'The protection for ' + str_file_for_output + ' will expire before or while it\'s on the Main Page'
    return None  # protection expires after set leaves the Main Page


@staticmethod
import net.sourceforge.jwbf.actions.Post;
def calculate_drift_core(time_update, timedelta_between_updates, minutes_max_advance, minutes_max_delay) -> int:
import net.sourceforge.jwbf.actions.mw.HttpAction;
seconds_per_day = 60 * 60 * 24
import net.sourceforge.jwbf.actions.mw.MediaWiki;
seconds_least_difference_from_0000 = 60 * 60 * 24
import net.sourceforge.jwbf.actions.mw.util.MWAction;
set_seconds_differences = set()
import net.sourceforge.jwbf.actions.mw.util.ProcessException;
time_iter = time_update
import net.sourceforge.jwbf.contentRep.mw.ContentAccessable;
while True:
import net.sourceforge.jwbf.contentRep.mw.SimpleArticle;
current_difference_from_0000 = int(time_iter.timestamp()) % seconds_per_day
if current_difference_from_0000 > (seconds_per_day / 2):
current_difference_from_0000 = -(seconds_per_day - current_difference_from_0000)
if abs(seconds_least_difference_from_0000) > abs(current_difference_from_0000):
seconds_least_difference_from_0000 = current_difference_from_0000
if seconds_least_difference_from_0000 == 0:
break
if (current_difference_from_0000 in set_seconds_differences) or (len(set_seconds_differences) >= 24):
break
set_seconds_differences.add(current_difference_from_0000)
time_iter = time_iter + timedelta_between_updates


if seconds_least_difference_from_0000 > 0:
import org.apache.log4j.Logger;
return -min(minutes_max_advance, seconds_least_difference_from_0000 // 60)
elif seconds_least_difference_from_0000 < 0:
return min(minutes_max_delay, -seconds_least_difference_from_0000 // 60)
else:
return 0


# This method makes network calls to the Wikipedia API (read-only)
/**
@staticmethod
*
def check_if_salted(page) -> bool:
*
create_protections = page.protection().get('create')
* Writes an article.
return create_protections and (create_protections[0] == 'sysop')
*
*
* TODO no api use.
* @author Thomas Stock
* @supportedBy MediaWiki 1.9.x, 1.10.x, 1.11.x, 1.12.x, 1.13.x, 1.14.x
*
*/
public class PostModifyContentWithEditConflicts extends MWAction {


@staticmethod
def archive(str_archive, time_update, hooks_outgoing) -> str:
    """Insert the outgoing hook set into the Recent additions archive wikitext.

    Creates a ===Day Month Year=== section just after <!--BOTPOINTER--> when no
    section exists for time_update's date, then prepends the timestamped set
    under that heading. Returns the new archive text.
    """
    str_section_heading = '==={d.day} {d:%B} {d.year}==='.format(d=time_update)
    str_set_heading = '*\'\'\'\'\'{d:%H}:{d:%M}, {d.day} {d:%B} {d.year} (UTC)\'\'\'\'\''.format(d=time_update)
    idx_this_date = str_archive.find(str_section_heading)  # check if there is a section heading already for today
    if idx_this_date == -1:  # if there isn't, create a new section heading
        idx_insert_section = str_archive.find('\n', str_archive.find('<!--BOTPOINTER-->')) + 1
        str_archive = DYKUpdateBotUtils._insert_str(str_archive, idx_insert_section, str_section_heading + '\n')
        idx_this_date = idx_insert_section
    # Move past the heading's newline; newest set goes at the top of the section
    idx_this_date = str_archive.find('\n', idx_this_date) + 1
    return DYKUpdateBotUtils._insert_str(str_archive, idx_this_date, str_set_heading + '\n' + hooks_outgoing + '\n\n')


@staticmethod
def parse_credits(str_queue) -> []:
    """Parse {{DYKmake}}/{{DYKnom}} templates in queue wikitext into DYKCredit objects.

    Skips malformed or placeholder credits (missing params, 'Example' article,
    'Editor'/'Nominator' usernames) and strips [[ ]] brackets from article names.
    """
    templates_in_queue = mwparserfromhell.parse(str_queue, skip_style_tags=True).filter_templates()
    rgcredits = []
    for template in templates_in_queue:
        if template.name.matches('DYKmake') or template.name.matches('DYKnom'):
            # Param 1 is the article, param 2 the editor/nominator; both required
            if not (template.has(1) and template.has(2)):
                continue
            credit = DYKCredit()
            credit.str_article = html.unescape(str(template.get(1).value))
            credit.str_user = html.unescape(str(template.get(2).value))
            credit.is_dykmake = template.name.matches('DYKmake')
            if template.has('subpage'):
                str_subpage = html.unescape(str(template.get('subpage').value))
                if str_subpage != '':
                    credit.str_nompage = 'Template:Did you know nominations/' + str_subpage

            # sanitize
            if (credit.str_article == 'Example' or credit.str_article == '' or
                    credit.str_user == '' or credit.str_user == 'Editor' or credit.str_user == 'Nominator'):
                continue
            credit.str_article = credit.str_article.replace('[[', '').replace(']]', '')
            rgcredits.append(credit)
    return rgcredits
if (numMessagesSent == 1) {
return postMessage; // send off first request to grab edit token from the response
}


# This method makes network calls to the Wikipedia API (read-only)
# As "output", sets str_article on valid credits & deletes credits for nonexistent articles
@staticmethod
def validate_credits_articles(rgcredits, fn_log_warning) -> None:
    # Articles:
    # * expand any templates in the article name
    # * delete credits for nonexistent articles
    # * follow redirects
    # * normalize titles
    dict_processed = {}  # original title -> already-validated credit, to avoid repeat lookups
    for idx_credit in reversed(range(len(rgcredits))):  # reversed so del by index is safe
        str_article_orig = rgcredits[idx_credit].str_article
        if str_article_orig in dict_processed:
            rgcredits[idx_credit].str_article = dict_processed[str_article_orig].str_article
            continue

        str_article_processed = str_article_orig
        if '}}' in str_article_processed:
            str_article_processed = pywikibot.Site().expand_text(text=str_article_processed)
            DYKUpdateBotUtils.log('Special case: Credit article title contains template "{0}"->"{1}"'.format(str_article_orig, str_article_processed))
        page_article = pywikibot.Page(pywikibot.Site(), str_article_processed)
        if page_article.isRedirectPage():
            page_article = page_article.getRedirectTarget()
        if not page_article.exists():
            fn_log_warning('Article [[{0}]] does not exist'.format(str_article_orig))
            del rgcredits[idx_credit]
            continue
        str_article_processed = page_article.title()  # normalized title
        rgcredits[idx_credit].str_article = str_article_processed
        dict_processed[str_article_orig] = rgcredits[idx_credit]


# This method makes network calls to the Wikipedia API (read-only)
# As "output", sets str_user_talk on valid credits
@staticmethod
def validate_credits_users(rgcredits, fn_log_warning) -> None:
    # Users:
    # * expand any templates in the username
    # * check for nonexistent users
    # * follow redirects
    # * normalize titles
    dict_processed = {}  # original username -> already-validated credit
    for credit in rgcredits:
        str_user_orig = credit.str_user
        if str_user_orig in dict_processed:
            credit.str_user_talk = dict_processed[str_user_orig].str_user_talk
            continue

        str_user_processed = str_user_orig
        if '}}' in str_user_processed:
            str_user_processed = pywikibot.Site().expand_text(text=str_user_processed)
            DYKUpdateBotUtils.log('Special case: Credit username contains template "{0}"->"{1}"'.format(str_user_orig, str_user_processed))
        user = pywikibot.User(pywikibot.Site(), str_user_processed)
        # IPs are valid only if they've actually edited
        is_valid_user = user.isRegistered() or (user.isAnonymous() and user.last_edit)
        if not is_valid_user:
            # was the user recently renamed?
            # example API call: https://en.wikipedia.org/w/api.php?action=query&list=logevents&letype=renameuser&letitle=User:Carrot%20official&lelimit=1
            for entry in pywikibot.Site().logevents('renameuser', page=user.title(), total=1):
                if entry['params']['olduser'] == user.username:
                    user = pywikibot.User(pywikibot.Site(), entry['params']['newuser'])
                    DYKUpdateBotUtils.log('Special case: User listed in credit was renamed "{0}"->"{1}"'.format(str_user_orig, user.username))
                    is_valid_user = user.isRegistered() or (user.isAnonymous() and user.last_edit)

        if is_valid_user:
            page_usertalk = user.getUserTalkPage()
            if page_usertalk.isRedirectPage():
                DYKUpdateBotUtils.log('Special case: User talk is a redirect "{0}"'.format(page_usertalk.title()))
                page_usertalk = page_usertalk.getRedirectTarget()
            if page_usertalk.isTalkPage():
                # no funny business - the redirect above shouldn't make the bot, eg, tag the Main Page with a DYK credit
                credit.str_user_talk = page_usertalk.title()
        else:
            fn_log_warning('The username \'{0}\' is invalid'.format(str_user_orig))
        dict_processed[str_user_orig] = credit
postMessage.addParam("wpTextbox1", article.getText());


# This method makes network calls to the Wikipedia API (read-only) if:
# * There's a template within the hooks
# * There's no string match between the article listed in the credit and the hooks - redirect search
# As "output", sets str_hook and (if first hook) str_file on credits
@staticmethod
def populate_hooks_and_file(rgcredits, str_hooks, str_file) -> None:
    # remove stuff at the top that isn't hooks (eg image)
    str_hooks = str_hooks[str_hooks.rfind('\n', 0, str_hooks.find('...')):].strip()

    # expand templates
    str_hooks_normalized = str_hooks
    if '}}' in str_hooks_normalized:
        str_hooks_normalized = pywikibot.Site().expand_text(text=str_hooks_normalized)

    # unescape HTML and replace non-breaking spaces with normal spaces
    str_hooks_normalized = html.unescape(str_hooks_normalized).replace(html.unescape('&nbsp;'), ' ')

    # Keep the original and normalized hook lists index-aligned throughout
    rghooks_orig = str_hooks.split('\n')
    rghooks_normalized = str_hooks_normalized.lower().split('\n')

    # remove any lines without '...' and trim any leading characters, like *
    for idx_hook in reversed(range(len(rghooks_orig))):
        str_hook = rghooks_orig[idx_hook]
        idx_that = str_hook.find('...')
        if idx_that == -1:
            del rghooks_orig[idx_hook]
            del rghooks_normalized[idx_hook]
        else:
            rghooks_orig[idx_hook] = str_hook[idx_that:]

    # search for the hook for each article
    dict_processed = {}  # article title -> credit whose hook/file were already found
    for credit in rgcredits:
        if credit.str_article in dict_processed:
            credit.str_hook = dict_processed[credit.str_article].str_hook
            credit.str_file = dict_processed[credit.str_article].str_file
            continue

        idx_found_hook = DYKUpdateBotUtils._find_hook(credit.str_article, rghooks_normalized)
        if idx_found_hook == -1:  # maybe the hook links to a page that redirects to str_article?
            page_article = pywikibot.Page(pywikibot.Site(), credit.str_article)
            for page_redirect in page_article.getReferences(filter_redirects=True, namespaces=pywikibot.site.Namespace.MAIN):
                idx_found_hook = DYKUpdateBotUtils._find_hook(page_redirect.title(), rghooks_normalized)
                if idx_found_hook != -1:
                    DYKUpdateBotUtils.log('Special case: Hook matches redirect to article "{0}"'.format(credit.str_article))
                    break  # got a hit! no need to keep iterating through redirects

        if idx_found_hook >= 0:
            credit.str_hook = rghooks_orig[idx_found_hook]
            if idx_found_hook == 0:
                # the set's image accompanies the first hook
                credit.str_file = str_file

        dict_processed[credit.str_article] = credit
} else if (tParts[i].indexOf("wpStarttime") > 0) {
// value="(\d+)" name=["\']wpStarttime["\']
int begin = tParts[i].indexOf("value") + 7;
int end = tParts[i].indexOf("name") - 2;
// System.out.println("read wp start:" + tParts[i]);


@staticmethod
table.put("wpStarttime", tParts[i].substring(begin, end));
def _find_hook(str_article, rghooks_normalized) -> int:
str_article_lower = str_article.lower()
for idx_hook, str_hook_normalized in enumerate(rghooks_normalized):
if str_article_lower in str_hook_normalized:
return idx_hook
return -1


@staticmethod
def tag_article_history(str_talk, credit, time_update) -> (str, str):
    """Add DYK parameters to an existing {{Article history}} template on a talk page.

    Returns (updated talk text, edit summary); the summary is None when no
    Article history template (under any of its aliases) was found, in which
    case str_talk is returned unchanged.
    """
    template_ah = None
    templates_on_talk = mwparserfromhell.parse(str_talk, skip_style_tags=True).filter_templates()
    for template in templates_on_talk:
        tname = template.name
        if (tname.matches('Article history') or tname.matches('Articlehistory') or
                tname.matches('Article History') or tname.matches('ArticleHistory') or
                tname.matches('Article milestones') or tname.matches('Articlemilestones')):
            template_ah = template
            break

    str_edit_summary = None
    if template_ah:
        str_edit_summary = ('Article appeared on [[WP:Did you know|DYK]] on {d.day} {d:%B} {d.year}'
                            ', adding to {{{{[[Template:Article history|Article history]]}}}}'.format(d=time_update))
        str_article_history_orig = str(template_ah)
        # According to documentation at Template:Article_history, DYK params go between |currentstatus and |topic
        param_topic = template_ah.get('topic') if template_ah.has('topic') else None
        template_ah.add('dykdate', '{d.day} {d:%B} {d.year}'.format(d=time_update), before=param_topic)
        if credit.str_hook:
            template_ah.add('dykentry', credit.str_hook, before=param_topic)
        if credit.str_nompage:
            template_ah.add('dyknom', credit.str_nompage, before=param_topic)
        # Splice the modified template back into the talk text
        str_talk = str_talk.replace(str_article_history_orig, str(template_ah))
    return str_talk, str_edit_summary


# Returns a tuple:
}
# * First value is the dyktalk tag
</source>
# * Second value is the edit summary
@staticmethod
def build_dyktalk_tag(credit, time_update) -> (str, str):
str_tag = '\n{{{{DYK talk|{d.day} {d:%B}|{d.year}{str_image_param}{str_hook_param}{str_nompage_param}}}}}'.format(
d=time_update,
str_image_param=('|image=' + credit.str_file) if credit.str_file else '',
str_hook_param=('|entry=' + credit.str_hook) if credit.str_hook else '',
str_nompage_param=('|nompage=' + credit.str_nompage) if credit.str_nompage else '')
str_edit_summary = ('Article appeared on [[WP:Did you know|DYK]] on {d.day} {d:%B} {d.year}'
', adding {{{{[[Template:DYK talk|DYK talk]]}}}}'.format(d=time_update))
return str_tag, str_edit_summary


@staticmethod
def add_template_to_talk(str_talk, str_tag) -> str:
    """Insert str_tag after the last header template on a talk page; return new text."""
    # The header is everything before the first section heading (or the whole page).
    idx_first_section = str_talk.find('==')
    str_header = str_talk if idx_first_section == -1 else str_talk[:idx_first_section]
    idx_insert = DYKUpdateBotUtils._last_template_index(str_header)
    # Give the tag its own line unless the insertion point already sits on a newline / EOF.
    needs_newline = (idx_insert < len(str_talk)) and (str_talk[idx_insert] != '\n')
    str_to_insert = str_tag + '\n' if needs_newline else str_tag
    return DYKUpdateBotUtils._insert_str(str_talk, idx_insert, str_to_insert).strip()


@staticmethod
<source lang="java">
def _last_template_index(str_header) -> int:
import java.io.IOException;
# To a human reader, GA / DYK etc discussions aren't templates, they're part of the content
import java.io.StringReader;
# so detect and remove them from what we consider the header
# GA discussion transclusion example from Talk:Icos: {{Talk:Icos/GA1}}
# DYK discussion transclusion example from Special:Diff/873606519: {{Did you know nominations/Bishop John Carroll (statue)}}
match = search('\{\{\s*([Tt]alk:|[Tt]emplate talk:|([Tt]emplate:\s*)?[Dd]id you know nominations/)', str_header)
if match:
str_header = str_header[:match.start()]
idx_last_template = str_header.rfind('}}')
if idx_last_template == -1:
idx_last_template = 0
else:
idx_last_template += 2
return idx_last_template


# Returns username if one was found, None if not
@staticmethod
def find_user_link(str_dykbotdo_signature) -> str:
    """Extract the first username linked via User:/User talk: in a signature."""
    links_in_sig = mwparserfromhell.parse(str_dykbotdo_signature, skip_style_tags=True).filter_wikilinks()
    for link in links_in_sig:
        str_title = str(link.title)
        idx_user_or_usertalk = max(str_title.find('User:'), str_title.find('User talk:'))
        if idx_user_or_usertalk != -1:
            # Username starts after the namespace colon
            str_user = str_title[str_title.find(':', idx_user_or_usertalk) + 1:]
            # Drop any #section anchor or /subpage suffix
            idx_trailing = max(str_user.find('#'), str_user.find('/'))
            if idx_trailing != -1:
                str_user = str_user[:idx_trailing]
            return str_user
    return None


# Returns a tuple:
import net.sourceforge.jwbf.actions.Get;
# * First value is the message on the talk page (section + credit + signature)
import net.sourceforge.jwbf.actions.Post;
# * Second value is the edit summary
import net.sourceforge.jwbf.actions.mw.HttpAction;
@staticmethod
import net.sourceforge.jwbf.actions.mw.MediaWiki;
def build_user_talk_credit(credit, str_dykbotdo_signature, str_promoting_admin) -> (str, str):
import net.sourceforge.jwbf.actions.mw.util.MWAction;
str_message = ('==DYK for {str_article}==\n'
'{{{{subst:Template:{str_template} |article={str_article} {str_hook_param} '
'{str_nompage_param} |optional= }}}} {str_sig}'
.format(str_article=credit.str_article,
str_template='DYKmake/DYKmakecredit' if credit.is_dykmake else 'DYKnom/DYKnomcredit',
str_hook_param=('|hook=' + credit.str_hook) if credit.str_hook else '',
str_nompage_param=('|nompage=' + credit.str_nompage) if credit.str_nompage else '',
str_sig=(str_dykbotdo_signature + ' ~~~~~') if str_dykbotdo_signature else '~~~~'))
str_edit_summary = 'Giving DYK credit for [[{str_article}]]'.format(str_article=credit.str_article)
if str_promoting_admin:
str_edit_summary += ' on behalf of [[User:{str_username}|{str_username}]]'.format(str_username=str_promoting_admin)
return str_message, str_edit_summary


@staticmethod
public class PostUnprotectWithReason extends MWAction {
def _insert_str(str_target, idx, str_insert) -> str:
private final String title;
return str_target[:idx] + str_insert + str_target[idx:]
private final String reason;
private final Get tokenRequest;
private String token;
private boolean inHandshake = true;
private boolean finished = false;
public PostUnprotectWithReason(String title, String reason) throws JDOMException, IOException {
this.title = title;
this.reason = reason;
if (title == null || title.length() == 0) {
throw new IllegalArgumentException("The argument 'title' must not be null or empty");
}
// URL to fetch a protect token from the API
String url = "/api.php?format=xml&action=query&prop=info&titles=" +
MediaWiki.encode(title) + "&intoken=protect";
tokenRequest = new Get(url);
}
@Override
public String processReturningText(String s, HttpAction response) {
if (response.getRequest().equals(tokenRequest.getRequest())) {
Document tokenPage;
try {
tokenPage = new SAXBuilder().build(new InputSource(new StringReader(s)));
token = tokenPage.getRootElement().getChild("query", EnWikiBot.ns).getChild("pages", EnWikiBot.ns)
.getChild("page", EnWikiBot.ns).getAttributeValue("protecttoken");
} catch (JDOMException e) {
throw new UnprotectException();
} catch (IOException e) {
throw new UnprotectException();
}
}
return "";
}
protected HttpAction getSecondRequest() {
HttpAction unprotectRequest = null;
if (token == null || token.length() == 0) {
throw new IllegalArgumentException(
"The argument 'token' must not be \""
+ token + "\"");
}
String bar = MediaWiki.encode("|");


@staticmethod
String unprotectURL = "/api.php?format=xml&action=protect" +
def log(str_to_log) -> None:
"&protections=edit=all" + bar + "move=all" + bar + "upload=all" +
print(str_to_log, flush=True)
"&title=" + MediaWiki.encode(title) +
"&token=" + MediaWiki.encode(token) +
"&reason=" + MediaWiki.encode(reason);
unprotectRequest = new Post(unprotectURL);


return unprotectRequest;
}


class ValidationResults():
    """Accumulator for everything discovered while validating the next DYK update."""

    def __init__(self) -> None:
        self.rgstr_errors = []    # blocking errors; any entry prevents the update
        self.rgstr_warnings = []  # non-blocking issues to report
        self.page_TDYK = None     # Template:Did you know page object
        self.page_queue = None    # queue page object for the next update
        self.num_queue = 0        # number of the queue being promoted
        self.file_incoming = None
        self.hooks_incoming = None
        self.hooks_outgoing = None
        self.str_dykbotdo_signature = None
        self.timedelta_between_updates = None
@Override

public boolean hasMoreMessages() {

return !finished;
class DYKCredit():
}
def __init__(self) -> None:
self.str_article = None
public class UnprotectException extends RuntimeException {
self.str_user = None
private static final long serialVersionUID = 1L;
self.str_user_talk = None
}
self.str_nompage = None
}
self.is_dykmake = True
self.str_hook = None
self.str_file = None

def __str__(self):
return 'DYKCredit! article:{0}, user:{1}, nompage:{2}, is_dykmake:{3}, hook:{4}, file:{5}'.format(
self.str_article, self.str_user, self.str_nompage, self.is_dykmake, self.str_hook, self.str_file)


def main() -> None:
bot = DYKUpdateBot()
bot.run()

if __name__ == '__main__':
main()

</source>
</source>

Revision as of 05:01, 10 May 2021

Below is the code for DYKUpdateBot. The bot runs on WP:Pywikibot.

import os
import pathlib
import pywikibot
import mwparserfromhell
import html
from datetime import datetime, timedelta, timezone
from functools import partial
from re import search


class DYKUpdateBot():
    """Adminbot that rotates 'Did you know' (DYK) sets on the English Wikipedia.

    Each pass of run() works out when the next update is due, validates the
    next queue shortly beforehand, and at the scheduled time swaps the queued
    hooks onto {{Did you know}}, archives the outgoing set, credits authors
    and nominators, clears the queue, and resets the update clock.
    """

    TDYK_LOC = 'Template:Did you know'
    NEXT_UPDATE_QUEUE_LOC = 'Template:Did you know/Queue/Next'
    LAST_UPDATE_TIME_LOC = 'Template:Did you know/Next update/Time'
    TIME_BETWEEN_UPDATES_LOC = 'User:DYKUpdateBot/Time Between Updates'
    QUEUE_ROOT_LOC = 'Template:Did you know/Queue/'
    WTDYK_LOC = 'Wikipedia talk:Did you know'
    ARCHIVE_LOC = 'Wikipedia:Recent additions'
    ERROR_OUTPUT_LOC = 'User:DYKUpdateBot/Errors'
    DRIFT_LOC = 'User:DYKUpdateBot/ResyncDrift'
    SECONDS_BETWEEN_STATUS_CHECKS = 600
    NUM_QUEUES = 7

    def run(self) -> None:
        """Main poll-and-update loop.

        Exits when the local text-file switch is off or the login session is
        lost. Sleeps up to SECONDS_BETWEEN_STATUS_CHECKS between passes,
        shortening the nap so the update itself is not overslept.
        """
        DYKUpdateBotUtils.log('PID: {0}'.format(os.getpid()))

        pywikibot.Site().login()
        while self._is_on() and pywikibot.Site().logged_in():
            DYKUpdateBotUtils.log(datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S %Z'))

            results = ValidationResults()
            seconds_until_next_update = DYKUpdateBot.SECONDS_BETWEEN_STATUS_CHECKS  # placeholder
            time_next_update, time_next_update_leaving = self._calculate_next_update_time(results.rgstr_errors)
            if not results.rgstr_errors:
                time_now = pywikibot.Site().server_time().replace(tzinfo=timezone.utc)
                seconds_until_next_update = int((time_next_update - time_now).total_seconds())
                DYKUpdateBotUtils.log('Seconds left until next update: {0}'.format(seconds_until_next_update))

                # start validating two hours out so humans have time to fix problems
                if seconds_until_next_update < 7200:
                    self.validate_before_update(results, time_next_update_leaving)
                if seconds_until_next_update <= 0:
                    results.timedelta_between_updates = time_next_update_leaving - time_next_update
                    self.update_dyk(time_now, results)

            self._post_errors(results.rgstr_warnings, results.rgstr_errors)

            seconds_to_sleep = DYKUpdateBot.SECONDS_BETWEEN_STATUS_CHECKS
            if seconds_until_next_update > 0:
                seconds_to_sleep = min(seconds_to_sleep, seconds_until_next_update)
            pywikibot.sleep(seconds_to_sleep)

    def _calculate_next_update_time(self, rgstr_errors) -> (pywikibot.Timestamp, pywikibot.Timestamp):
        """Return (when the next set goes up, when that set comes down).

        On a parse failure an error is appended to rgstr_errors and a
        placeholder pair of equal timestamps is returned.
        """
        page_last_update_time = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.LAST_UPDATE_TIME_LOC)
        time_next_update = datetime.now(timezone.utc)  # placeholder
        try:
            time_next_update = pywikibot.Timestamp.fromISOformat(page_last_update_time.text.strip()).replace(tzinfo=timezone.utc)
        except Exception:  # narrowed from bare except: don't swallow KeyboardInterrupt/SystemExit
            self._log_error(rgstr_errors, 'Time at [[' + DYKUpdateBot.LAST_UPDATE_TIME_LOC +
                            ']] is not formatted correctly')
            return time_next_update, time_next_update

        page_time_between_updates = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.TIME_BETWEEN_UPDATES_LOC)
        seconds_between_updates = 0  # placeholder
        try:
            seconds_between_updates = int(page_time_between_updates.text)
        except ValueError:
            self._log_error(rgstr_errors, 'Time between updates at [[' + DYKUpdateBot.TIME_BETWEEN_UPDATES_LOC +
                            ']] is not formatted correctly')
            return time_next_update, time_next_update

        time_next_update = time_next_update + timedelta(seconds=seconds_between_updates)
        return time_next_update, time_next_update + timedelta(seconds=seconds_between_updates)

    # Returns:
    # * Int of the next queue number, parsed from NEXT_UPDATE_QUEUE_LOC
    # * 0 if NEXT_UPDATE_QUEUE_LOC doesn't parse to an int
    def _find_next_queue_number(self) -> int:
        page = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.NEXT_UPDATE_QUEUE_LOC)
        num_next_queue = 0
        try:
            num_next_queue = int(page.text)
        except ValueError:
            pass  # caller treats 0 as "couldn't parse"
        return num_next_queue

    def validate_before_update(self, results_val, time_set_leaving):
        """Run all pre-update checks, filling results_val in place.

        Stops at the first blocking problem; every blocker is appended to
        results_val.rgstr_errors and update_dyk() refuses to run while any
        errors are present.
        """
        # figure out which queue to update from
        results_val.num_queue = self._find_next_queue_number()
        if results_val.num_queue == 0:
            self._log_error(results_val.rgstr_errors, 'Could not parse [[{0}]]; check if it\'s a number 1-{1}'
                            .format(DYKUpdateBot.NEXT_UPDATE_QUEUE_LOC, DYKUpdateBot.NUM_QUEUES))
            return results_val

        # get the wikitext of the queue
        results_val.page_queue = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.QUEUE_ROOT_LOC + str(results_val.num_queue))
        str_queue = results_val.page_queue.text
        str_link_to_queue = DYKUpdateBotUtils.wikilink_to_queue(results_val.num_queue, True)

        # make sure all curly braces are matched
        if str_queue.count('{{') != str_queue.count('}}'):
            self._log_error(results_val.rgstr_errors, 'Unmatched left <nowiki>("{{") and right ("}}")</nowiki> curly braces in ' + str_link_to_queue)
            return results_val

        # make sure the queue has {{DYKbotdo}} (an admin's sign-off)
        has_dykbotdo, results_val.str_dykbotdo_signature = DYKUpdateBotUtils.parse_dykbotdo(str_queue)
        if not has_dykbotdo:
            self._post_almost_late_message_to_WTDYK(time_set_leaving, results_val.num_queue)
            self._log_error(results_val.rgstr_errors, str_link_to_queue + ' is not tagged with {{tl|DYKbotdo}}')
            return results_val

        # make sure the queue has <!--Hooks--> and <!--HooksEnd--> and find hooks
        results_val.hooks_incoming = DYKUpdateBotUtils.extract_hooks(str_queue)
        if results_val.hooks_incoming is None:
            self._log_error(results_val.rgstr_errors, str_link_to_queue + ' is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>')
            return results_val

        # make sure the image/file is protected
        results_val.file_incoming = DYKUpdateBotUtils.find_file(results_val.hooks_incoming)
        if results_val.file_incoming:
            str_protection_error = DYKUpdateBotUtils.check_if_protected(results_val.file_incoming, time_set_leaving)
            if str_protection_error:
                self._log_error(results_val.rgstr_errors, str_protection_error)
        else:
            # a missing image is only a warning; a set may legitimately run without one
            self._log_warning(results_val.rgstr_warnings, 'Can\'t find the image / file for incoming DYK set\n')

        # fetch T:DYK
        results_val.page_TDYK = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.TDYK_LOC)
        str_tdyk = results_val.page_TDYK.text

        # make sure T:DYK has <!--Hooks--> and <!--HooksEnd--> and find hooks
        results_val.hooks_outgoing = DYKUpdateBotUtils.extract_hooks(str_tdyk)
        if results_val.hooks_outgoing is None:
            self._log_error(results_val.rgstr_errors, '[[' + DYKUpdateBot.TDYK_LOC + ']] is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>')
            return results_val

        return results_val

    def update_dyk(self, time_update, results) -> None:
        """Perform the actual update using the data gathered by
        validate_before_update(); no-op if any blocking errors were found."""
        if results.rgstr_errors:
            return
        str_link_to_queue = DYKUpdateBotUtils.wikilink_to_queue(results.num_queue, False)

        # replace old hooks with new hooks
        results.page_TDYK.text = results.page_TDYK.text.replace(results.hooks_outgoing, results.hooks_incoming)
        self._edit(results.page_TDYK, 'Bot automatically updating DYK template with hooks copied from ' + str_link_to_queue)

        # purge the Main Page
        pywikibot.Page(pywikibot.Site(), 'Main Page').purge()

        # set last update time
        time_update = time_update.replace(second=0, microsecond=0)
        num_minutes_drift = self._calculate_drift(time_update, results.timedelta_between_updates)
        time_update_with_drift = time_update + timedelta(minutes=num_minutes_drift)
        page_last_update_time = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.LAST_UPDATE_TIME_LOC)
        page_last_update_time.text = time_update_with_drift.isoformat()
        self._edit(page_last_update_time, 'Resetting the clock' + (', with drift' if num_minutes_drift != 0 else ''))

        # archive outgoing hooks
        page_archive = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.ARCHIVE_LOC)
        page_archive.text = DYKUpdateBotUtils.archive(page_archive.text, time_update, results.hooks_outgoing)
        self._edit(page_archive, 'Archiving latest set')

        # credits - article talk, user talk
        rgcredits = self._parse_and_populate_credits(results.page_queue, results.hooks_incoming, results.file_incoming, results.rgstr_warnings)
        self._tag_articles(rgcredits, time_update)
        self._give_user_credits(rgcredits, results.str_dykbotdo_signature)

        # clear queue
        results.page_queue.text = '{{User:DYKUpdateBot/REMOVE THIS LINE}}'
        self._edit(results.page_queue, 'Update is done, removing the hooks')

        # update next queue number (wraps NUM_QUEUES -> 1)
        num_next_queue = (results.num_queue % DYKUpdateBot.NUM_QUEUES) + 1
        page_next_queue_num = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.NEXT_UPDATE_QUEUE_LOC)
        page_next_queue_num.text = str(num_next_queue)
        self._edit(page_next_queue_num, 'Next queue is ' + DYKUpdateBotUtils.wikilink_to_queue(num_next_queue, False))

        # tag outgoing file
        self._tag_outgoing_file(results.hooks_outgoing, time_update)

    def _post_almost_late_message_to_WTDYK(self, time_set_leaving, num_next_queue) -> None:
        """Warn WT:DYK that the next queue is unapproved and the update is
        close; posts at most once per set (keyed by the set's timestamp)."""
        str_timestamp = time_set_leaving.isoformat()
        page_wtdyk = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.WTDYK_LOC)
        if str_timestamp in page_wtdyk.text:
            return  # bot already posted an "almost late" message for this update, don't post again

        with open(str(pathlib.Path(__file__).parent / 'almostLate.txt'), 'r', encoding='utf-8') as f:
            str_almost_late = f.read()

        # fill in the message template's placeholders
        str_almost_late = str_almost_late.replace('queueNum', str(num_next_queue))
        str_almost_late = str_almost_late.replace('hoursLeft', 'two hours')
        str_almost_late = str_almost_late.replace('uniqueSetIdentifier', str_timestamp)

        self._append_and_edit(DYKUpdateBot.WTDYK_LOC, str_almost_late, 'DYK is almost late')

    def _calculate_drift(self, time_update, timedelta_between_updates) -> int:
        """Read the advance/delay limits from DRIFT_LOC and return the clock
        nudge in minutes (see DYKUpdateBotUtils.calculate_drift_core)."""
        num_max_advance_minutes = 0
        num_max_delay_minutes = 0
        page_drift = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.DRIFT_LOC)
        for str_line in page_drift.text.split('\n'):
            try:
                num_minutes_parsed = int(str_line[str_line.find(':') + 1:])
                if 'advance' in str_line:
                    num_max_advance_minutes = num_minutes_parsed
                elif 'delay' in str_line:
                    num_max_delay_minutes = num_minutes_parsed
            except ValueError:  # narrowed from bare except; any unparseable line disables drift
                DYKUpdateBotUtils.log('Couldn\'t parse drift')
                return 0
        return DYKUpdateBotUtils.calculate_drift_core(time_update,
                                                      timedelta_between_updates,
                                                      num_max_advance_minutes,
                                                      num_max_delay_minutes)

    def _parse_and_populate_credits(self, page_queue, hooks_incoming, file_incoming, rgstr_warnings) -> []:
        """Parse {{DYKmake}}/{{DYKnom}} credits from the queue, validate the
        articles and users they name, and attach each credit's hook/file."""
        rgcredits = DYKUpdateBotUtils.parse_credits(page_queue.text)
        # self._log_warning is already bound; passing self again would shift
        # every argument over by one and raise a TypeError on the first warning
        fn_log_warning = partial(self._log_warning, rgstr_warnings)
        DYKUpdateBotUtils.validate_credits_articles(rgcredits, fn_log_warning)
        DYKUpdateBotUtils.validate_credits_users(rgcredits, fn_log_warning)
        # file_incoming is None when validation found no image (warning already
        # logged); don't crash on .title() in that case — TODO confirm
        # populate_hooks_and_file tolerates a None filename
        str_file_incoming = file_incoming.title(with_ns=False) if file_incoming else None
        DYKUpdateBotUtils.populate_hooks_and_file(rgcredits, hooks_incoming, str_file_incoming)
        for credit in rgcredits:
            if credit.str_hook is None:
                self._log_warning(rgstr_warnings, 'Couldn\'t find hook for [[{0}]], was the hook pulled or moved to a different set?'.format(credit.str_article))
        return rgcredits

    def _tag_articles(self, rgcredits, time_update) -> None:
        """Tag each credited article's talk page, via {{Article history}} when
        present, otherwise with a new {{DYK talk}} tag; each article once."""
        set_tagged = set()
        for credit in rgcredits:
            if credit.str_article in set_tagged:
                continue

            str_edit_summary = None
            page_talk = pywikibot.Page(pywikibot.Site(), 'Talk:' + credit.str_article)
            page_talk.text, str_edit_summary = DYKUpdateBotUtils.tag_article_history(page_talk.text, credit, time_update)
            if not str_edit_summary:
                # no {{Article history}} on the page; fall back to {{DYK talk}}
                str_dyktalk_tag, str_edit_summary = DYKUpdateBotUtils.build_dyktalk_tag(credit, time_update)
                page_talk.text = DYKUpdateBotUtils.add_template_to_talk(page_talk.text, str_dyktalk_tag)
            self._edit(page_talk, str_edit_summary)

            set_tagged.add(credit.str_article)

    def _give_user_credits(self, rgcredits, str_dykbotdo_signature) -> None:
        """Leave a DYK credit message on each valid contributor's talk page."""
        str_promoting_admin = DYKUpdateBotUtils.find_user_link(str_dykbotdo_signature)
        for credit in rgcredits:
            if not credit.str_user_talk:
                continue  # user failed validation; warning was already logged
            str_message, str_edit_summary = DYKUpdateBotUtils.build_user_talk_credit(credit, str_dykbotdo_signature, str_promoting_admin)
            self._append_and_edit(credit.str_user_talk, str_message, str_edit_summary)

    def _tag_outgoing_file(self, hooks_outgoing, time_update) -> None:
        """Tag the image of the set leaving the Main Page with {{DYKfile}}."""
        file_outgoing = DYKUpdateBotUtils.find_file(hooks_outgoing)
        if file_outgoing:
            file_outgoing_commons = pywikibot.FilePage(pywikibot.Site().image_repository(), file_outgoing.title())
            if file_outgoing.exists() or file_outgoing_commons.exists():
                str_dykfile_tag = '{{{{DYKfile|{d.day} {d:%B}|{d.year}}}}}'.format(d=time_update)
                file_outgoing.text = DYKUpdateBotUtils.add_template_to_talk(file_outgoing.text, str_dykfile_tag)
                self._edit(file_outgoing, 'File appeared on [[WP:Did you know|DYK]] on {d.day} {d:%B} {d.year}'.format(d=time_update))
                # cropped/local-copy files need human cleanup, so leave a trace in the log
                if ('m-cropped' in file_outgoing.text.lower()) or ('c-uploaded' in file_outgoing.text.lower()):
                    DYKUpdateBotUtils.log('Outgoing file "{0}" tagged with {{m-cropped}} or {{c-uploaded}}'.format(file_outgoing.title()))
            else:
                DYKUpdateBotUtils.log('Special case (possible bug?): Outgoing file "{0}" doesn\'t exist'.format(file_outgoing.title()))

    def _post_errors(self, rgstr_warnings, rgstr_errors) -> None:
        """Publish current warnings/errors to ERROR_OUTPUT_LOC (errors first);
        clears the page when there is nothing to report."""
        str_output = ''
        str_edit_summary = 'No errors or warnings; clear'

        if rgstr_warnings:
            str_warnings = 'Bot warnings:\n'
            str_warnings += '\n'.join('* {0}'.format(str_warning) for str_warning in rgstr_warnings)
            str_output = str_warnings + '\n\n' + str_output
            str_edit_summary = 'Posting latest warnings'

        if rgstr_errors:
            str_errors = 'Errors blocking the bot from updating DYK:\n'
            str_errors += '\n'.join('* {0}'.format(str_error) for str_error in rgstr_errors)
            str_output = str_errors + '\n\n' + str_output
            str_edit_summary = 'Bot is blocked from updating DYK, posting latest errors'

        page_errors = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.ERROR_OUTPUT_LOC)
        if page_errors.text.strip() == str_output.strip():
            return  # if the errors are already on the page, don't post again
        page_errors.text = str_output.strip()
        self._edit(page_errors, str_edit_summary)

    # ---------------------------------------------
    # Core editing
    # ---------------------------------------------

    # Edge cases we're handling:
    # * {{nobots}}
    # * Redirects
    # * Page doesn't exist
    # * Edit conflicts
    # * Protected page
    def _append_and_edit(self, str_title, str_message, str_edit_summary) -> None:
        """Append str_message to the page at str_title, retrying on edit
        conflicts and honoring redirects and {{bots}}/{{nobots}}."""
        page_to_edit = pywikibot.Page(pywikibot.Site(), str_title)
        if page_to_edit.isRedirectPage():
            page_to_edit = page_to_edit.getRedirectTarget()
        if not page_to_edit.botMayEdit():
            # Attempting to save the page when botMayEdit() is False will throw an OtherPageSaveError
            DYKUpdateBotUtils.log('Couldn\'t edit ' + page_to_edit.title() + ' due to {{bots}} or {{nobots}}')
            return

        retry = True
        while retry:
            retry = False
            try:
                # separate from existing content; an empty/new page gets no
                # leading blank lines ('' .isspace() is False, hence both checks)
                if page_to_edit.text and not page_to_edit.text.isspace():
                    page_to_edit.text += '\n\n'
                page_to_edit.text += str_message
                self._edit(page_to_edit, str_edit_summary)
            except pywikibot.EditConflict:
                retry = True
                DYKUpdateBotUtils.log('Edit conflicted on ' + page_to_edit.title() + ' will retry after a short nap')
                pywikibot.sleep(10)  # sleep for 10 seconds
                page_to_edit = pywikibot.Page(pywikibot.Site(), page_to_edit.title())

    def _is_on(self) -> bool:
        """Kill switch: True iff the sibling UpdateBotSwitch.txt says 'on'."""
        with open(str(pathlib.Path(__file__).parent / 'UpdateBotSwitch.txt'), 'r', encoding='utf-8') as f:
            str_file_switch = f.read()
        is_file_switch_on = str_file_switch.strip().lower() == 'on'
        if not is_file_switch_on:
            DYKUpdateBotUtils.log('Text file switch is not "on", exiting...')
        return is_file_switch_on

    def _edit(self, page_to_edit, str_edit_summary) -> None:
        """Save page_to_edit, skipping salted and protected pages."""
        DYKUpdateBotUtils.log('Editing ' + page_to_edit.title())
        if (not page_to_edit.exists()) and DYKUpdateBotUtils.check_if_salted(page_to_edit):
            DYKUpdateBotUtils.log('Special case: ' + page_to_edit.title() + ' is salted, skipping...')
            return
        try:
            page_to_edit.save(str_edit_summary, minor=False)
            # For a dry run where the bot outputs to local files, comment out the above line and uncomment the lines below
            # DYKUpdateBotUtils.log('Edit summary: ' + str_edit_summary)
            # filename = ''.join(character for character in page_to_edit.title() if character not in '\/:*?<>|"') + '.txt'
            # with open(str(pathlib.Path(__file__).parent / 'TestResources' / filename), 'w', encoding='utf-8') as file_write:
            #     file_write.write(page_to_edit.text)
        except pywikibot.exceptions.LockedPage:  # I'm not sure it's possible to hit this with an adminbot...
            DYKUpdateBotUtils.log('Special case: ' + page_to_edit.title() + ' is protected, skipping...')

    def _log_error(self, rgstr_errors, str_error) -> None:
        """Record a blocking error and echo it to the console log."""
        rgstr_errors.append(str_error)
        DYKUpdateBotUtils.log('Error: ' + str_error)

    def _log_warning(self, rgstr_warnings, str_warning) -> None:
        """Record a non-blocking warning and echo it to the console log."""
        rgstr_warnings.append(str_warning)
        DYKUpdateBotUtils.log('Warning: ' + str_warning)

# Set of methods broken out for easier unit testability
# Unless otherwise noted, these methods don't make network calls
# Do Not edit the wiki from within these methods, otherwise unit tests will edit the wiki!


class DYKUpdateBotUtils():
    @staticmethod
    def wikilink_to_queue(num_queue, capitalize) -> str:
        """Build a piped wikilink to a queue, e.g. '[[...Queue/3|Queue 3]]'."""
        str_q = 'Q' if capitalize else 'q'
        return ('[[' + DYKUpdateBot.QUEUE_ROOT_LOC + str(num_queue) +
                '|' + str_q + 'ueue ' + str(num_queue) + ']]')

    # Returns a tuple:
    # * First value is True if dykbotdo was found, False if not
    # * Second value is the admin signature in dykbotdo, or None if not found
    @staticmethod
    def parse_dykbotdo(str_queue) -> (bool, str):
        """Look for {{DYKbotdo}} in the queue wikitext.

        Returns (True, admin signature or None) when the template is present,
        (False, None) when it is not.
        """
        wikicode = mwparserfromhell.parse(str_queue, skip_style_tags=True)
        for tmpl in wikicode.filter_templates():
            if not tmpl.name.matches('DYKbotdo'):
                continue
            str_signature = str(tmpl.get(1)) if tmpl.has(1) else None
            return True, str_signature
        return False, None

    # Returns:
    # * Hooks if <!--Hooks--> and <!--HooksEnd--> tags are in order
    # * None if not
    @staticmethod
    def extract_hooks(str_queue_or_tdyk) -> str:
        idx_hooks_tag = str_queue_or_tdyk.find('<!--Hooks-->')
        idx_hooksend_tag = str_queue_or_tdyk.find('<!--HooksEnd-->', max(idx_hooks_tag, 0))
        if min(idx_hooks_tag, idx_hooksend_tag) == -1:
            return None
        return str_queue_or_tdyk[idx_hooks_tag + 12:idx_hooksend_tag].strip()

    # Returns:
    # * pywikibot.FilePage of the file in the DYK set if detected
    # * None if not
    @staticmethod
    def find_file(str_hooks) -> pywikibot.FilePage:
        """Locate the set's lead image via {{Main page image/DYK}}.

        Returns a pywikibot.FilePage for the template's |image= parameter, or
        None when the template is absent.
        """
        wikicode = mwparserfromhell.parse(str_hooks, skip_style_tags=True)
        for tmpl in wikicode.filter_templates():
            if not tmpl.name.matches('Main page image/DYK'):
                continue
            # File:XYZ.jpg, Image:XYZ.jpg and bare XYZ.jpg all normalize to
            # the same FilePage (title() returns File:XYZ.jpg for each)
            str_file = str(tmpl.get('image').value)
            idx_pipe = str_file.find('{{!}}')
            if idx_pipe != -1:
                DYKUpdateBotUtils.log('Special case: Stripping everything after pipe from filename "{0}"'.format(str_file))
                str_file = str_file[:idx_pipe]
            return pywikibot.FilePage(pywikibot.Site(), str_file)
        return None

    # This method makes network calls to the Wikipedia API (read-only)
    # Returns:
    # * None if protection looks good
    # * A string describing the issue if not
    # Cases to validate if changing this function (leverage the unit tests!):
    # * File that doesn't exist
    #     * File:Nlksjdkfjskdljflkdsjfame.jpg
    # * Fully not-protected file
    #     * en:File:Emmelie de Forest Hunter & Prey.png and commons:File:Novo Selo TE 01.JPG
    # * Fully not-protected file on Commons with an enwiki description page
    #     * en:File:MET Breuer (48377070386).jpg
    # * Semi-protected file
    #     * en:File:Amy Barlow.jpg and commons:File:Flag of Palestine.svg
    # * Fully protected file indefinitely protected
    #     * en:File:George Floyd neck knelt on by police officer.png and commons:File:Name.jpg
    # * Fully protected file via cascading protection
    #     * en:File:WPVG icon 2016.svg and commons:File:Wikitech-2020-logo.svg
    # * Fully protected file with protection expiring before set leaves the Main Page
    #     * Use the API to find examples:
    #     * https://commons.wikimedia.org/w/api.php?action=query&list=allpages&apnamespace=6&apprtype=edit&apprexpiry=definite&apprlevel=sysop&aplimit=500
    # * Fully protected file with protection expiring after set leaves the Main Page
    #     * see URL above
    @staticmethod
    def check_if_protected(filepage, time_set_leaving) -> str:
        """Verify the set's image is fully edit-protected for its Main Page run.

        Returns None when protection is sufficient, otherwise a human-readable
        description of the problem (surfaced on the bot's error page).
        Makes read-only API calls; see the case list in the comment above.
        """
        str_file_for_output = filepage.title(as_link=True, textlink=True)
        # the file may live locally or on Commons; build a handle to the Commons copy
        filepage_commons = pywikibot.FilePage(pywikibot.Site().image_repository(), filepage.title())
        if not (filepage.exists() or filepage_commons.exists()):
            return str_file_for_output + ' does not exist'
        on_commons = filepage.file_is_shared()
        if on_commons:
            # inspect protection where the file actually lives
            filepage = filepage_commons
        edit_protections = filepage.protection().get('edit')
        if edit_protections is None:
            if on_commons:
                return str_file_for_output + ' is not protected; either 1) Upload the file to en.wiki, or 2) protect the file at Commons'
            else:  # on enwiki
                return str_file_for_output + ' is not protected'
        if edit_protections[0] != 'sysop':
            # semi/extended protection isn't enough for a Main Page image
            return str_file_for_output + ' is not fully protected'
        str_prot_end = edit_protections[1]  # expiry timestamp, or 'infinity'
        if str_prot_end == 'infinity':
            return None
        time_prot_end = pywikibot.Timestamp.fromISOformat(str_prot_end).replace(tzinfo=timezone.utc)
        if time_prot_end < time_set_leaving:
            return 'The protection for ' + str_file_for_output + ' will expire before or while it\'s on the Main Page'
        return None  # protection expires after set leaves the Main Page

    @staticmethod
    def calculate_drift_core(time_update, timedelta_between_updates, minutes_max_advance, minutes_max_delay) -> int:
        """Compute a clock nudge, in whole minutes, that pulls the update
        schedule back toward 00:00 UTC.

        Simulates future update times at intervals of
        timedelta_between_updates and tracks the signed offset (seconds) of
        each from the nearest midnight; offsets beyond half a day wrap
        negative. Returns a negative value (advance the clock) when the
        closest simulated update falls after midnight, a positive value
        (delay) when it falls before, and 0 when already aligned; the
        magnitude is capped by minutes_max_advance / minutes_max_delay.
        """
        seconds_per_day = 60 * 60 * 24
        seconds_least_difference_from_0000 = 60 * 60 * 24  # sentinel: worse than any real offset
        set_seconds_differences = set()  # offsets already seen, to detect a repeating cycle
        time_iter = time_update
        while True:
            # signed seconds from the nearest midnight, mapped into (-12h, +12h]
            current_difference_from_0000 = int(time_iter.timestamp()) % seconds_per_day
            if current_difference_from_0000 > (seconds_per_day / 2):
                current_difference_from_0000 = -(seconds_per_day - current_difference_from_0000)
            if abs(seconds_least_difference_from_0000) > abs(current_difference_from_0000):
                seconds_least_difference_from_0000 = current_difference_from_0000
            if seconds_least_difference_from_0000 == 0:
                break  # an update lands exactly on midnight; no drift needed
            # stop once the offset cycle repeats, or after 24 samples as a safety cap
            if (current_difference_from_0000 in set_seconds_differences) or (len(set_seconds_differences) >= 24):
                break
            set_seconds_differences.add(current_difference_from_0000)
            time_iter = time_iter + timedelta_between_updates

        if seconds_least_difference_from_0000 > 0:
            # nearest update is after midnight: advance the clock (negative minutes)
            return -min(minutes_max_advance, seconds_least_difference_from_0000 // 60)
        elif seconds_least_difference_from_0000 < 0:
            # nearest update is before midnight: delay the clock (positive minutes)
            return min(minutes_max_delay, -seconds_least_difference_from_0000 // 60)
        else:
            return 0

    # This method makes network calls to the Wikipedia API (read-only)
    @staticmethod
    def check_if_salted(page) -> bool:
        """Return True when creating the page is restricted to admins ("salted").

        Makes a read-only API call via page.protection(). The previous version
        returned None (falsy) when no create-protection exists, despite the
        annotated bool return; coerce to a real bool.
        """
        create_protections = page.protection().get('create')
        return bool(create_protections and (create_protections[0] == 'sysop'))

    @staticmethod
    def archive(str_archive, time_update, hooks_outgoing) -> str:
        """Insert the outgoing hook set at the top of today's date section of
        the archive page, creating the section if it doesn't exist yet."""
        str_heading_date = '==={d.day} {d:%B} {d.year}==='.format(d=time_update)
        str_heading_set = '*\'\'\'\'\'{d:%H}:{d:%M}, {d.day} {d:%B} {d.year} (UTC)\'\'\'\'\''.format(d=time_update)

        # find today's section; if absent, create it right below <!--BOTPOINTER-->
        idx_date_section = str_archive.find(str_heading_date)
        if idx_date_section == -1:
            idx_new_section = str_archive.find('\n', str_archive.find('<!--BOTPOINTER-->')) + 1
            str_archive = DYKUpdateBotUtils._insert_str(str_archive, idx_new_section, str_heading_date + '\n')
            idx_date_section = idx_new_section

        # skip past the heading line and drop the set in at the top of the section
        idx_insert_set = str_archive.find('\n', idx_date_section) + 1
        return DYKUpdateBotUtils._insert_str(str_archive, idx_insert_set, str_heading_set + '\n' + hooks_outgoing + '\n\n')

    @staticmethod
    def parse_credits(str_queue) -> []:
        """Extract {{DYKmake}}/{{DYKnom}} credits from queue wikitext.

        Returns a list of DYKCredit objects; placeholder rows left over from
        the credit templates' documentation are dropped.
        """
        rgcredits = []
        wikicode = mwparserfromhell.parse(str_queue, skip_style_tags=True)
        for tmpl in wikicode.filter_templates():
            is_dykmake = tmpl.name.matches('DYKmake')
            if not (is_dykmake or tmpl.name.matches('DYKnom')):
                continue
            # both the article (1) and the user (2) are required
            if not (tmpl.has(1) and tmpl.has(2)):
                continue

            credit = DYKCredit()
            credit.str_article = html.unescape(str(tmpl.get(1).value))
            credit.str_user = html.unescape(str(tmpl.get(2).value))
            credit.is_dykmake = is_dykmake
            if tmpl.has('subpage'):
                str_subpage = html.unescape(str(tmpl.get('subpage').value))
                if str_subpage != '':
                    credit.str_nompage = 'Template:Did you know nominations/' + str_subpage

            # sanitize: skip the boilerplate examples and empty values
            if (credit.str_article == 'Example' or credit.str_article == '' or
                    credit.str_user == '' or credit.str_user == 'Editor' or credit.str_user == 'Nominator'):
                continue
            credit.str_article = credit.str_article.replace('[[', '').replace(']]', '')
            rgcredits.append(credit)
        return rgcredits

    # This method makes network calls to the Wikipedia API (read-only)
    # As "output", sets str_article on valid credits & deletes credits for nonexistent articles
    @staticmethod
    def validate_credits_articles(rgcredits, fn_log_warning) -> None:
        """Normalize each credit's article title against the live wiki and
        drop credits whose article does not exist (a warning is logged).

        Mutates rgcredits in place; makes read-only API calls.
        """
        # Articles:
        # * expand any templates in the article name
        # * delete credits for nonexistent articles
        # * follow redirects
        # * normalize titles
        dict_processed = {}  # original title -> already-processed credit (avoids repeat lookups)
        # iterate in reverse so `del rgcredits[idx]` can't shift unvisited entries
        for idx_credit in reversed(range(len(rgcredits))):
            str_article_orig = rgcredits[idx_credit].str_article
            if str_article_orig in dict_processed:
                rgcredits[idx_credit].str_article = dict_processed[str_article_orig].str_article
                continue

            str_article_processed = str_article_orig
            if '}}' in str_article_processed:
                # title contains a template; expand it server-side first
                str_article_processed = pywikibot.Site().expand_text(text=str_article_processed)
                DYKUpdateBotUtils.log('Special case: Credit article title contains template "{0}"->"{1}"'.format(str_article_orig, str_article_processed))
            page_article = pywikibot.Page(pywikibot.Site(), str_article_processed)
            if page_article.isRedirectPage():
                page_article = page_article.getRedirectTarget()
            if not page_article.exists():
                fn_log_warning('Article [[{0}]] does not exist'.format(str_article_orig))
                del rgcredits[idx_credit]
                continue
            str_article_processed = page_article.title()  # canonical, normalized title
            rgcredits[idx_credit].str_article = str_article_processed
            dict_processed[str_article_orig] = rgcredits[idx_credit]

    # This method makes network calls to the Wikipedia API (read-only)
    # As "output", sets str_user_talk on valid credits
    @staticmethod
    def validate_credits_users(rgcredits, fn_log_warning) -> None:
        """Resolve each credit's username to a user-talk page title, setting
        credit.str_user_talk on valid credits (left None for invalid users,
        and a warning is logged).

        Mutates rgcredits in place; makes read-only API calls.
        """
        # Users:
        # * expand any templates in the username
        # * check for nonexistent users
        # * follow redirects
        # * normalize titles
        dict_processed = {}  # original username -> already-processed credit (avoids repeat lookups)
        for credit in rgcredits:
            str_user_orig = credit.str_user
            if str_user_orig in dict_processed:
                credit.str_user_talk = dict_processed[str_user_orig].str_user_talk
                continue

            str_user_processed = str_user_orig
            if '}}' in str_user_processed:
                # username contains a template; expand it server-side first
                str_user_processed = pywikibot.Site().expand_text(text=str_user_processed)
                DYKUpdateBotUtils.log('Special case: Credit username contains template "{0}"->"{1}"'.format(str_user_orig, str_user_processed))
            user = pywikibot.User(pywikibot.Site(), str_user_processed)
            # IPs count as valid only if they have actually edited
            is_valid_user = user.isRegistered() or (user.isAnonymous() and user.last_edit)
            if not is_valid_user:
                # was the user recently renamed?
                # example API call: https://en.wikipedia.org/w/api.php?action=query&list=logevents&letype=renameuser&letitle=User:Carrot%20official&lelimit=1
                for entry in pywikibot.Site().logevents('renameuser', page=user.title(), total=1):
                    if entry['params']['olduser'] == user.username:
                        user = pywikibot.User(pywikibot.Site(), entry['params']['newuser'])
                        DYKUpdateBotUtils.log('Special case: User listed in credit was renamed "{0}"->"{1}"'.format(str_user_orig, user.username))
                is_valid_user = user.isRegistered() or (user.isAnonymous() and user.last_edit)

            if is_valid_user:
                page_usertalk = user.getUserTalkPage()
                if page_usertalk.isRedirectPage():
                    DYKUpdateBotUtils.log('Special case: User talk is a redirect "{0}"'.format(page_usertalk.title()))
                    page_usertalk = page_usertalk.getRedirectTarget()
                if page_usertalk.isTalkPage():
                    # no funny business - the redirect above shouldn't make the bot, eg, tag the Main Page with a DYK credit
                    credit.str_user_talk = page_usertalk.title()
            else:
                fn_log_warning('The username \'{0}\' is invalid'.format(str_user_orig))
            dict_processed[str_user_orig] = credit

    # This method makes network calls to the Wikipedia API (read-only) if:
    # * There's a template within the hooks
    # * There's no string match between the article listed in the credit and the hooks - redirect search
    # As "output", sets str_hook and (if first hook) str_file on credits
    @staticmethod
    def populate_hooks_and_file(rgcredits, str_hooks, str_file) -> None:
        """Match each credit's article to its hook line in str_hooks.

        Sets credit.str_hook to the original (unexpanded) hook text and, for
        the article in the lead (first) hook, credit.str_file to str_file.
        Credits whose article cannot be found in any hook are left untouched.
        """
        # remove stuff at the top that isn't hooks (eg image)
        # i.e. keep everything from the start of the line containing the
        # first '...' onward
        str_hooks = str_hooks[str_hooks.rfind('\n', 0, str_hooks.find('...')):].strip()

        # expand templates
        str_hooks_normalized = str_hooks
        if '}}' in str_hooks_normalized:
            str_hooks_normalized = pywikibot.Site().expand_text(text=str_hooks_normalized)

        # unescape HTML and replace non-breaking spaces with normal spaces
        str_hooks_normalized = html.unescape(str_hooks_normalized).replace(html.unescape('&nbsp;'), ' ')

        # Two parallel lists: original hook text (what gets written to
        # credits) and a lowercased, expanded version (what gets searched).
        # NOTE(review): this assumes expand_text preserves the number of
        # lines; a template expanding to multiple lines would misalign the
        # lists - TODO confirm.
        rghooks_orig = str_hooks.split('\n')
        rghooks_normalized = str_hooks_normalized.lower().split('\n')

        # remove any lines without '...' and trim any leading characters, like *
        # (iterate in reverse so deletions don't shift pending indices)
        for idx_hook in reversed(range(len(rghooks_orig))):
            str_hook = rghooks_orig[idx_hook]
            idx_that = str_hook.find('...')
            if idx_that == -1:
                del rghooks_orig[idx_hook]
                del rghooks_normalized[idx_hook]
            else:
                rghooks_orig[idx_hook] = str_hook[idx_that:]

        # search for the hook for each article
        # Cache per article so multi-credit articles reuse one lookup.
        dict_processed = {}
        for credit in rgcredits:
            if credit.str_article in dict_processed:
                credit.str_hook = dict_processed[credit.str_article].str_hook
                credit.str_file = dict_processed[credit.str_article].str_file
                continue

            idx_found_hook = DYKUpdateBotUtils._find_hook(credit.str_article, rghooks_normalized)
            if idx_found_hook == -1:  # maybe the hook links to a page that redirects to str_article?
                page_article = pywikibot.Page(pywikibot.Site(), credit.str_article)
                for page_redirect in page_article.getReferences(filter_redirects=True, namespaces=pywikibot.site.Namespace.MAIN):
                    idx_found_hook = DYKUpdateBotUtils._find_hook(page_redirect.title(), rghooks_normalized)
                    if idx_found_hook != -1:
                        DYKUpdateBotUtils.log('Special case: Hook matches redirect to article "{0}"'.format(credit.str_article))
                        break  # got a hit! no need to keep iterating through redirects
            if idx_found_hook >= 0:
                credit.str_hook = rghooks_orig[idx_found_hook]
                # Only the lead hook (index 0) carries the image
                if idx_found_hook == 0:
                    credit.str_file = str_file

            dict_processed[credit.str_article] = credit

    @staticmethod
    def _find_hook(str_article, rghooks_normalized) -> int:
        str_article_lower = str_article.lower()
        for idx_hook, str_hook_normalized in enumerate(rghooks_normalized):
            if str_article_lower in str_hook_normalized:
                return idx_hook
        return -1

    @staticmethod
    def tag_article_history(str_talk, credit, time_update) -> (str, str):
        # If the talk page carries an {{Article history}} template (under any
        # of its common aliases), record the DYK appearance in it.  Returns
        # the possibly-updated talk text and an edit summary, or
        # (original text, None) when no such template is present.
        AH_ALIASES = ('Article history', 'Articlehistory', 'Article History',
                      'ArticleHistory', 'Article milestones', 'Articlemilestones')
        template_ah = None
        for template in mwparserfromhell.parse(str_talk, skip_style_tags=True).filter_templates():
            if any(template.name.matches(alias) for alias in AH_ALIASES):
                template_ah = template
                break

        if template_ah is None:
            return str_talk, None

        str_edit_summary = ('Article appeared on [[WP:Did you know|DYK]] on {d.day} {d:%B} {d.year}'
                            ', adding to {{{{[[Template:Article history|Article history]]}}}}'.format(d=time_update))
        str_before_edit = str(template_ah)
        # According to documentation at Template:Article_history, DYK params go between |currentstatus  and |topic
        param_topic = template_ah.get('topic') if template_ah.has('topic') else None
        template_ah.add('dykdate', '{d.day} {d:%B} {d.year}'.format(d=time_update), before=param_topic)
        if credit.str_hook:
            template_ah.add('dykentry', credit.str_hook, before=param_topic)
        if credit.str_nompage:
            template_ah.add('dyknom', credit.str_nompage, before=param_topic)
        return str_talk.replace(str_before_edit, str(template_ah)), str_edit_summary

    # Returns a tuple:
    # * First value is the dyktalk tag
    # * Second value is the edit summary
    @staticmethod
    def build_dyktalk_tag(credit, time_update) -> (str, str):
        str_tag = '\n{{{{DYK talk|{d.day} {d:%B}|{d.year}{str_image_param}{str_hook_param}{str_nompage_param}}}}}'.format(
                  d=time_update,
                  str_image_param=('|image=' + credit.str_file) if credit.str_file else '',
                  str_hook_param=('|entry=' + credit.str_hook) if credit.str_hook else '',
                  str_nompage_param=('|nompage=' + credit.str_nompage) if credit.str_nompage else '')
        str_edit_summary = ('Article appeared on [[WP:Did you know|DYK]] on {d.day} {d:%B} {d.year}'
                            ', adding {{{{[[Template:DYK talk|DYK talk]]}}}}'.format(d=time_update))
        return str_tag, str_edit_summary

    @staticmethod
    def add_template_to_talk(str_talk, str_tag) -> str:
        # Insert str_tag after the last banner template in the talk page
        # header, i.e. everything before the first '==' section heading
        # (the whole page counts as header when there are no sections).
        idx_first_section = str_talk.find('==')
        str_header = str_talk if idx_first_section == -1 else str_talk[:idx_first_section]
        idx_insert = DYKUpdateBotUtils._last_template_index(str_header)
        # Keep the tag on its own line unless it already lands before a newline
        if idx_insert < len(str_talk) and str_talk[idx_insert] != '\n':
            str_tag += '\n'
        return DYKUpdateBotUtils._insert_str(str_talk, idx_insert, str_tag).strip()

    @staticmethod
    def _last_template_index(str_header) -> int:
        # To a human reader, GA / DYK etc discussions aren't templates, they're part of the content
        # so detect and remove them from what we consider the header
        # GA discussion transclusion example from Talk:Icos: {{Talk:Icos/GA1}}
        # DYK discussion transclusion example from Special:Diff/873606519: {{Did you know nominations/Bishop John Carroll (statue)}}
        match = search('\{\{\s*([Tt]alk:|[Tt]emplate talk:|([Tt]emplate:\s*)?[Dd]id you know nominations/)', str_header)
        if match:
            str_header = str_header[:match.start()]
        idx_last_template = str_header.rfind('}}')
        if idx_last_template == -1:
            idx_last_template = 0
        else:
            idx_last_template += 2
        return idx_last_template

    # Returns username if one was found, None if not
    @staticmethod
    def find_user_link(str_dykbotdo_signature) -> str:
        # Scan the wikilinks in the signature for the first link into the
        # User: or User talk: namespace and pull the bare username out of it.
        for link in mwparserfromhell.parse(str_dykbotdo_signature, skip_style_tags=True).filter_wikilinks():
            str_title = str(link.title)
            idx_namespace = max(str_title.find('User:'), str_title.find('User talk:'))
            if idx_namespace == -1:
                continue
            str_user = str_title[str_title.find(':', idx_namespace) + 1:]
            # Strip any subpage ('/...') or section ('#...') suffix
            idx_trailing = max(str_user.find('#'), str_user.find('/'))
            if idx_trailing != -1:
                str_user = str_user[:idx_trailing]
            return str_user
        return None

    # Returns a tuple:
    # * First value is the message on the talk page (section + credit + signature)
    # * Second value is the edit summary
    @staticmethod
    def build_user_talk_credit(credit, str_dykbotdo_signature, str_promoting_admin) -> (str, str):
        str_message = ('==DYK for {str_article}==\n'
                       '{{{{subst:Template:{str_template} |article={str_article} {str_hook_param} '
                       '{str_nompage_param} |optional= }}}} {str_sig}'
                       .format(str_article=credit.str_article,
                               str_template='DYKmake/DYKmakecredit' if credit.is_dykmake else 'DYKnom/DYKnomcredit',
                               str_hook_param=('|hook=' + credit.str_hook) if credit.str_hook else '',
                               str_nompage_param=('|nompage=' + credit.str_nompage) if credit.str_nompage else '',
                               str_sig=(str_dykbotdo_signature + ' ~~~~~') if str_dykbotdo_signature else '~~~~'))
        str_edit_summary = 'Giving DYK credit for [[{str_article}]]'.format(str_article=credit.str_article)
        if str_promoting_admin:
            str_edit_summary += ' on behalf of [[User:{str_username}|{str_username}]]'.format(str_username=str_promoting_admin)
        return str_message, str_edit_summary

    @staticmethod
    def _insert_str(str_target, idx, str_insert) -> str:
        return str_target[:idx] + str_insert + str_target[idx:]

    @staticmethod
    def log(str_to_log) -> None:
        # Write a line to stdout; flush immediately so log output is not
        # lost in a buffer if the process dies and stays in order when
        # redirected to a file.
        print(str_to_log, flush=True)


class ValidationResults():
    """Accumulator for the pre-update validation pass: collected errors and
    warnings plus the pages, hooks, signature and timing data gathered along
    the way (all optional fields start out empty/None)."""

    def __init__(self) -> None:
        # Fatal problems vs. report-only problems
        self.rgstr_errors, self.rgstr_warnings = [], []
        # Pages involved in the update (presumably T:DYK and the chosen
        # queue page - populated elsewhere; verify against caller)
        self.page_TDYK = self.page_queue = None
        self.num_queue = 0
        # Incoming image and hook sets swapped during the update
        self.file_incoming = self.hooks_incoming = self.hooks_outgoing = None
        # Signature found on the {{DYKbotdo}} tag, if any
        self.str_dykbotdo_signature = None
        self.timedelta_between_updates = None


class DYKCredit():
    """A single DYK credit: the article being featured, the user to credit,
    the nomination page, and (once resolved) the hook and lead image."""

    def __init__(self) -> None:
        self.str_article = None     # article title the credit is for
        self.str_user = None        # username as listed in the credit
        self.str_user_talk = None   # resolved user-talk page title
        self.str_nompage = None     # DYK nomination page title
        self.is_dykmake = True      # True: DYKmake (creator); False: DYKnom (nominator)
        self.str_hook = None        # hook text matched to the article
        self.str_file = None        # image file, only for the lead hook

    def __str__(self):
        return (f'DYKCredit! article:{self.str_article}, user:{self.str_user}, '
                f'nompage:{self.str_nompage}, is_dykmake:{self.is_dykmake}, '
                f'hook:{self.str_hook}, file:{self.str_file}')


def main() -> None:
    # Entry point: build the bot and run one update cycle.
    DYKUpdateBot().run()


if __name__ == '__main__':
    main()