User:DYKUpdateBot/Code

Below is the code for [[User:DYKUpdateBot|DYKUpdateBot]]. Many thanks to the developers of the [http://jwbf.sourceforge.net/ JavaWikiBotFramework] (JWBF), which made this possible. The bot runs on [http://jwbf.svn.sourceforge.net/viewvc/jwbf/trunk/jwbf/src/net/sourceforge/jwbf/?pathrev=178 revision 178] of the JWBF and [http://commons.apache.org/lang/api-2.5/index.html version 2.5] of Apache's Commons Lang library.

<source lang="java">
import java.io.File;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Scanner;

import org.apache.commons.lang.StringEscapeUtils;

import org.jdom.Document;
import org.jdom.Element;

import net.sourceforge.jwbf.actions.mw.MediaWiki;
import net.sourceforge.jwbf.bots.MediaWikiBot;
import net.sourceforge.jwbf.contentRep.mw.SimpleArticle;

public class DYKUpdateBot extends EnWikiBot {

private static final String TDYKLoc = "Template:Did you know";

private static final String QueueRootLoc = "Template:Did you know/Queue/";

private static final String TimeLoc = "Template:Did you know/Next update/Time";
|||
private static final String NextUpdateQueueLoc = "Template:Did you know/Queue/Next"; |
|||
private static final String ClearTemplate = "{{User:DYKUpdateBot/REMOVE THIS LINE}}"; |
|||
private static final String TimeBetweenUpdatesLoc = "User:DYKUpdateBot/Time Between Updates"; |
|||
private static final String ArchiveLoc = "Wikipedia:Recent additions"; |
|||
private static final String WTDYKLoc = "Wikipedia talk:Did you know"; |
|||
private static final String ErrorOutputLoc = "User:DYKUpdateBot/Errors"; |
|||
private static final String DriftLoc = "User:DYKUpdateBot/ResyncDrift"; |
|||
private static final String BaseCommonsAPIURL = "https://commons.wikimedia.org/w/"; |
|||
private static final int TimeBetweenEdits = 5; // in seconds |
|||
private static final int TimeBetweenStatusChecks = 600; // in seconds |
|||
private static final int NumQueues = 7; |
|||
private static final int NumExceptionsBeforeAttemptedReset = 55; |
|||
private static final String[] shipTemplates = {"ship", "sclass", "Jsub", |
|||
"barge", "GTS", "HSC", "MS", "MV", "PS", "SS", "tugboat", "HMAS", |
|||
"HMCS", "HMNZS", "HMS", "RMS", "USAT", "USCGC", "USNS", "USRC", "USS", |
|||
"SMS", "SMU", "GS", "HNLMS", "HNoMS" }; |
|||
private StringBuilder errorLog; |
|||
private int nextQueue; |
|||
public DYKUpdateBot(int timeBetweenEdits, int numExceptionsBeforeAttemptedReset, |
|||
String purgeLoc, String userName, String password) { |
|||
super(timeBetweenEdits, numExceptionsBeforeAttemptedReset, purgeLoc, userName, |
|||
password); |
|||
} |
|||
/** |
|||
* Loops every TimeBetweenStatusChecks seconds until it's time to update DYK |
|||
*/ |
|||
public void run() { |
|||
boolean dykResetExceptionThrown = false; |
|||
do { |
|||
dykResetExceptionThrown = false; |
|||
try { |
|||
login(); |
|||
nextQueue = findNextQueueNumber(); |
|||
errorLog = new StringBuilder(); |
|||
lastDelId = getLastDelId(); |
|||
while (isOn()) { |
|||
checkifLoggedIn(); |
|||
log(new Date().toString()); // output the date and time |
|||
// figure out when next update is |
|||
SimpleArticle dykTimePage = readContent(TimeLoc); |
|||
String dykTime = dykTimePage.getText().trim(); |
|||
if (dykTime.lastIndexOf("\n") != -1) { // if there are multiple lines, get the last line |
|||
dykTime = dykTime.substring(dykTime.lastIndexOf("\n")).trim(); |
|||
} |
|||
GregorianCalendar nextUpdateTime = new GregorianCalendar(BotLocale); |
|||
// first set it to the last update time |
|||
try { |
|||
nextUpdateTime.setTime(APITimestampFormat.parse(dykTime)); |
|||
} catch (ParseException e) { |
|||
logError("Time at [[" + TimeLoc + "]] is not formatted correctly"); |
|||
postErrors(); |
|||
sleep(TimeBetweenStatusChecks * 1000); |
|||
continue; |
|||
} |
|||
// then get the number of seconds between updates, and add it |
|||
int timeBetweenUpdates; |
|||
try { |
|||
timeBetweenUpdates = Integer.parseInt( |
|||
readContent(TimeBetweenUpdatesLoc).getText().trim()); |
|||
} catch (Exception e) { |
|||
logError("Time between updates at [[" + TimeBetweenUpdatesLoc + |
|||
"]] is not formatted correctly"); |
|||
postErrors(); |
|||
sleep(TimeBetweenStatusChecks * 1000); |
|||
continue; |
|||
} |
|||
// add the correct number of seconds to show the time for the next update |
|||
nextUpdateTime.add(Calendar.SECOND, timeBetweenUpdates); |
|||
// figure out what the current time is |
|||
GregorianCalendar currentTime = new GregorianCalendar(BotLocale); |
|||
// update DYK if it's time |
|||
long secondsUntilUpdate = (nextUpdateTime.getTimeInMillis() - |
|||
currentTime.getTimeInMillis())/1000; |
|||
log("Seconds left until next update: " + secondsUntilUpdate); |
|||
GregorianCalendar nextNextUpdateTime = new GregorianCalendar(BotLocale); |
|||
// calendar for checking if image is protected the whole time it's on the Main Page |
|||
nextNextUpdateTime.setTimeInMillis(nextUpdateTime.getTimeInMillis()); |
|||
nextNextUpdateTime.add(Calendar.SECOND, timeBetweenUpdates); |
|||
if (secondsUntilUpdate <= 0) { |
|||
updateDYK(dykTimePage, timeBetweenUpdates, nextNextUpdateTime); |
|||
} else if (secondsUntilUpdate < 7200) { |
|||
checkFormatting(secondsUntilUpdate, nextNextUpdateTime); |
|||
} |
|||
postErrors(); |
|||
if (secondsUntilUpdate < TimeBetweenStatusChecks && secondsUntilUpdate > 0) { |
|||
currentTime.setTime(new Date()); |
|||
sleep(Math.abs(nextUpdateTime.getTimeInMillis() - currentTime.getTimeInMillis())); |
|||
} else { |
|||
sleep(TimeBetweenStatusChecks * 1000); |
|||
} |
|||
} |
|||
} catch (DYKResetException e) { |
|||
log("Reset exception caught, resetting..."); |
|||
dykResetExceptionThrown = true; |
|||
} catch (Exception e) { |
|||
e.printStackTrace(System.out); |
|||
log("Exception occurred; exiting at " + new Date().toString()); |
|||
} |
|||
} while (dykResetExceptionThrown); |
|||
} |
|||
/** |
|||
* Checks if all pages are formatted correctly for the next update |
|||
* If something's wrong, the bot will post to WT:DYK 2 hours before the update |
|||
* Most of this code is copied from updateDYK() |
|||
* @param number of seconds until the next update |
|||
* @param time when the update after next will go live; |
|||
* aka when the set for the next update will be taken off |
|||
*/ |
|||
private void checkFormatting(long secondsUntilUpdate, GregorianCalendar nextNextUpdateTime) { |
|||
// figure out which queue is next |
|||
nextQueue = findNextQueueNumber(); |
|||
if (nextQueue == 0) return; // couldn't parse |
|||
String wikilinkToQueue = "[[" + QueueRootLoc + nextQueue + "|Queue " + nextQueue + "]]"; |
|||
// get the wikitext of the queue |
|||
String queueText = removeUnnecessarySpaces(readContent(QueueRootLoc + nextQueue).getText()); |
|||
// make sure the queue has {{DYKbotdo}} |
|||
if (!queueText.contains("{{DYKbotdo")) { |
|||
logError(wikilinkToQueue + " is not tagged with {{tl|DYKbotdo}}"); |
|||
if (secondsUntilUpdate < 7200) { |
|||
// post to WT:DYK if less than two hours left |
|||
try { |
|||
// get the text of the message and update it |
|||
Scanner in = new Scanner(new File("almostLate.txt")); |
|||
StringBuilder errorBuilder = new StringBuilder(); |
|||
while (in.hasNext()) { |
|||
errorBuilder.append(in.nextLine()).append("\n"); |
|||
} |
|||
in.close(); |
|||
String errorMessage = errorBuilder.toString().trim(); |
|||
while (errorMessage.contains("queueNum")) { |
|||
errorMessage = errorMessage.replace("queueNum", "" + nextQueue); |
|||
} |
|||
if (errorMessage.contains("hoursLeft")) { |
|||
errorMessage = errorMessage.replace("hoursLeft", "two hours"); |
|||
} |
|||
String setIdentifier = APITimestampFormat.format(nextNextUpdateTime.getTime()); |
|||
if (errorMessage.contains("uniqueSetIdentifier")) { |
|||
// if Template:Did you know/Next update/Time changes, the |
|||
// set identifier will also change |
|||
errorMessage = errorMessage.replace("uniqueSetIdentifier", setIdentifier); |
|||
} |
|||
do { |
|||
try { |
|||
SimpleArticle WTDYK = readContent(WTDYKLoc); |
|||
// edit WT:DYK if an alert isn't already posted for this set |
|||
if (!WTDYK.getText().contains(setIdentifier)) { |
|||
WTDYK.addText("\n\n" + errorMessage); |
|||
WTDYK.setEditSummary("DYK is almost late"); |
|||
writeContent(WTDYK); |
|||
} |
|||
return; |
|||
} catch (EditConflictException e) { |
|||
log("Edit conflict caught"); |
|||
// will try again because of while(true) |
|||
} |
|||
} while (true); |
|||
} catch (DYKResetException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
logError("Error occurred while posting 'dyk is late' message"); |
|||
} |
|||
} |
|||
return; // don't continue checking for formatting errors, as the queue may be empty |
|||
} |
|||
// make sure the queue has <!--Hooks--> and <!--HooksEnd-->
int indexOfHooksinQueue = queueText.indexOf("<!--Hooks-->");
int indexOfHooksEndinQueue = queueText.indexOf("<!--HooksEnd-->", indexOfHooksinQueue);
if (indexOfHooksinQueue == -1 || indexOfHooksEndinQueue == -1) {
logError(wikilinkToQueue + " is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>");
return; // can't find hooks, bail out
}
|||
String newHooks = queueText.substring(indexOfHooksinQueue + 12, indexOfHooksEndinQueue); |
|||
// make sure image doesn't have |right and is set to 100x100px |
|||
String newHooksLowerCase = newHooks.toLowerCase(); |
|||
if (newHooksLowerCase.contains("[[file:") || |
|||
newHooksLowerCase.contains("[[image:")) { // image file |
|||
int startIndex = Math.max(newHooksLowerCase.lastIndexOf("[[file:") + 7, |
|||
newHooksLowerCase.lastIndexOf("[[image:") + 8); |
|||
int endIndex = startIndex; |
|||
for (int i=1; newHooks.indexOf("]]", endIndex + 2) != -1; i++) { |
|||
endIndex = newHooks.indexOf("]]", endIndex + 2); |
|||
if (newHooks.substring(startIndex, endIndex).split("\\[\\[").length == i) { |
|||
break; |
|||
} |
|||
} |
|||
String imageWikitext = newHooks.substring(startIndex, endIndex); |
|||
if (imageWikitext.contains("|right")) { |
|||
logError("Warning: File formatting contains |right in " + wikilinkToQueue); |
|||
} |
|||
if (!imageWikitext.contains("100x100px")) { |
|||
logError("Warning: File size is not set to 100x100px in " + wikilinkToQueue); |
|||
} |
|||
} |
|||
// make sure all curly braces are matched |
|||
if (queueText.split("\\{\\{").length != queueText.split("\\}\\}").length) { |
|||
logError("Unmatched left <nowiki>(\"{{\") and right (\"}}\")</nowiki> curly braces in " + wikilinkToQueue); |
|||
} |
|||
// make sure file is protected |
|||
DYKFile incomingFile = findFile(newHooks); |
|||
if (incomingFile != null) checkIfProtected(incomingFile.getFilename(), nextNextUpdateTime, true); |
|||
// fetch T:DYK |
|||
String dykMainText = readContent(TDYKLoc).getText(); |
|||
// make sure T:DYK has <!--Hooks--> and <!--HooksEnd--> |
|||
int indexOfHooksonTDYK = dykMainText.indexOf("<!--Hooks-->"); |
|||
int indexOfHooksEndonTDYK = dykMainText.indexOf("<!--HooksEnd-->", indexOfHooksonTDYK); |
|||
if (indexOfHooksonTDYK == -1 || indexOfHooksEndonTDYK == -1) { |
|||
logError("[[" + TDYKLoc + "]] is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>"); |
|||
} |
|||
} |
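// Illustrative note, not part of the original bot: almostLate.txt is a local message file that
// checkFormatting() reads and posts to WT:DYK. From the substitutions above, it is expected to
// contain the literal placeholders "queueNum", "hoursLeft", and "uniqueSetIdentifier" (the exact
// wording of the message is not shown on this page and is assumed); the bot swaps in the queue
// number, the string "two hours", and a timestamp derived from the next update time so that the
// same alert is not posted twice for one set.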
|||
/** |
|||
* Updates DYK |
|||
* @param the page indicating the time of the last update |
|||
* @param time when the update after next will go live; |
|||
* aka when the set for the next update will be taken off |
|||
*/ |
|||
private void updateDYK(SimpleArticle dykTimePage, final int timeBetweenUpdates, |
|||
GregorianCalendar nextNextUpdateTime) { |
|||
// figure out which queue to update from |
|||
nextQueue = findNextQueueNumber(); |
|||
if (nextQueue == 0) return; // couldn't parse |
|||
// get the wikitext of the queue |
|||
SimpleArticle queue = new SimpleArticle(readContent(QueueRootLoc + nextQueue)); |
|||
String queueText = removeUnnecessarySpaces(queue.getText()); |
|||
// make sure the queue has {{DYKbotdo}} |
|||
int dykbotdoIndex = queueText.indexOf("{{DYKbotdo"); |
|||
String wikilinkToQueue = "[[" + QueueRootLoc + nextQueue + "|Queue " + nextQueue + "]]"; |
|||
if (dykbotdoIndex == -1) { |
|||
logError(wikilinkToQueue + " is not tagged with {{tl|DYKbotdo}}"); |
|||
return; |
|||
} |
|||
String dykbotdo = queueText.substring(dykbotdoIndex, queueText.indexOf("\n", dykbotdoIndex)).trim(); |
|||
// make sure the queue has <!--Hooks--> and <!--HooksEnd-->, then find hooks |
|||
int indexOfHooksinQueue = queueText.indexOf("<!--Hooks-->"); |
|||
int indexOfHooksEndinQueue = queueText.indexOf("<!--HooksEnd-->", indexOfHooksinQueue); |
|||
if (indexOfHooksinQueue == -1 || indexOfHooksEndinQueue == -1) { |
|||
logError(wikilinkToQueue + " is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>"); |
|||
return; |
|||
} |
|||
queueText = checkIfEachHookOnNewLine(queueText, indexOfHooksinQueue, indexOfHooksEndinQueue); |
|||
indexOfHooksEndinQueue = queueText.indexOf("<!--HooksEnd-->", indexOfHooksinQueue); // this may have changed from above line |
|||
String newHooks = queueText.substring(indexOfHooksinQueue + 12, indexOfHooksEndinQueue); |
|||
// make sure all curly braces are matched |
|||
if (queueText.split("\\{\\{").length != queueText.split("\\}\\}").length) { |
|||
logError("Unmatched left <nowiki>(\"{{\") and right (\"}}\")</nowiki> curly braces in " + wikilinkToQueue); |
|||
return; |
|||
} |
|||
// make sure the image/file is protected |
|||
DYKFile incomingFile = findFile(newHooks); |
|||
if (incomingFile != null && !checkIfProtected(incomingFile.getFilename(), nextNextUpdateTime, true)) { |
|||
return; |
|||
} |
|||
// fetch T:DYK |
|||
SimpleArticle dykMain = new SimpleArticle(readContent(TDYKLoc)); |
|||
String dykMainText = dykMain.getText(); |
|||
// make sure T:DYK has <!--Hooks--> and <!--HooksEnd-->, then find hooks |
|||
int indexOfHooksonTDYK = dykMainText.indexOf("<!--Hooks-->"); |
|||
int indexOfHooksEndonTDYK = dykMainText.indexOf("<!--HooksEnd-->", indexOfHooksonTDYK); |
|||
if (indexOfHooksonTDYK == -1 || indexOfHooksEndonTDYK == -1) { |
|||
logError("[[" + TDYKLoc + "]] is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>"); |
|||
return; |
|||
} |
|||
// replace old hooks with new hooks |
|||
String oldHooks = dykMainText.substring(indexOfHooksonTDYK + 12, indexOfHooksEndonTDYK).trim(); |
|||
dykMainText = dykMainText.substring(0, indexOfHooksonTDYK + 12) + newHooks + |
|||
dykMainText.substring(indexOfHooksEndonTDYK, dykMainText.length()); |
|||
GregorianCalendar time = new GregorianCalendar(BotLocale); |
|||
// edit T:DYK |
|||
dykMain.setText(dykMainText); |
|||
dykMain.setEditSummary("Bot automatically updating DYK template with hooks copied from " + |
|||
"[[" + QueueRootLoc + nextQueue + "|" + "queue " + nextQueue + "]]"); |
|||
try { |
|||
dykMain.setEditTimestamp(OverrideEditConflicts); |
|||
} catch (ParseException e) {} // impossible |
|||
writeContent(dykMain); |
|||
// purge the main page |
|||
purge("Main Page", true); |
|||
// reset DYK time |
|||
String dykTimePageText = dykTimePage.getText(); |
|||
String dykTime = dykTimePageText.trim(); |
|||
if (dykTime.lastIndexOf("\n") != -1) { // if there are multiple lines, get the last line |
|||
dykTime = dykTime.substring(dykTime.lastIndexOf("\n")).trim(); |
|||
} |
|||
String timeEditSummary = "Resetting the clock"; |
|||
GregorianCalendar writeTime = new GregorianCalendar(BotLocale); |
|||
writeTime.setTimeInMillis(time.getTimeInMillis()); |
|||
writeTime.set(Calendar.SECOND, 0); |
|||
writeTime.set(Calendar.MILLISECOND, 0); |
|||
int drift = calculateDrift(writeTime, timeBetweenUpdates); |
|||
if (drift != 0) { |
|||
writeTime.add(Calendar.MINUTE, drift); |
|||
timeEditSummary += ", with drift"; |
|||
} |
|||
String wikiTimeString = APITimestampFormat.format(new Date(writeTime.getTimeInMillis())); |
|||
dykTimePage.setText(dykTimePageText.substring(0, dykTimePageText.indexOf(dykTime)) + |
|||
wikiTimeString); |
|||
dykTimePage.setEditSummary(timeEditSummary); |
|||
try { |
|||
dykTimePage.setEditTimestamp(OverrideEditConflicts); |
|||
} catch (ParseException e) {} // impossible |
|||
writeContent(dykTimePage); |
|||
// find old file and associated tags |
|||
DYKFile file = findFile(oldHooks); |
|||
checkFileTags(file); |
|||
// archive old hooks |
|||
archive(oldHooks, time, file); |
|||
// remove any commented-out wikitext from queueText |
|||
while (queueText.indexOf("<!--") != -1) { |
|||
int endCommentIndex = queueText.indexOf("-->");
if (endCommentIndex == -1) {
endCommentIndex = queueText.length();
} else {
endCommentIndex += 3; // step past the closing "-->"
}
|||
queueText = queueText.substring(0, queueText.indexOf("<!--")) + |
|||
queueText.substring(endCommentIndex); |
|||
} |
|||
// parse the credits |
|||
LinkedList<DYKCredit> credits = parseCredits(queueText, newHooks); |
|||
// tag article talk pages |
|||
tagArticles(time, credits); |
|||
// tag user talk pages |
|||
giveUserCredits(credits, dykbotdo); |
|||
// clear queue |
|||
queue.setText(ClearTemplate); |
|||
queue.setEditSummary("Update is done, removing the hooks"); |
|||
try { |
|||
queue.setEditTimestamp(OverrideEditConflicts); |
|||
} catch (ParseException e) {} // impossible |
|||
writeContent(queue); |
|||
// update next queue number |
|||
int updatedNextQueue = (nextQueue % NumQueues) + 1; |
|||
SimpleArticle nextQueuePage = new SimpleArticle("" + updatedNextQueue, NextUpdateQueueLoc); |
|||
nextQueuePage.setEditSummary("Next queue is [[" + QueueRootLoc + updatedNextQueue + "|" + |
|||
"queue " + updatedNextQueue + "]]"); |
|||
try { |
|||
nextQueuePage.setEditTimestamp(OverrideEditConflicts); |
|||
} catch (ParseException e) {} // impossible |
|||
writeContent(nextQueuePage); |
|||
// delete/unprotect and tag outgoing file |
|||
//boolean fileDeleted = deleteFile(file); // Jan 2017 - delete and unprotect don't work, |
|||
//if (!fileDeleted) unprotectFile(file); // likely due to authentication issues |
|||
tagFile(file, time); |
|||
nextQueue = updatedNextQueue;
}

/**
|||
* Reads the next queue number from NextUpdateQueueLoc |
|||
* @return next queue number, or 0 if there was an error parsing |
|||
*/ |
|||
private int findNextQueueNumber() { |
|||
SimpleArticle nextQueuePage = new SimpleArticle(readContent(NextUpdateQueueLoc)); |
|||
int nextQueue = 0; |
|||
try { |
|||
nextQueue = Integer.parseInt(nextQueuePage.getText()); |
|||
} catch (NumberFormatException e) { |
|||
logError("Could not parse [[" + NextUpdateQueueLoc + "]]; check if it's a number 1-" + NumQueues); |
|||
} |
|||
return nextQueue; |
|||
} |
|||
/** |
|||
* |
|||
* @param updateTime the calendar corresponding to the time of the current update |
|||
* @param timeBetweenUpdates in seconds |
|||
* @return drift in minutes; negative is advance, positive is delay |
|||
*/ |
|||
private int calculateDrift(GregorianCalendar updateTime, final int timeBetweenUpdates) { |
|||
final long millisecondsPerMinute = 60 * 1000; |
|||
final long millisecondsPerDay = 24 * 60 * millisecondsPerMinute; //86400000 |
|||
long leastDifferenceFrom0000 = Long.MAX_VALUE; |
|||
HashSet<Long> differences = new HashSet<Long>(); |
|||
GregorianCalendar updateIter = new GregorianCalendar(BotLocale); |
|||
updateIter.setTimeInMillis(updateTime.getTimeInMillis()); |
|||
while (true) { |
|||
long currentDifferenceFrom0000 = updateIter.getTimeInMillis()%millisecondsPerDay; |
|||
if (currentDifferenceFrom0000 > millisecondsPerDay/2) { |
|||
currentDifferenceFrom0000 = -(millisecondsPerDay - currentDifferenceFrom0000); |
|||
} |
|||
if (Math.abs(leastDifferenceFrom0000) > Math.abs(currentDifferenceFrom0000)) { |
|||
leastDifferenceFrom0000 = currentDifferenceFrom0000; |
|||
} |
|||
if (differences.contains(currentDifferenceFrom0000) || differences.size() >= 24) { |
|||
break; |
|||
} |
|||
differences.add(currentDifferenceFrom0000); |
|||
updateIter.add(Calendar.SECOND, timeBetweenUpdates); |
|||
} |
|||
String driftText = readContent(DriftLoc).getText(); |
|||
int maxAdvance = 0; |
|||
int maxDelay = 0; |
|||
try { |
|||
String[] driftLines = driftText.split("\n"); |
|||
maxAdvance = Integer.parseInt(driftLines[0].split(":")[1].trim()); // in minutes |
|||
maxDelay = Integer.parseInt(driftLines[1].split(":")[1].trim()); // in minutes
|||
} catch (Exception e) { |
|||
log("Couldn't parse drift"); |
|||
return 0; |
|||
} |
|||
if (leastDifferenceFrom0000 > 0) { |
|||
return -Math.min(maxAdvance, (int)(leastDifferenceFrom0000/millisecondsPerMinute)); |
|||
} else if (leastDifferenceFrom0000 < 0) { |
|||
return Math.min(maxDelay, (int)(-leastDifferenceFrom0000/millisecondsPerMinute)); |
|||
} else { |
|||
return 0; |
|||
} |
|||
} |
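// Illustrative note, not part of the original bot: a worked example of the drift logic above,
// with hypothetical values. With updates every six hours (timeBetweenUpdates = 21600) and the
// current update landing at 00:40 UTC, the smallest offset from midnight among the projected
// update times is +40 minutes, so with maxAdvance = 30 the method returns -30 (advance the
// clock 30 minutes toward 00:00). With an update at 23:50 UTC the smallest offset is -10
// minutes, so with maxDelay = 15 it returns +10. The page at DriftLoc is parsed as two
// "label: number" lines, read here as the maximum advance and maximum delay in minutes.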
|||
/** |
|||
* Archives the latest set to ArchiveLoc |
|||
* @param the hooks to be archived |
|||
* @param a Calendar object containing the time that DYK was updated |
|||
*/ |
|||
private void archive(String hooks, Calendar updateTime, DYKFile file) { |
|||
do { |
|||
try { |
|||
if (file != null) { |
|||
// if the file was cropped, point to the original file in the archives |
|||
String originalFile = file.getCroppedFrom(); |
|||
if (originalFile != null) { |
|||
int fileStartIndex = hooks.indexOf(file.getFilename()); |
|||
hooks = hooks.substring(0, fileStartIndex) + originalFile + hooks.substring(fileStartIndex + file.getFilename().length()); |
|||
} |
|||
} |
|||
SimpleArticle archivePage = new SimpleArticle(readContent(ArchiveLoc)); |
|||
String timeHeading = new SimpleDateFormat("'*'''''''''''HH:mm, d MMMM yyyy '(UTC)'''''''''''", BotLocale). |
|||
format(updateTime.getTime()); |
|||
String sectionHeading = new SimpleDateFormat("'==='d MMMM yyyy'==='", BotLocale).format(updateTime.getTime()); |
|||
String archiveText = archivePage.getText(); |
|||
// check if there is a section heading already for today |
|||
int thisDateIndex = archiveText.indexOf(sectionHeading); |
|||
if (thisDateIndex == -1) { // if there isn't, create a new section heading and add the new set |
|||
int firstSectionIndex = archiveText.indexOf("===", archiveText.indexOf("<!--BOTPOINTER-->")); |
|||
if (firstSectionIndex == -1) { // if no archive sections exist (ie at the very beginning of a month) |
|||
firstSectionIndex = archiveText.indexOf("\n", archiveText.indexOf("<!--BOTPOINTER-->")) + 1; |
|||
} |
|||
archiveText = archiveText.substring(0, firstSectionIndex) + |
|||
sectionHeading + "\n" + timeHeading + "\n" + hooks + "\n\n" + |
|||
archiveText.substring(firstSectionIndex); |
|||
} else { // otherwise add the set under the section heading for today |
|||
int writeIndex = thisDateIndex + sectionHeading.length(); |
|||
archiveText = archiveText.substring(0, writeIndex) + "\n" + timeHeading + "\n" + |
|||
hooks + "\n" + archiveText.substring(writeIndex); |
|||
} |
|||
archivePage.setText(archiveText); |
|||
archivePage.setEditSummary("Archiving latest set"); |
|||
writeContent(archivePage); |
|||
return; |
|||
} catch (EditConflictException e) { |
|||
log("Edit conflict caught"); |
|||
// will try again because of while(true) |
|||
} catch (DYKResetException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
logError("Error occurred while archiving"); |
|||
return; |
|||
} |
|||
} while(true); |
|||
} |
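// Illustrative note, not part of the original bot: for a hypothetical update at 12:00 on
// 25 June 2011, the set is filed at [[Wikipedia:Recent additions]] under a "===25 June 2011==="
// heading, below a "*'''''12:00, 25 June 2011 (UTC)'''''" line; if no section for that date
// exists yet, a new one is created at the top of the archive, just after the
// <!--BOTPOINTER--> marker.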
|||
/** |
|||
* Parses the credits; associates each article title with the user to be credited |
|||
* and the hook |
|||
* @param the wikitext of the queue |
|||
* @param the hooks in the queue |
|||
* @return parsed credits |
|||
*/ |
|||
private LinkedList<DYKCredit> parseCredits(String queueText, String hooks) { |
|||
LinkedList<DYKCredit> credits = new LinkedList<DYKCredit>(); |
|||
// unescape all html encoding in the hooks; for example, "M&amp;M" will become "M&M"
|||
hooks = StringEscapeUtils.unescapeHtml(hooks); |
|||
// find all credit templates and parse article titles, users, and hooks |
|||
int dykMakeIndex = queueText.indexOf("{{DYKmake"); |
|||
int dykNomIndex = queueText.indexOf("{{DYKnom"); |
|||
while (dykMakeIndex != -1 || dykNomIndex != -1) { |
|||
int nextCreditIndex; |
|||
if (dykMakeIndex == -1) { |
|||
nextCreditIndex = dykNomIndex; |
|||
} else if (dykNomIndex == -1) { |
|||
nextCreditIndex = dykMakeIndex; |
|||
} else { |
|||
nextCreditIndex = Math.min(dykMakeIndex, dykNomIndex); |
|||
} |
|||
int closeTemplateIndex = queueText.indexOf("}}", nextCreditIndex); |
|||
int closeTemplatesEncountered = 1; |
|||
while (queueText.substring(nextCreditIndex + 2, closeTemplateIndex) |
|||
.split("\\{\\{").length > closeTemplatesEncountered) { |
|||
closeTemplateIndex = queueText.indexOf("}}", closeTemplateIndex+2); |
|||
++closeTemplatesEncountered; |
|||
} |
|||
String creditTemplate = queueText.substring(nextCreditIndex, closeTemplateIndex + 2); |
|||
boolean dykMake = (nextCreditIndex == dykMakeIndex); |
|||
// these next two lines are the "increment" part of the while loop
|||
dykMakeIndex = queueText.indexOf("{{DYKmake", nextCreditIndex + 1); |
|||
dykNomIndex = queueText.indexOf("{{DYKnom", nextCreditIndex + 1); |
|||
// end increment |
|||
LinkedList<String> creditTemplatePieces = new LinkedList<String>(Arrays.asList( |
|||
creditTemplate.substring(2, creditTemplate.length() - 2).split("\\|"))); |
|||
int numContinuing = 0; |
|||
for (int i=0; i < creditTemplatePieces.size(); ) { |
|||
boolean continuation = numContinuing > 0; |
|||
int numOpenTemplates = creditTemplatePieces.get(i).split("\\{\\{").length - 1; |
|||
int numCloseTemplates = creditTemplatePieces.get(i).split("\\}\\}").length - 1; |
|||
numContinuing = numContinuing + numOpenTemplates - numCloseTemplates; |
|||
if (continuation) { |
|||
creditTemplatePieces.set(i - 1, creditTemplatePieces.get(i - 1) + "|" + creditTemplatePieces.get(i)); |
|||
creditTemplatePieces.remove(i); |
|||
} else { |
|||
++i; |
|||
} |
|||
} |
|||
String title = null; |
|||
String user = null; |
|||
String subpage = null; |
|||
boolean firstPiece = true; |
|||
boolean invalidCreditTemplate = false; |
|||
int unnamedParamsSeen = 0; |
|||
for (String piece : creditTemplatePieces) { |
|||
String trimmedPiece = piece.trim(); |
|||
if (firstPiece) { |
|||
if (!(trimmedPiece.equals("DYKmake") || trimmedPiece.equals("DYKnom"))) { |
|||
invalidCreditTemplate = true; |
|||
break; |
|||
} |
|||
firstPiece = false; |
|||
} else { |
|||
int firstEqualsIndex = piece.indexOf('='); |
|||
if (firstEqualsIndex != -1) { |
|||
String paramName = piece.substring(0, firstEqualsIndex).trim(); |
|||
String paramValue = piece.substring(firstEqualsIndex + 1).trim(); |
|||
paramValue = StringEscapeUtils.unescapeHtml(paramValue); |
|||
if (paramName.equals("1")) title = paramValue; |
|||
else if (paramName.equals("2")) user = paramValue; |
|||
else if (paramName.equals("subpage")) subpage = paramValue; |
|||
else { |
|||
logError("Invalid credit template: <nowiki>" + creditTemplate + "</nowiki>"); |
|||
invalidCreditTemplate = true; |
|||
break; |
|||
} |
|||
} else { |
|||
trimmedPiece = StringEscapeUtils.unescapeHtml(trimmedPiece); |
|||
if (unnamedParamsSeen == 0) { // first unnamed param is title |
|||
title = trimmedPiece; |
|||
} else if (unnamedParamsSeen == 1) { // second is user |
|||
user = trimmedPiece; |
|||
} else { |
|||
logError("Invalid credit template: <nowiki>" + creditTemplate + "</nowiki>"); |
|||
invalidCreditTemplate = true; |
|||
break; |
|||
} |
|||
++unnamedParamsSeen; |
|||
} |
|||
} |
|||
} |
|||
if (title == null || user == null) { |
|||
logError("Invalid credit template: <nowiki>" + creditTemplate + "</nowiki>"); |
|||
invalidCreditTemplate = true; |
|||
} |
|||
if (invalidCreditTemplate) continue; |
|||
// check for common formatting errors |
|||
if (title.startsWith("[[")) { |
|||
title = title.substring(2); |
|||
} |
|||
if (title.endsWith("]]")) { |
|||
title = title.substring(0, title.length() - 2); |
|||
} |
|||
if (title.equals("Example") || title.isEmpty()) { |
|||
continue; |
|||
} |
|||
boolean errorInArticleTitle = false; |
|||
String hook = null; |
|||
// make sure the title corresponds to a real article |
|||
title = title.substring(0, 1).toUpperCase() + title.substring(1); // capitalize first letter |
|||
SimpleArticle article = new SimpleArticle(readContent(title)); |
|||
if (article.getText().isEmpty()) { // if the article's been deleted, or otherwise nonexistent |
|||
logError("Article [[" + title + "]] does not exist"); |
|||
errorInArticleTitle = true; |
|||
} else { |
|||
hook = findHook(hooks, title); |
|||
} |
|||
if (!errorInArticleTitle) { |
|||
String redirectTo = checkForPageRedirect(article.getText()); |
|||
if (redirectTo != null) { |
|||
article = new SimpleArticle(readContent(redirectTo)); |
|||
if (article.getText().isEmpty()) { |
|||
logError("Article [[" + title + "]] is a redirect to a deleted article"); |
|||
errorInArticleTitle = true; |
|||
} else if (hook == null) { // if there was no matching hook before, try again |
|||
hook = findHook(hooks, article.getLabel()); |
|||
} |
|||
} |
|||
} |
|||
if (!errorInArticleTitle && hook == null) { |
|||
// if we couldn't find the hook before, let's try other options |
|||
// check for redirects to the given page |
|||
LinkedList<String> otherPossibleTitles = findRedirectsToPage(article.getLabel(), 50); |
|||
// check for odd characters (like &nbsp;)
|||
String normalizedTitle = normalizeTitle(article.getLabel()); |
|||
if (!article.getLabel().equals(normalizedTitle)) { |
|||
otherPossibleTitles.add(normalizedTitle); |
|||
} |
|||
for (String possibility : otherPossibleTitles) { |
|||
hook = findHook(hooks, possibility); |
|||
if (hook != null) break; |
|||
} |
|||
if (hook == null) { |
|||
hook = findHook(hooks.replaceAll(StringEscapeUtils.unescapeHtml("&nbsp;"), " "),
|||
article.getLabel()); |
|||
} |
|||
} |
|||
if (!errorInArticleTitle && hook == null) { |
|||
logError("Couldn't find hook for [[" + title + "]]"); |
|||
} |
|||
if (user.contains("}}")) user = expandTemplates(user); |
|||
String userTalkPage = validateUserTalkPage(user); // make sure this is a valid user talk page |
|||
credits.add(new DYKCredit(article.getLabel(), userTalkPage, hook, errorInArticleTitle, dykMake, subpage)); |
|||
} |
|||
return credits; |
|||
} |
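// Illustrative note, not part of the original bot: an example of what parseCredits() produces
// for assumed input. Given the credit line {{DYKmake|Carrot soup|ExampleUser|subpage=Carrot soup}}
// and a hook "... that '''[[carrot soup]]''' can be served cold?", the method yields a DYKCredit
// with the article "Carrot soup", the talk page "User talk:ExampleUser" (assuming that account
// exists), the matching hook text, dykMake = true, and the nomination subpage "Carrot soup".
// "ExampleUser" and the hook are hypothetical.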
|||
/** |
|||
* Finds the hook of the title given in the hooks given |
|||
* @param hooks in the set |
|||
* @param title of the article |
|||
* @return the article's hook, or null if none found |
|||
*/ |
|||
private String findHook(String hooks, String title) { |
|||
String hook = null; |
|||
// convert to lower case and underscores to spaces |
|||
String normalizedTitle = title.replaceAll("_", " ").toLowerCase(); |
|||
int titleIndex = hooks.toLowerCase().indexOf(normalizedTitle); |
|||
while (titleIndex != -1 && (hook == null || hook.contains("px|") || |
|||
hook.contains("100x100px") || hook.toLowerCase().contains("{{dyk listen") |
|||
|| hook.toLowerCase().contains("{{main page image"))) { |
|||
// "px" parts are in case the image caption or filename has the title |
|||
int startOfHook = hooks.lastIndexOf("\n", titleIndex); |
|||
if (startOfHook == -1) startOfHook = 0; |
|||
int endOfHook = hooks.indexOf("\n", titleIndex); |
|||
if (endOfHook == -1) endOfHook = hooks.length(); |
|||
hook = hooks.substring(startOfHook, endOfHook).trim(); |
|||
titleIndex = hooks.toLowerCase().indexOf(normalizedTitle, |
|||
titleIndex + normalizedTitle.length()); |
|||
} |
|||
if (hook == null || hook.contains("px|") || hook.contains("100x100px") |
|||
|| hook.toLowerCase().contains("{{dyk listen") |
|||
|| hook.toLowerCase().contains("{{main page image")) { |
|||
hook = findShipHook(hooks, title); |
|||
if (hook == null) return null; |
|||
} |
|||
// hook formatting |
|||
while (hook.substring(hook.length() - 4).equalsIgnoreCase("<br>")) { |
|||
// http://en.wikipedia.org/w/index.php?title=Template:Did_you_know&oldid=2521104 |
|||
hook = hook.substring(0, hook.length() - 4).trim(); |
|||
} |
|||
if (hook.substring(hook.length() - 5).equalsIgnoreCase("</li>")) { |
|||
// http://en.wikipedia.org/w/index.php?title=Template:Did_you_know&oldid=9218861 |
|||
hook = hook.substring(0, hook.length() - 5).trim(); |
|||
} |
|||
if (hook.substring(0, 4).equalsIgnoreCase("<li>")) { |
|||
hook = hook.substring(4, hook.length()).trim(); |
|||
} |
|||
if (hook.charAt(0) == '*') { |
|||
hook = hook.substring(1); |
|||
} |
|||
if (hook.substring(0, 7).equals("{{*mp}}")) { |
|||
hook = hook.substring(7); |
|||
} |
|||
if (hook.contains("{{*mp}}")) { |
|||
log("Hook for [[" + title + "]] has an extra {{*mp}}; hook mashup?"); |
|||
} |
|||
return hook; |
|||
} |
|||
/** |
|||
* Finds the hook of the title given if a ship template is used (like {{SS}}) |
|||
* @param hooks in the set |
|||
* @param title of the article |
|||
* @return the article's hook, or null if none found |
|||
*/ |
|||
private String findShipHook(String hooks, String title) { |
|||
int i = 3; |
|||
// figure out which template matches the title |
|||
for (; i < shipTemplates.length; ++i) { |
|||
if (title.toLowerCase().startsWith(shipTemplates[i].toLowerCase())) break; |
|||
} |
|||
if (i == shipTemplates.length) { |
|||
if (title.toLowerCase().contains(" class ") && hooks.toLowerCase().contains("{{sclass")) { |
|||
// looks like there's a possible match with {{sclass}} or {{sclass2}}
|||
i = 1; |
|||
} else if (title.toLowerCase().startsWith("japanese submarine") && hooks.toLowerCase().contains("{{jsub")) { |
|||
// match with {{Jsub}} |
|||
i = 2; |
|||
} else if (hooks.toLowerCase().contains("{{ship")) { |
|||
// if none of the specific templates match, maybe {{ship}} will |
|||
i = 0; |
|||
} else { |
|||
return null; |
|||
} |
|||
} |
|||
for (int templateIndex = hooks.toLowerCase().indexOf("{{" + shipTemplates[i].toLowerCase()); |
|||
templateIndex != -1; |
|||
templateIndex = hooks.toLowerCase().indexOf("{{" + shipTemplates[i].toLowerCase(), templateIndex + 2)) { |
|||
// find the ship template |
|||
int endIndex = templateIndex; |
|||
for (int j=2; hooks.indexOf("}}", endIndex + 2) != -1; j++) { |
|||
endIndex = hooks.indexOf("}}", endIndex + 2); |
|||
if (hooks.substring(templateIndex, endIndex).split("\\{\\{").length == j) { |
|||
break; |
|||
} |
|||
} |
|||
String template = hooks.substring(templateIndex + 2, endIndex); |
|||
// parse the ship template and assemble it into a title |
|||
String[] cutup = template.split("\\|"); |
|||
int base = 0; |
|||
if (i < 2) base = 1; |
|||
String titleFromTemplate; |
|||
if (i != 1) { |
|||
titleFromTemplate = cutup[base].trim() + " " + cutup[base+1].trim(); |
|||
if (cutup.length >= base+3 && !cutup[base+2].isEmpty() && |
|||
!cutup[base+2].trim().equals("3=2")) { |
|||
titleFromTemplate += " (" + cutup[base+2].trim() + ")"; |
|||
} |
|||
} else { // {{sclass}} and {{sclass2}} |
|||
titleFromTemplate = cutup[base].trim() + " class " + cutup[base+1].trim(); |
|||
} |
|||
if (i == 2) { |
|||
titleFromTemplate = "Japanese submarine " + titleFromTemplate.substring(5); |
|||
} |
|||
// if the title from the credits and the title assembled from the template |
|||
// match, we've found the correct hook |
|||
if (titleFromTemplate.equalsIgnoreCase(title)) { |
|||
return findHook(hooks, template); |
|||
} |
|||
} |
|||
return null; |
|||
} |
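// Illustrative note, not part of the original bot: a hypothetical example of the matching above.
// For the credit title "USS Monitor (1862)" and a hook that uses {{USS|Monitor|1862}}, the
// template pieces are reassembled into "USS Monitor (1862)", which matches the title, so the
// hook containing that template is returned via findHook().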
|||
/**
* Tag article talk pages
* If {{Article history}} exists on the talk page, the bot will add the DYK credit there instead of
* adding a new {{DYK talk}}
* @param a Calendar containing the time that DYK was last updated
* @param the credits (contains article title, username, and hook)
*/
private void tagArticles(Calendar time, LinkedList<DYKCredit> credits) {
// make the start of a DYK talk tag without the hook
String tag = new SimpleDateFormat("'{{DYK talk|'d MMMM'|'yyyy", BotLocale).format(time.getTime());
String editSummaryTimestamp = new SimpleDateFormat("d MMMM yyyy", BotLocale).format(time.getTime());
HashSet<String> taggedArticles = new HashSet<String>();
// tag articles
for (DYKCredit credit: credits) {
if (credit.errorInArticleTitle) continue;
if (taggedArticles.contains(credit.articleTitle)) continue;
boolean editConflicted = false;
do {
editConflicted = false;
try {
|||
// build up the tag |
|||
String tagWithHook = tag; |
|||
if (credit.hook != null) { |
|||
tagWithHook += "|entry=" + credit.hook; |
|||
} |
|||
if (credit.nompage != null) { |
|||
tagWithHook += "|nompage=" + credit.nompage; |
|||
} |
|||
tagWithHook += "}}"; |
|||
// get the talk page
SimpleArticle talkPage = new SimpleArticle(readContent("Talk:" + credit.articleTitle));
String talkContent = talkPage.getText();
if (talkContent.isEmpty()) talkPage.setEditTimestamp(OverrideEditConflicts);
String talkContentLowerCase = talkContent.toLowerCase();
int articleHistoryIndex = Math.max(talkContentLowerCase.indexOf("{{article history"),
talkContentLowerCase.indexOf("{{articlehistory"));
|||
if (articleHistoryIndex != -1) { |
|||
// if it has {{Article history}} |
|||
String articleHistory = talkContent.substring(articleHistoryIndex, |
|||
talkContent.indexOf("}}", articleHistoryIndex)); |
|||
String articleHistoryNew = new String(articleHistory); |
|||
int currentStatusIndex = articleHistoryNew.indexOf("currentstatus"); |
|||
while (currentStatusIndex != -1 && |
|||
!articleHistoryNew.substring(0, currentStatusIndex).trim().endsWith("|")) { |
|||
currentStatusIndex = articleHistoryNew.indexOf("currentstatus", currentStatusIndex + 13); |
|||
} |
|||
String currentStatusString = "|currentstatus"; //default |
|||
if (currentStatusIndex != -1) { |
|||
int temp = currentStatusIndex; |
|||
currentStatusIndex = articleHistoryNew.lastIndexOf("|", currentStatusIndex); |
|||
currentStatusString = articleHistoryNew.substring(currentStatusIndex, temp + 13); |
|||
} |
|||
int dykdateIndex = articleHistoryNew.indexOf("dykdate"); |
|||
while (dykdateIndex != -1 && |
|||
!articleHistoryNew.substring(0, dykdateIndex).trim().endsWith("|")) { |
|||
dykdateIndex = articleHistoryNew.indexOf("dykdate", dykdateIndex + 7); |
|||
} |
|||
int pipeAfterIndex = articleHistoryNew.indexOf("|", dykdateIndex + 7); |
|||
if (pipeAfterIndex == -1) { |
|||
pipeAfterIndex = articleHistoryNew.length(); |
|||
} |
|||
boolean blankDYKdate = articleHistoryNew.substring(dykdateIndex + 7, |
|||
pipeAfterIndex).trim().equals("="); |
|||
String dykDateParam = editSummaryTimestamp; |
|||
String dykEntryAndNom = ""; |
|||
if (credit.hook != null) dykEntryAndNom += "\n|dykentry=" + credit.hook; |
|||
if (credit.nompage != null) dykEntryAndNom += "\n|dyknom=" + credit.nompage; |
|||
if (currentStatusIndex == -1 && dykdateIndex == -1) { |
|||
// if there's no currentStatus or dykdate |
|||
logError("Could not tag [[" + credit.articleTitle + |
|||
"]] by {{tl|Article history}}; please tag article manually"); |
|||
} else if (dykdateIndex == -1 || blankDYKdate) { |
|||
if (dykdateIndex == -1) { |
|||
articleHistoryNew = articleHistoryNew.replace(currentStatusString, "|dykdate=" + |
|||
dykDateParam + dykEntryAndNom + "\n" + currentStatusString); |
|||
} else if (blankDYKdate) { |
|||
String dykdateOld = articleHistoryNew.substring(dykdateIndex, |
|||
articleHistoryNew.indexOf("=", dykdateIndex) + 1); |
|||
// remove old |dykentry if it exists |
|||
int dykentryIndex = articleHistoryNew.indexOf("dykentry", dykdateIndex); |
|||
if (dykentryIndex != -1) { |
|||
int onePastPipeAfterEntryIndex = articleHistoryNew.indexOf("|", dykentryIndex); |
|||
if (onePastPipeAfterEntryIndex == -1) { |
|||
onePastPipeAfterEntryIndex = articleHistoryNew.length(); |
|||
} else { |
|||
++onePastPipeAfterEntryIndex; |
|||
} |
|||
articleHistoryNew = articleHistoryNew.substring(0, dykentryIndex) + |
|||
articleHistoryNew.substring(onePastPipeAfterEntryIndex); |
|||
} |
|||
// remove old |dyknom if it exists |
|||
int dyknomIndex = articleHistoryNew.indexOf("dyknom", dykdateIndex); |
|||
if (dyknomIndex != -1) { |
|||
int onePastPipeAfterNomIndex = articleHistoryNew.indexOf("|", dyknomIndex); |
|||
if (onePastPipeAfterNomIndex == -1) { |
|||
onePastPipeAfterNomIndex = articleHistoryNew.length(); |
|||
} else { |
|||
++onePastPipeAfterNomIndex; |
|||
} |
|||
articleHistoryNew = articleHistoryNew.substring(0, dyknomIndex) + |
|||
articleHistoryNew.substring(onePastPipeAfterNomIndex); |
|||
} |
|||
articleHistoryNew = articleHistoryNew.replace(dykdateOld, dykdateOld + |
|||
dykDateParam + dykEntryAndNom); |
|||
} |
|||
talkContent = talkContent.replace(articleHistory, articleHistoryNew); |
|||
talkPage.setText(talkContent.trim()); |
|||
talkPage.setEditSummary("Article appeared on [[WP:Did you know|DYK]] on " + |
|||
editSummaryTimestamp + ", adding to " + |
|||
"{{[[Template:Article history|Article history]]}}"); |
|||
writeContent(talkPage); |
|||
} else { |
|||
log("{{Article history}} up to date for article " + credit.articleTitle); |
|||
} |
|||
} else { // if it doesn't have {{Article history}}, add a new tag |
|||
int indexOfFirstSection = talkContent.indexOf("=="); |
|||
if (indexOfFirstSection == -1) indexOfFirstSection = talkContent.length(); |
|||
String zeroSection = talkContent.substring(0, indexOfFirstSection); |
|||
String theRest = talkContent.substring(indexOfFirstSection); |
|||
int lastTemplateIndex = findLastTemplateIndex(zeroSection); |
|||
String zeroSectionA = zeroSection.substring(0, lastTemplateIndex); |
|||
String zeroSectionB = zeroSection.substring(lastTemplateIndex); |
|||
talkContent = zeroSectionA.trim() + "\n" + tagWithHook + "\n\n" + zeroSectionB + theRest; |
|||
talkPage.setText(talkContent.trim()); |
|||
talkPage.setEditSummary("Article appeared on [[WP:Did you know|DYK]] on " + |
|||
editSummaryTimestamp + ", adding {{[[Template:DYK talk|DYK talk]]}}"); |
|||
writeContent(talkPage); |
|||
} |
|||
taggedArticles.add(credit.articleTitle); |
|||
} catch (EditConflictException e) { |
|||
log("Edit conflict caught"); |
|||
editConflicted = true; |
|||
} catch (DYKResetException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
logError("Error occurred when attempting to tag [[" + credit.articleTitle + "]]"); |
|||
} |
|||
} while (editConflicted); |
|||
} |
|||
} |
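// Illustrative note, not part of the original bot: with a hypothetical update date of
// 25 June 2011, a talk page that already carries {{Article history|...|currentstatus=GA}} gets
// "|dykdate=25 June 2011" plus "|dykentry=<the hook>" inserted just before |currentstatus,
// while a talk page without {{Article history}} gets a new
// {{DYK talk|25 June|2011|entry=<the hook>}} placed after the last banner template in the
// lead section.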
|||
/** |
|||
* Tag user talk pages |
|||
* @param the credits (contains article title, username, and hook) |
|||
* @param the {{DYKbotdo}} template |
|||
*/ |
|||
private void giveUserCredits(LinkedList<DYKCredit> credits, String dykbotdo) { |
|||
for (DYKCredit credit : credits) { |
|||
if (credit.userTalkPage == null) continue; |
|||
boolean editConflicted = false; |
|||
do { |
|||
editConflicted = false; |
|||
try { |
|||
// tag user talk page |
|||
SimpleArticle userTalk = readContent(credit.userTalkPage); |
|||
if (userTalk.getText().isEmpty()) userTalk.setEditTimestamp(OverrideEditConflicts); |
|||
userTalk.addText("\n\n==DYK for " + credit.articleTitle + "=="); |
|||
String creditTemplate; |
|||
if (credit.dykMake) { // if it's {{DYKmake}} |
|||
creditTemplate = "\n{{subst:Template:DYKmake/DYKmakecredit"; |
|||
} else { // if it's {{DYKNom}} |
|||
creditTemplate = "\n{{subst:Template:DYKnom/DYKnomcredit"; |
|||
} |
|||
creditTemplate += " |article=" + credit.articleTitle; |
|||
if (credit.hook != null) creditTemplate += " |hook=" + credit.hook; |
|||
if (credit.nompage != null) creditTemplate += " |nompage=" + credit.nompage; |
|||
creditTemplate += " |optional= }} "; |
|||
userTalk.addText(creditTemplate); |
|||
int dykBotDoPipeIndex = dykbotdo.indexOf("|"); |
|||
if (dykBotDoPipeIndex == -1) { |
|||
userTalk.addText("~~~~"); |
|||
} else { |
|||
userTalk.addText(dykbotdo.substring(dykBotDoPipeIndex + 1, |
|||
dykbotdo.lastIndexOf("}}"))); |
|||
userTalk.addText(" ~~~~~"); |
|||
} |
|||
// form edit summary |
|||
String adminUsername = findUserLink(dykbotdo); |
|||
String editSummary = "Giving DYK credit for [[" + credit.articleTitle + "]]"; |
|||
if (adminUsername != null) { |
|||
editSummary += " on behalf of [[User:" + adminUsername + "|" + |
|||
adminUsername + "]]"; |
|||
} |
|||
userTalk.setEditSummary(editSummary); |
|||
// edit talk page |
|||
writeContent(userTalk); |
|||
} catch (EditConflictException e) { |
|||
editConflicted = true; |
|||
log("Edit conflict caught"); |
|||
} catch (DYKResetException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
logError("Error occurred while distributing user credits"); |
|||
} |
|||
} while (editConflicted); |
|||
} |
|||
} |
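// Illustrative note, not part of the original bot: for a hypothetical {{DYKmake|Carrot soup|ExampleUser}}
// credit, the bot appends to [[User talk:ExampleUser]] a new "==DYK for Carrot soup==" section
// containing {{subst:Template:DYKmake/DYKmakecredit |article=Carrot soup |hook=... |optional= }},
// signed either with ~~~~ or, if {{DYKbotdo}} carried a signature after its pipe, with that
// signature plus a timestamp, on behalf of the promoting admin.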
|||
/** |
|||
* Checks if a user exists or has been renamed |
|||
* If the user talk page redirects to another user talk, this method will return the target username |
|||
* Otherwise, if the username is not registered and not an IP address, null is returned |
|||
* @param username to check |
|||
* @return a valid username, or null if none |
|||
*/ |
|||
private String validateUserTalkPage(String username) { |
|||
// example credits aren't valid |
|||
if (username.equals("Editor") || username.equals("Nominator") || username.isEmpty()) { |
|||
return null; |
|||
} |
|||
String userTalkPage = "User talk:" + username; |
|||
// check if the talk page redirects to another page (if the user's been renamed) |
|||
SimpleArticle talkPage = new SimpleArticle(readContent(userTalkPage)); |
|||
String redirectTo = checkForPageRedirect(talkPage.getText()); |
|||
if (redirectTo != null) { |
|||
int userTalkIndex = redirectTo.toLowerCase().indexOf("user talk:"); |
|||
if (userTalkIndex != -1) { |
|||
userTalkPage = redirectTo.substring(userTalkIndex); |
|||
username = userTalkPage.substring(10); |
|||
// support redirects to talk page archives
|||
// for example User talk:Djembayz -> User talk:Djembayz/Archive July 2014 |
|||
int slashIndex = username.indexOf("/"); |
|||
if (slashIndex != -1) username = username.substring(0, slashIndex); |
|||
} |
|||
} |
|||
// check if the username is registered |
|||
String apiURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query&list=users&ususers=" + |
|||
MediaWiki.encode(username); |
|||
Document doc = fetchUsingSAXBuilder(apiURL); |
|||
Element userInfo = doc.getRootElement().getChild("query", ns).getChild("users", ns).getChild("user", ns); |
|||
if (userInfo.getAttribute("missing") == null && userInfo.getAttribute("invalid") == null |
|||
&& !username.contains("|")) { |
|||
return userTalkPage; |
|||
} |
|||
// check if the user made edits (for IP addresses) |
|||
String apiURL2 = BaseEnWikiAPIURL + "api.php?format=xml&action=query&list=usercontribs&uclimit=1&ucprop=ids&ucuser=" + |
|||
MediaWiki.encode(username); |
|||
Document doc2 = fetchUsingSAXBuilder(apiURL2); |
|||
Element userContribs = doc2.getRootElement().getChild("query").getChild("usercontribs"); |
|||
if ((userContribs.getChildren().size() > 0) && !username.contains("|")) { |
|||
return userTalkPage; |
|||
} |
|||
// the username isn't registered or technically impossible |
|||
logError("The username '" + username + "' is invalid"); |
|||
return null; |
|||
} |
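// Illustrative note, not part of the original bot: e.g. if "User talk:OldName" (hypothetical)
// redirects to "User talk:NewName/Archive 3", the method follows the redirect, strips the
// subpage only for the existence check (the account "NewName" must be registered or have
// contributions), and returns the redirect target page as the place to post the credit.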
|||
/** |
|||
* Finds the link to the admins userpage in {{DYKbotdo}} |
|||
* @param the {{DYKbotdo}} tag |
|||
* @return the admin's username |
|||
*/ |
|||
private String findUserLink(String dykbotdo) { |
|||
try { |
|||
if (dykbotdo.contains("User:") || dykbotdo.contains("User talk:")) { |
|||
int userLinkIndex = Math.max(dykbotdo.indexOf("User:"), dykbotdo.indexOf("User talk:")); |
|||
return dykbotdo.substring(dykbotdo.indexOf(":", userLinkIndex) + 1, |
|||
dykbotdo.indexOf("|", userLinkIndex)); |
|||
} |
|||
} catch (Exception e) { |
|||
return null; |
|||
} |
|||
return null; |
|||
} |
|||
/** |
|||
* Finds the DYK sound/video/image from hooks wikitext |
|||
* If a sound or video file (.ogg) is used without the proper template, |
|||
* the bot will assume it's a video; these exceptions should be checked manually |
|||
*/ |
|||
private DYKFile findFile(String hooks) { |
|||
String hooksLowerCase = hooks.toLowerCase(); |
|||
if (hooksLowerCase.contains("{{dyk listen")) { // sound file |
|||
int startIndex = hooks.indexOf("|", hooksLowerCase.indexOf("{{dyk listen")) + 1; |
|||
int fileEndIndex = hooks.indexOf("|", startIndex); |
|||
String filename = hooks.substring(startIndex, fileEndIndex); |
|||
return new DYKFile(filename, "sound"); |
|||
} else if (hooksLowerCase.contains("{{tall image")) { |
|||
int startIndex = hooks.indexOf("|", hooksLowerCase.indexOf("{{tall image")) + 1; |
|||
int fileEndIndex = hooks.indexOf("|", startIndex); |
|||
String filename = hooks.substring(startIndex, fileEndIndex); |
|||
return new DYKFile(filename, "image"); |
|||
} else if (hooksLowerCase.contains("{{main page image")) { |
|||
// test cases: |
|||
// {{main page image|image=Carrot soup.jpg|caption=A cream of carrot soup with bread|width=120x133}} |
|||
// {{main page image |image=Carrot soup.jpg|caption=A cream of carrot soup with bread|width=120x133}} |
|||
// {{main page image | image=Carrot soup.jpg|caption=A cream of carrot soup with bread|width=120x133}} |
|||
// {{main page image | image = Carrot soup.jpg |caption=A cream of carrot soup with bread|width=120x133}} |
|||
// {{main page image|image=image:Carrot soup.jpg|caption=A cream of carrot soup with bread|width=120x133}} |
|||
// {{main page image|File:Carrot soup.jpg}} |
|||
// {{main page image|Carrot soup.jpg}} |
|||
// {{main page image|Carrot soup.jpg|A cream of carrot soup with bread}} |
|||
int fileStartIndex = hooksLowerCase.indexOf("|", hooksLowerCase.indexOf("{{main page image")) + 1; |
|||
int fileEndIndex = Math.min(hooks.indexOf("|", fileStartIndex), hooks.indexOf("}}", fileStartIndex)); |
|||
String filename = hooks.substring(fileStartIndex, fileEndIndex).trim(); |
|||
int equalsIndex = filename.indexOf('='); |
|||
if (equalsIndex != -1) { |
|||
String paramNameLowerCase = filename.substring(0, equalsIndex).trim(); |
|||
if (paramNameLowerCase.equals("image")) { |
|||
filename = filename.substring(equalsIndex + 1).trim(); |
|||
} |
|||
} |
|||
int colonIndex = filename.indexOf(':'); |
|||
if (colonIndex != -1) { |
|||
String prefixLowerCase = filename.substring(0, colonIndex).toLowerCase().trim(); |
|||
if (prefixLowerCase.equals("image") || prefixLowerCase.equals("file")) { |
|||
filename = filename.substring(colonIndex + 1).trim(); |
|||
} |
|||
} |
|||
return new DYKFile(filename, "image"); |
|||
} else if (hooksLowerCase.contains("[[file:") || |
|||
hooksLowerCase.contains("[[image:")) { // image file |
|||
int startIndex = Math.max(hooksLowerCase.lastIndexOf("[[file:") + 7, |
|||
hooksLowerCase.lastIndexOf("[[image:") + 8); |
|||
int midIndex = hooks.indexOf("|", startIndex); |
|||
int endIndex = startIndex; |
|||
for (int i=1; hooks.indexOf("]]", endIndex + 2) != -1; i++) { |
|||
endIndex = hooks.indexOf("]]", endIndex + 2); |
|||
if (hooks.substring(startIndex, endIndex).split("\\[\\[").length == i) { |
|||
break; |
|||
} |
|||
} |
|||
int rollIndex = hooks.lastIndexOf("|", endIndex); |
|||
while (hooks.lastIndexOf("[[", rollIndex) > (startIndex - 7)) { |
|||
rollIndex = hooks.lastIndexOf("|", rollIndex - 1); |
|||
} |
|||
String type = "image"; |
|||
String filename = hooks.substring(startIndex, midIndex).trim(); |
|||
if (filename.substring(filename.length() - 4).equals(".ogg")) { |
|||
// http://en.wikipedia.org/w/index.php?diff=next&oldid=273311345 |
|||
type = "video"; |
|||
logError("Check if [[:File:" + filename + "]] is a sound or video file"); |
|||
} |
|||
return new DYKFile(filename, type); |
|||
} |
|||
logError("Can't find an image, sound, or video file"); |
|||
return null; |
|||
} |
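// Illustrative note, not part of the original bot: with hooks containing
// [[File:Example sound.ogg|100x100px|caption]] (hypothetical filename) but no {{DYK listen}},
// the code above falls through to the plain file branch, returns a DYKFile typed "video"
// because of the .ogg extension, and logs an error asking for a manual check of whether it is
// really a sound file.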
|||
/** |
|||
* Store information about tags on the file, like {{c-uploaded}} |
|||
* @param the DYKFile to check |
|||
*/ |
|||
private void checkFileTags(DYKFile file) { |
|||
if (file == null) return; |
|||
SimpleArticle filePage = new SimpleArticle(readContent("File:" + file.getFilename())); |
|||
String fileText = filePage.getText(); |
|||
file.setCuploaded(fileText.contains("{{c-uploaded}}") || fileText.contains("{{C-uploaded}}")); |
|||
int mCroppedIndex = Math.max(fileText.indexOf("{{m-cropped"), fileText.indexOf("{{M-cropped")); |
|||
if (mCroppedIndex != -1) { |
|||
String croppedFrom = fileText.substring(fileText.indexOf('|', mCroppedIndex) + 1, |
|||
fileText.indexOf("}}", mCroppedIndex)).trim(); |
|||
if (croppedFrom.toLowerCase().startsWith("file:")) { |
|||
croppedFrom = croppedFrom.substring(5).trim(); |
|||
} |
|||
if (croppedFrom.toLowerCase().startsWith("image:")) { |
|||
croppedFrom = croppedFrom.substring(6).trim(); |
|||
} |
|||
file.setCroppedFrom(croppedFrom); |
|||
} |
|||
} |
|||
/** |
|||
* Checks if the file specified is protected either locally or on Commons |
|||
* The bot will detect both cascade-protection and normal protection |
|||
* If you pass in a salted file (for example Capture.JPG), the function will return false |
|||
* @param fileName without "File:" in front, for example "Andrey Alexandrovich Popov.jpg" |
|||
* @param time when the image will leave the Main Page |
|||
* @return true if the file is fully protected, false otherwise (but see above note on salting) |
|||
*/ |
|||
@SuppressWarnings("unchecked") |
|||
private boolean checkIfProtected(String fileName, GregorianCalendar nextNextUpdateTime, |
|||
boolean logging) { |
|||
String imageInfoURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query" + |
|||
"&prop=imageinfo&iilimit=1&iiprop=&titles=File:" + MediaWiki.encode(fileName); |
|||
Document imageInfo = fetchUsingSAXBuilder(imageInfoURL); |
|||
Element pageInfo = imageInfo.getRootElement().getChild("query", ns).getChild("pages", ns).getChild("page", ns); |
|||
String rootAPIurl; |
|||
if (pageInfo.getAttributeValue("imagerepository").equals("shared")) { // at Commons |
|||
rootAPIurl = BaseCommonsAPIURL + "api.php"; |
|||
} else if (pageInfo.getAttributeValue("imagerepository").equals("local")) { // at Enwiki |
|||
rootAPIurl = BaseEnWikiAPIURL + "api.php"; |
|||
} else { // the file doesn't exist; this should never happen |
|||
if (logging) logError("[[:File:" + fileName + "]] does not exist"); |
|||
return false; |
|||
} |
|||
String protectionInfoURL = rootAPIurl + "?format=xml&action=query&prop=info" + |
|||
"&inprop=protection&titles=File:" + MediaWiki.encode(fileName); |
|||
Document protectionInfo = fetchUsingSAXBuilder(protectionInfoURL); |
|||
List<Element> protectionNodes = protectionInfo.getRootElement().getChild("query", ns).getChild("pages", ns) |
|||
.getChild("page", ns).getChild("protection", ns).getChildren("pr", ns); |
|||
if (protectionNodes.isEmpty()) { // isn't protected or (checked above) doesn't exist |
|||
String logMessage = "[[:File:" + fileName + "]] is not protected"; |
|||
if (rootAPIurl.contains("commons")) { |
|||
logMessage += "; either 1) Upload the file to en.wiki, or 2) protect the file at Commons"; |
|||
} |
|||
if (logging) logError(logMessage); |
|||
return false; |
|||
} |
|||
boolean notFullyProtected = false; |
|||
boolean protectionExpireEarly = false; |
|||
for (Element protectionNode : protectionNodes) { |
|||
if (!(protectionNode.getAttributeValue("type").equals("edit") && |
|||
protectionNode.getAttributeValue("level").equals("sysop"))) { |
|||
notFullyProtected = true; |
|||
continue; |
|||
} |
|||
String protectionExpiryTime = protectionNode.getAttributeValue("expiry"); |
|||
if (protectionExpiryTime.equals("infinity") || nextNextUpdateTime == null) { |
|||
return true; |
|||
} |
|||
try { |
|||
if (convertWikiTimestamp(protectionExpiryTime).before(nextNextUpdateTime)) { |
|||
protectionExpireEarly = true; |
|||
continue; |
|||
} else { |
|||
return true; // protection doesn't expire early, so we're good |
|||
} |
|||
} catch (ParseException e) {} // impossible |
|||
} |
|||
if (protectionExpireEarly) { |
|||
if (logging) { |
|||
logError("The protection for [[:File:" + fileName + "]] " + |
|||
"will expire while or before it's on the Main Page"); |
|||
} |
|||
return false; |
|||
} |
|||
if (notFullyProtected) { |
|||
if (logging) logError("[[:File:" + fileName + "]] is not fully protected"); |
|||
return false; |
|||
} |
|||
return false; // unreachable code |
|||
} |
|||
/** |
|||
* Checks if the file should be deleted, then deletes it |
|||
* The file will be deleted if it's a cropped version made just for DYK |
|||
* Otherwise, the file won't be deleted if: |
|||
* 1. It doesn't exist at Commons and/or Enwiki under the same filename |
|||
* 2. It isn't tagged with {{c-uploaded}} |
|||
* 3. The first revision in the file's history is before the first upload |
|||
* @param file the file to be deleted
* @return true if the file was deleted, false otherwise
|||
*/ |
|||
@SuppressWarnings("unchecked") |
|||
private boolean deleteFile(DYKFile file) { |
|||
if (file == null) return false; |
|||
String filename = file.getFilename(); |
|||
try { |
|||
if (file.getCroppedFrom() == null) { // always delete if this is a cropped image |
|||
if (!file.getCuploaded()) { |
|||
// if it's not tagged with c-uploaded on enwiki, don't delete |
|||
return false; |
|||
} |
|||
// if it doesn't exist at Commons, don't delete |
|||
MediaWikiBot commonsBot = new MediaWikiBot(BaseCommonsAPIURL); |
|||
if (readContent(commonsBot, "File:" + filename).getText().isEmpty()) { |
|||
logError("[[:File:" + filename + "]] is tagged with c-uploaded but does not exist at Commons"); |
|||
return false; |
|||
} |
|||
// figure out when the image was uploaded |
|||
int revs = 10; |
|||
String imageInfoURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query" + |
|||
"&iiprop=timestamp&prop=imageinfo&iilimit=" + revs + "&titles=File:" + |
|||
MediaWiki.encode(filename); |
|||
Document imageInfo = fetchUsingSAXBuilder(imageInfoURL); |
|||
Element pageInfo = imageInfo.getRootElement().getChild("query", ns).getChild("pages", ns).getChild("page", ns); |
|||
if (pageInfo.getAttributeValue("imagerepository").equals("shared")) { |
|||
return false; // no information on enwiki's copy |
|||
} |
|||
List<Element> timestamps = pageInfo.getChild("imageinfo", ns).getChildren("ii", ns); |
|||
if (timestamps.size() == revs) log("Fetching " + revs + "/" + revs + " revisions"); |
|||
Calendar uploadTime = convertWikiTimestamp(timestamps.get |
|||
(timestamps.size() - 1).getAttributeValue("timestamp")); |
|||
// figure out the date of the first revision |
|||
String revInfoURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query" + |
|||
"&prop=revisions&rvlimit=1&rvdir=newer&rvprop=timestamp" + |
|||
"&titles=File:" + MediaWiki.encode(filename); |
|||
Document revisionInfo = fetchUsingSAXBuilder(revInfoURL); |
|||
Calendar firstRevTime = convertWikiTimestamp(revisionInfo.getRootElement() |
|||
.getChild("query", ns).getChild("pages", ns).getChild("page", ns).getChild("revisions", ns) |
|||
.getChild("rev", ns).getAttributeValue("timestamp")); |
|||
if (firstRevTime.before(uploadTime)) { |
|||
// if the first revision was before the upload, don't delete |
|||
return false; |
|||
} |
|||
} |
|||
// otherwise, delete |
|||
String deleteReason = "{{[[Template:c-uploaded|c-uploaded]]}} file off the " + |
|||
"[[T:DYK|DYK]] section of the Main Page"; |
|||
deleteContent("File:" + filename, deleteReason); |
|||
return true; |
|||
} catch (DYKResetException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
logError("Error occurred while deleting [[:File:" + filename + "]]"); |
|||
return false; |
|||
} |
|||
} |
|||
/** |
|||
* Unprotects a file if: |
|||
* 1. It exists on English Wikipedia and is fully protected |
|||
* 2. The string "Main Page" is in the reason for the most recent protection |
|||
*/ |
|||
private void unprotectFile(DYKFile file) { |
|||
if (file == null) return; |
|||
String filename = file.getFilename(); |
|||
SimpleArticle filePage = new SimpleArticle(readContent("File:" + filename)); |
|||
if (filePage.getText().isEmpty()) { |
|||
return; // don't continue if the file isn't on enwiki |
|||
} |
|||
if (!checkIfProtected(filename, null, false)) { |
|||
return; // don't continue if the file isn't fully protected |
|||
} |
|||
String protectionLogURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query" + |
|||
"&list=logevents&letype=protect&leprop=parsedcomment" + |
|||
"&letitle=File:" + MediaWiki.encode(filename); |
|||
Document protectionLog = fetchUsingSAXBuilder(protectionLogURL); |
|||
Element protLogItem = protectionLog.getRootElement().getChild("query", ns) |
|||
.getChild("logevents", ns).getChild("item", ns); |
|||
if (protLogItem == null) { |
|||
return; // don't continue if the file wasn't manually protected |
|||
} |
|||
String protReason = protLogItem.getAttributeValue("parsedcomment"); |
|||
if (!protReason.contains("Main Page")) { |
|||
return; // don't continue if the file wasn't protected for DYK |
|||
} |
|||
unprotectContent("File:" + filename, "File off the [[T:DYK|DYK]] section of the Main Page"); |
|||
} |
|||
# Returns:
# * Int of the next queue number, parsed from NEXT_UPDATE_QUEUE_LOC
# * 0 if NEXT_UPDATE_QUEUE_LOC doesn't parse to an int
def _find_next_queue_number(self) -> int:
    page = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.NEXT_UPDATE_QUEUE_LOC)
    num_next_queue = 0
    try:
        num_next_queue = int(page.text)
    except ValueError:
        pass
    return num_next_queue

/**
* Checks if the file exists at Commons or English Wikipedia,
* then tags the file on English Wikipedia if it does exist
*/
private void tagFile(DYKFile file, Calendar time) {
if (file == null) return;
String filename = file.getCroppedFrom(); // tag the original file if the image was cropped
if (filename == null) filename = file.getFilename();
do {
try {
|||
SimpleArticle filePage = new SimpleArticle(readContent("File:" + filename)); |
|||
MediaWikiBot commonsBot = new MediaWikiBot(BaseCommonsAPIURL); |
|||
if (!filePage.getText().isEmpty() || |
|||
!readContent(commonsBot, "File:" + filename).getText().isEmpty()) { |
|||
if (filePage.getText().contains("{{DYKfile")) { |
|||
log("The file " + filename + " has already been tagged"); |
|||
return; |
|||
} |
|||
String fileTag = "{{DYKfile|" + |
|||
new SimpleDateFormat("d MMMM'|'yyyy", BotLocale).format(time.getTime()) + |
|||
"|type=" + file.getType() + "}}"; //create DYKfile tag |
|||
String fileContent = filePage.getText(); |
|||
if (fileContent.isEmpty()) filePage.setEditTimestamp(OverrideEditConflicts); |
|||
int indexOfFirstSection = fileContent.indexOf("=="); |
|||
if (indexOfFirstSection == -1) indexOfFirstSection = fileContent.length(); |
|||
fileContent = fileContent.substring(0, indexOfFirstSection).trim() + |
|||
"\n" + fileTag + "\n" + fileContent.substring(indexOfFirstSection).trim(); |
|||
filePage.setText(fileContent); |
|||
filePage.setEditSummary("File appeared on [[WP:Did you know|DYK]] on " + |
|||
new SimpleDateFormat("d MMMM yyyy", BotLocale).format(time.getTime())); |
|||
writeContent(filePage); |
|||
} else { |
|||
logError("[[:File:" + filename + "]] does not exist at Commons or English Wikipedia"); |
|||
} |
|||
return; |
|||
} catch (EditConflictException e) { |
|||
log("Edit conflict caught"); |
|||
// will try again because of while(true) |
|||
} catch (DYKResetException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
logError("Error occurred while tagging [[:File:" + filename + "]]"); |
|||
return; |
|||
} |
|||
} while (true); |
|||
} |
|||
/** |
|||
* Makes sure that each hook is on its own line |
|||
* @param queueText queue wikitext
* @param indexOfHooksinQueue index of <!--Hooks--> in the queue
* @param indexOfHooksEndinQueue index of <!--HooksEnd--> in the queue
|||
* @return queue wikitext with each hook on its own line |
|||
*/ |
|||
private String checkIfEachHookOnNewLine(String queueText, int indexOfHooksinQueue, |
|||
int indexOfHooksEndinQueue) { |
|||
for (int hookIndex = queueText.indexOf("{{*mp}}", indexOfHooksinQueue); |
|||
hookIndex != -1 && hookIndex < indexOfHooksEndinQueue; |
|||
hookIndex = queueText.indexOf("{{*mp}}", hookIndex + 7)) { |
|||
if (hookIndex != 0 && queueText.charAt(hookIndex - 1) != '\n') { |
|||
log("Multiple hooks detected on one line, fixing"); |
|||
queueText = queueText.substring(0, hookIndex) + "\n" + queueText.substring(hookIndex); |
|||
indexOfHooksEndinQueue++; |
|||
} |
|||
} |
|||
return queueText; |
|||
} |
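// Example (illustrative): if a queue line reads "{{*mp}} ... that A? {{*mp}} ... that B?",
// the second {{*mp}} is moved to the start of a new line so that each hook sits on its own line.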
|||
/** |
|||
* Checks if the DYK has been reset manually. |
|||
* If so, bot attempts to reset itself by throwing an exception. |
|||
* The exception propagates up to the run() method. |
|||
*/ |
|||
protected void checkIfReset() { |
|||
if (findNextQueueNumber() != nextQueue) { |
|||
log("DYK next queue number has been changed manually, attempting reset"); |
|||
throw new DYKResetException(); |
|||
} |
|||
} |
|||
/** |
|||
* Finds the first line after the template cluster on an article talk page |
|||
* Used to add a new DYK talk template after other templates and before conversations |
|||
*/ |
|||
private int findLastTemplateIndex(String text) { |
|||
String[] lines = text.split("\n"); |
|||
int openingBrackets = 0; |
|||
int closingBrackets = 0; |
|||
int returnIndex = 0; |
|||
for (String line : lines) { |
|||
int openIndex = 0; |
|||
while (line.indexOf("{{", openIndex) != -1) { |
|||
openingBrackets++; |
|||
openIndex = line.indexOf("{{", openIndex) + 2; |
|||
} |
|||
int closeIndex = 0; |
|||
while (line.indexOf("}}", closeIndex) != -1) { |
|||
closingBrackets++; |
|||
closeIndex = line.indexOf("}}", closeIndex) + 2; |
|||
} |
|||
if (line.trim().length() >= 2 && openingBrackets == closingBrackets |
|||
&& ((openIndex == 0 && closeIndex == 0) || |
|||
line.matches("^[\\s]*\\{\\{[\\s]*[Tt]alk[\\s]*\\:.*"))) { |
|||
return returnIndex; |
|||
} |
|||
returnIndex += line.length() + 1; |
|||
} |
|||
return text.length(); |
|||
} |
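// Example (illustrative): on a talk page that opens with {{Talk header}} and a {{WikiProject ...}}
// banner followed by "== First discussion ==", the returned index points at the start of the
// "==" line, i.e. just after the template cluster and before the first conversation.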
|||
/** |
|||
* Logs a message into the error log |
|||
* The error log is then posted to ErrorOutputLoc at the end of every run by postErrors() |
|||
*/ |
|||
private void logError(String message) { |
|||
errorLog.append(message).append("\n\n"); |
|||
System.out.println("Error: " + message); |
|||
} |
|||
/** |
|||
* At the end of each run, errors will be posted to the page specified in ErrorOutputLoc |
|||
* Also, the page will be cleared after a clean run |
|||
*/ |
|||
private void postErrors() { |
|||
SimpleArticle errorsPage = new SimpleArticle(readContent(ErrorOutputLoc)); |
|||
String errors = errorLog.toString().trim(); |
|||
errorLog = new StringBuilder(); // clear local buffer |
|||
if (errorsPage.getText().trim().equals(errors)) { |
|||
// if the errors are already on the page, don't post again |
|||
return; |
|||
} |
|||
errorsPage.setText(errors); |
|||
if (errors.isEmpty()) { |
|||
errorsPage.setEditSummary("No errors; clear"); |
|||
} else { |
|||
errorsPage.setEditSummary("Posting latest errors"); |
|||
} |
|||
try { |
|||
errorsPage.setEditTimestamp(OverrideEditConflicts); |
|||
} catch (ParseException e) {} // impossible |
|||
writeContent(errorsPage); |
|||
} |
|||
/** |
|||
* Replaces multiple spaces with a single space in the given string |
|||
* @param text the text with unnecessary spaces
|||
* @return text without unnecessary spaces |
|||
*/ |
|||
private String removeUnnecessarySpaces(String text) { |
|||
String[] words = text.split(" "); |
|||
StringBuilder textWithoutExtraSpaces = new StringBuilder(); |
|||
for (String word : words) { |
|||
if (!word.isEmpty()) textWithoutExtraSpaces.append(word).append(" "); |
|||
} |
|||
if (textWithoutExtraSpaces.length() > 0) { |
|||
textWithoutExtraSpaces.deleteCharAt(textWithoutExtraSpaces.length() - 1); |
|||
} |
|||
return textWithoutExtraSpaces.toString(); |
|||
} |
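// Example (illustrative): removeUnnecessarySpaces("... that  the   nominator  wrote ...")
// returns "... that the nominator wrote ...".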
|||
/** |
|||
* Checks if a local text file is set to "on" |
|||
* @return true if it's on, false otherwise |
|||
*/ |
|||
protected boolean isOn() { |
|||
do { |
|||
try { |
|||
BufferedReader reader = new BufferedReader(new FileReader("UpdateBotSwitch.txt")); |
|||
String status = reader.readLine(); |
|||
reader.close(); |
|||
return status.equalsIgnoreCase("on"); |
|||
} catch (Exception e) { |
|||
log("File read exception caught"); |
|||
sleep(5000); |
|||
} |
|||
} while (true); |
|||
} |
|||
public static void main(String[] args) { |
|||
DYKUpdateBot.initializeLoggers(); |
|||
DYKUpdateBot updateBot = new DYKUpdateBot(TimeBetweenEdits, NumExceptionsBeforeAttemptedReset, |
|||
NextUpdateQueueLoc, UserInfo.getUser(), UserInfo.getPassword()); |
|||
synchronized (updateBot) { |
|||
updateBot.run(); |
|||
} |
|||
} |
|||
class DYKCredit { |
|||
String articleTitle; |
|||
String userTalkPage; |
|||
String hook; |
|||
boolean errorInArticleTitle; |
|||
boolean dykMake; |
|||
String nompage; |
|||
DYKCredit(String articleTitle, String userTalkPage, String hook, boolean errorInArticleTitle, |
|||
boolean dykMake, String nompage) { |
|||
this.articleTitle = articleTitle; |
|||
this.userTalkPage = userTalkPage; |
|||
this.hook = hook; |
|||
this.errorInArticleTitle = errorInArticleTitle; |
|||
this.dykMake = dykMake; |
|||
if (nompage != null) this.nompage = "Template:Did you know nominations/" + nompage; |
|||
} |
|||
} |
|||
} |
|||
</source> |
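The network helpers in the EnWikiBot class below (fetchUsingSAXBuilder, readContent, writeContent, purge, login) all retry failed API calls, waiting at least 5 seconds plus one extra second per failed attempt, capped at one hour. A minimal Python sketch of that backoff policy (the function name and the use of time.sleep are assumptions for illustration, not part of the bot):

<source lang="python">
import time

def sleep_before_retry(num_failed_attempts: int) -> None:
    # Wait at least 5 seconds, plus 1 extra second per failed attempt so far,
    # capped at one hour; mirrors Math.min(5000 + (1000 * loopCounter), 3600000) in those helpers.
    time.sleep(min(5 + num_failed_attempts, 3600))
</source>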
def validate_before_update(self, results_val, time_set_leaving):
    # figure out which queue to update from
    results_val.num_queue = self._find_next_queue_number()
    if results_val.num_queue == 0:
        self._log_error(results_val.rgstr_errors, 'Could not parse [[{0}]]; check if it\'s a number 1-{1}'
                        .format(DYKUpdateBot.NEXT_UPDATE_QUEUE_LOC, DYKUpdateBot.NUM_QUEUES))
        return results_val

    # get the wikitext of the queue
    results_val.page_queue = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.QUEUE_ROOT_LOC + str(results_val.num_queue))
    str_queue = results_val.page_queue.text
    str_link_to_queue = DYKUpdateBotUtils.wikilink_to_queue(results_val.num_queue, True)

    # make sure all curly braces are matched
    if str_queue.count('{{') != str_queue.count('}}'):
        self._log_error(results_val.rgstr_errors, 'Unmatched left <nowiki>("{{") and right ("}}")</nowiki> curly braces in ' + str_link_to_queue)
        return results_val

    # make sure the queue has {{DYKbotdo}}
    has_dykbotdo, results_val.str_dykbotdo_signature = DYKUpdateBotUtils.parse_dykbotdo(str_queue)
    if not has_dykbotdo:
        self._post_almost_late_message_to_WTDYK(time_set_leaving, results_val.num_queue)
        self._log_error(results_val.rgstr_errors, str_link_to_queue + ' is not tagged with {{tl|DYKbotdo}}')
        return results_val

    # make sure the queue has <!--Hooks--> and <!--HooksEnd--> and find hooks
    results_val.hooks_incoming = DYKUpdateBotUtils.extract_hooks(str_queue)
    if results_val.hooks_incoming is None:
        self._log_error(results_val.rgstr_errors, str_link_to_queue + ' is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>')
        return results_val

<source lang="java">
import java.lang.management.ManagementFactory;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.TimeZone;

import net.sourceforge.jwbf.actions.mw.MediaWiki;
import net.sourceforge.jwbf.actions.mw.editing.GetRevision;
import net.sourceforge.jwbf.actions.mw.util.ActionException;
import net.sourceforge.jwbf.bots.MediaWikiBot;
import net.sourceforge.jwbf.bots.util.LoginData;
import net.sourceforge.jwbf.contentRep.mw.Article;
import net.sourceforge.jwbf.contentRep.mw.SimpleArticle;

import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;

public abstract class EnWikiBot {

static {
    TimeZone.setDefault(TimeZone.getTimeZone("Coordinated Universal Time"));
}
|||
public static final Namespace ns = Namespace.NO_NAMESPACE; |
|||
protected static final Locale BotLocale = Locale.forLanguageTag("en-US"); |
|||
protected static final SimpleDateFormat APITimestampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", BotLocale); |
|||
protected static final String OverrideEditConflicts = "9999-12-31T23:59:59Z"; |
|||
protected static final String BaseEnWikiAPIURL = "https://en.wikipedia.org/w/"; |
|||
final String purgeLoc; |
|||
protected final int timeBetweenEdits; |
|||
protected final int numExceptionsBeforeAttemptedReset; |
|||
protected final String userName; |
|||
private final String password; |
|||
protected long lastRevId; |
|||
protected long lastDelId; |
|||
private MediaWikiBot enBot; |
|||
abstract protected boolean isOn(); |
|||
abstract protected void checkIfReset(); |
|||
public EnWikiBot(int timeBetweenEdits, int numExceptionsBeforeAttemptedReset, |
|||
String purgeLoc, String userName, String password) { |
|||
this.timeBetweenEdits = timeBetweenEdits; |
|||
this.numExceptionsBeforeAttemptedReset = numExceptionsBeforeAttemptedReset; |
|||
this.purgeLoc = purgeLoc; |
|||
this.userName = userName; |
|||
this.password = password; |
|||
String processInfo = ManagementFactory.getRuntimeMXBean().getName(); |
|||
log("PID: " + processInfo.substring(0, processInfo.indexOf('@'))); |
|||
log(Locale.getDefault().toLanguageTag()); |
|||
try { |
|||
enBot = new MediaWikiBot(BaseEnWikiAPIURL); |
|||
} catch (MalformedURLException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
lastRevId = getLastRevId(); |
|||
} |
|||
/** |
|||
* Gets the revision ID of the last edit made by the bot |
|||
* This function is used to make sure that the bot really has edited when it thinks it has |
|||
* This function is affected by server lag |
|||
* @return last revision ID |
|||
*/ |
|||
protected long getLastRevId() { |
|||
String apiURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query&list=usercontribs" + |
|||
"&uclimit=1&ucprop=ids&ucuser=" + userName; |
|||
Document doc = fetchUsingSAXBuilder(apiURL); |
|||
Element editInfo = doc.getRootElement().getChild("query", ns).getChild("usercontribs", ns).getChild("item", ns); |
|||
return Long.parseLong(editInfo.getAttributeValue("revid")); |
|||
} |
|||
/** |
|||
* Gets the revision ID of the last edit made by the bot at the given page |
|||
* This function is used to make sure that the bot really has edited when it thinks it has |
|||
* This function is not affected by server lag |
|||
* @return last revision ID |
|||
*/ |
|||
protected long getLastRevId(String title) { |
|||
title = MediaWiki.encode(title); |
|||
String apiURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query&prop=revisions" + |
|||
"&rvprop=ids&rvlimit=1&rvuser=" + userName + "&titles=" + title; |
|||
Document doc = fetchUsingSAXBuilder(apiURL); |
|||
Element pageInfo = doc.getRootElement().getChild("query", ns).getChild("pages", ns).getChild("page", ns); |
|||
if (pageInfo.getChildren().size() == 0) return 0; // the page has never been edited by the bot |
|||
Element editInfo = pageInfo.getChild("revisions", ns).getChild("rev", ns); |
|||
return Long.parseLong(editInfo.getAttributeValue("revid")); |
|||
} |
|||
/** |
|||
* Finds the redirects to the given page |
|||
* @param title article title
|||
* @param limit of the number of redirects to fetch |
|||
* @return list of pages that redirect to the given page |
|||
*/ |
|||
@SuppressWarnings("unchecked") |
|||
protected LinkedList<String> findRedirectsToPage(String title, int limit) { |
|||
LinkedList<String> redirects = new LinkedList<String>(); |
|||
String getRedirectsURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query" + |
|||
"&list=backlinks&blfilterredir=redirects&blnamespace=0&bllimit=" + limit + "&bltitle=" + |
|||
MediaWiki.encode(title); |
|||
Document redirectsInfo = fetchUsingSAXBuilder(getRedirectsURL); |
|||
Element backlinks = redirectsInfo.getRootElement().getChild("query", ns).getChild("backlinks", ns); |
|||
Iterator<Element> redirectIter = backlinks.getDescendants(); |
|||
while (redirectIter.hasNext()) { |
|||
redirects.add(redirectIter.next().getAttributeValue("title")); |
|||
} |
|||
return redirects; |
|||
} |
|||
/** |
|||
* Checks if the current page is a redirect by parsing the page |
|||
* @param pageText text on the page
|||
* @return null if the page isn't a redirect, name of the "redirect to" page if it is |
|||
*/ |
|||
protected String checkForPageRedirect(String pageText) |
|||
{ |
|||
String redirectTo = null; |
|||
if (pageText.toLowerCase().trim().startsWith("#redirect")) { |
|||
int linkStartIndex = pageText.indexOf("[[") + 2; |
|||
int linkPipeIndex = pageText.indexOf("|", linkStartIndex); |
|||
int linkEndIndex = pageText.indexOf("]]", linkStartIndex); |
|||
if (linkStartIndex < linkPipeIndex && linkPipeIndex < linkEndIndex) { |
|||
linkEndIndex = linkPipeIndex; |
|||
} |
|||
redirectTo = pageText.substring(linkStartIndex, linkEndIndex); |
|||
if (redirectTo.indexOf("#") != -1) { |
|||
redirectTo = redirectTo.substring(0, redirectTo.indexOf("#")); |
|||
} |
|||
} |
|||
return redirectTo; |
|||
} |
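// Example (illustrative): checkForPageRedirect("#REDIRECT [[Foo bar#History]]") returns "Foo bar";
// for a page that is not a redirect the method returns null.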
|||
/** |
|||
* Converts a wiki timestamp (like "2009-01-17T23:45:32Z") to a Java Calendar |
|||
* @param wikiTimestamp in wiki format |
|||
* @return Calendar set to the specified time in UTC |
|||
* @throws ParseException |
|||
*/ |
|||
public Calendar convertWikiTimestamp(String wikiTimestamp) throws ParseException { |
|||
GregorianCalendar time = new GregorianCalendar(BotLocale); |
|||
time.setTime(APITimestampFormat.parse(wikiTimestamp)); |
|||
return time; |
|||
} |
|||
# make sure the image/file is protected |
|||
/** |
|||
results_val.file_incoming = DYKUpdateBotUtils.find_file(results_val.hooks_incoming) |
|||
* Fetches the URL using SAXBuilder |
|||
if results_val.file_incoming: |
|||
* If an exception is thrown, the bot will wait at least 5 seconds before attempting again |
|||
str_protection_error = DYKUpdateBotUtils.check_if_protected(results_val.file_incoming, time_set_leaving) |
|||
* @param url you want to fetch (should be formatted in XML) |
|||
if str_protection_error: |
|||
* @return the XML tree in the form of a Document |
|||
self._log_error(results_val.rgstr_errors, str_protection_error) |
|||
*/ |
|||
else: |
|||
protected Document fetchUsingSAXBuilder(String url) { |
|||
self._log_warning(results_val.rgstr_warnings, 'Can\'t find the image / file for incoming DYK set\n') |
|||
int exceptionCounter = 0; |
|||
do { |
|||
try { |
|||
return new SAXBuilder().build(url); |
|||
} catch (Exception e) { |
|||
exceptionCounter++; |
|||
log("SAXbuilder exception caught, #" + exceptionCounter); |
|||
if (exceptionCounter > numExceptionsBeforeAttemptedReset) { |
|||
checkIfReset(); |
|||
} |
|||
// wait at least 5 seconds and at most an hour before attempting another read |
|||
sleep(Math.min(5000 + (1000 * exceptionCounter), 3600000)); |
|||
} |
|||
} while (true); |
|||
} |
|||
/** |
|||
* Checks if the bot is logged in, and logs in if not |
|||
* There's no easy way to tell if the bot's logged in, so the bot null edits its userpage |
|||
* and checks if its username shows up in the correct variables in the returned HTML |
|||
* Wikipedia automatically logs out a user one month after login |
|||
*/ |
|||
protected void checkifLoggedIn() { |
|||
SimpleArticle userpage = readContent("User:" + userName); |
|||
userpage.setEditSummary(""); |
|||
try { |
|||
String userpageHTML = enBot.performAction(new PostModifyContentWithEditConflicts(userpage)); |
|||
if (!userpageHTML.contains("\"wgUserName\":\"" + userName + "\"")) { |
|||
// the bot got logged off somehow |
|||
log("Logging in"); |
|||
login(); |
|||
} |
|||
} catch (Exception e) { |
|||
log("Caught exception during null edit on login check"); |
|||
} |
|||
} |
|||
/** |
|||
* Logs in to the wiki |
|||
*/ |
|||
protected void login() { |
|||
int exceptionCounter = 0; |
|||
LoginData login = new LoginData(); |
|||
do { |
|||
try { |
|||
enBot.performAction(new PostLoginNew(userName, password, login)); |
|||
return; |
|||
} catch (Exception e) { |
|||
exceptionCounter++; |
|||
log("Exception caught while logging in"); |
|||
if (exceptionCounter > numExceptionsBeforeAttemptedReset) { |
|||
checkIfReset(); |
|||
} |
|||
// wait at least 5 seconds and at most an hour before attempting another login |
|||
sleep(Math.min(5000 + (1000 * exceptionCounter), 3600000)); |
|||
} |
|||
} while (true); |
|||
} |
|||
/** |
|||
* Purges the given page |
|||
*/ |
|||
protected String purge(String page, boolean sleep) { |
|||
int loopCounter = 0; |
|||
do { |
|||
try { |
|||
String xmlReply = enBot.performAction(new PostPurge(page)); |
|||
if (xmlReply == null) throw new ActionException(); |
|||
if (sleep) sleep(timeBetweenEdits * 1000); |
|||
return xmlReply; |
|||
} catch (Exception e) { |
|||
loopCounter++; |
|||
log("Purge exception caught, #" + loopCounter); |
|||
if (loopCounter > numExceptionsBeforeAttemptedReset) { |
|||
checkIfReset(); |
|||
} |
|||
// wait at least 5 seconds and at most an hour before attempting another purge |
|||
sleep(Math.min(5000 + (1000 * loopCounter), 3600000)); |
|||
} |
|||
} while (true); |
|||
} |
|||
/** |
|||
* See documentation for readContent(MediaWikiBot, String) below |
|||
*/ |
|||
protected Article readContent(String pageName) { |
|||
return readContent(enBot, pageName); |
|||
} |
|||
/** |
|||
* Reads a Wikipedia page |
|||
* If an exception is thrown (most likely because of server connection issues), the bot will wait |
|||
* at least 5 seconds before attempting again
|||
* The time between attempts increases by 1 second each attempt, up to a maximum of 1 hour |
|||
* @param bot that specifies which wiki you're reading from |
|||
* @param page to read |
|||
* @return the article |
|||
*/ |
|||
protected Article readContent(MediaWikiBot bot, String pageName) { |
|||
int loopCounter = 0; |
|||
do { |
|||
try { |
|||
return bot.readContent(pageName, GetRevision.CONTENT | GetRevision.TIMESTAMP); |
|||
} catch (Exception e) { |
|||
loopCounter++; |
|||
log("Read exception caught, #" + loopCounter); |
|||
if (loopCounter > numExceptionsBeforeAttemptedReset) { |
|||
checkIfReset(); |
|||
} |
|||
// wait at least 5 seconds and at most an hour before attempting another read |
|||
sleep(Math.min(5000 + (1000 * loopCounter), 3600000)); |
|||
} |
|||
} while (true); |
|||
} |
|||
/** |
|||
* See documentation for writeContent(MediaWikiBot, SimpleArticle) below |
|||
*/ |
|||
protected void writeContent(SimpleArticle page) { |
|||
writeContent(enBot, page); |
|||
} |
|||
/** |
|||
* Edits a Wikipedia page |
|||
* If an exception is thrown (most likely because of server connection issues), the bot will wait |
|||
* at least 5 seconds before attempting again
|||
* The time between attempts increases by 1 second each attempt, up to a maximum of 1 hour |
|||
* @param logged-in bot |
|||
* @param page you want to edit |
|||
*/ |
|||
protected void writeContent(MediaWikiBot bot, SimpleArticle page) { |
|||
int loopCounter = 0; |
|||
String normalizedTitle = normalizeTitle(page.getLabel()); |
|||
if (!normalizedTitle.equals(page.getLabel())) { |
|||
log("Title normalized from " + page.getLabel() + " to " + normalizedTitle); |
|||
page.setLabel(normalizedTitle); |
|||
} |
|||
do { |
|||
try { |
|||
bot.performAction(new PostModifyContentWithEditConflicts(page)); |
|||
log("Editing " + page.getLabel()); |
|||
sleep(timeBetweenEdits * 1000); |
|||
long latestRevId = getLastRevId(page.getLabel()); |
|||
if (latestRevId <= lastRevId) { // the edit didn't go through |
|||
log("Edit didn't process correctly, attempting again"); |
|||
throw new ActionException(); |
|||
} else { |
|||
lastRevId = latestRevId; |
|||
} |
|||
return; |
|||
} catch (EditConflictException e) { |
|||
if (loopCounter > 0) { |
|||
log("Newer page available, but skipping to avoid " + |
|||
"double-editing (check for edit conflicts)"); |
|||
lastRevId = getLastRevId(page.getLabel()); |
|||
return; |
|||
} else { |
|||
throw e; |
|||
} |
|||
} catch (Exception e) { |
|||
// wait at least 5 seconds and at most an hour |
|||
sleep(Math.min(5000 + (1000 * loopCounter), 3600000)); |
|||
long latestRevId = getLastRevId(page.getLabel()); |
|||
if (latestRevId > lastRevId) { // the edit did go through |
|||
log("Edit processed correctly, continuing"); |
|||
lastRevId = latestRevId; |
|||
sleep(timeBetweenEdits * 1000); |
|||
return; |
|||
} // else |
|||
loopCounter++; |
|||
log("Write exception caught, #" + loopCounter); |
|||
if (loopCounter > numExceptionsBeforeAttemptedReset) { |
|||
checkIfReset(); |
|||
} |
|||
checkifLoggedIn(); //make sure we're logged in |
|||
} |
|||
} while (true); |
|||
} |
|||
# fetch T:DYK |
|||
/** |
|||
results_val.page_TDYK = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.TDYK_LOC) |
|||
* Deletes a Wikipedia page |
|||
str_tdyk = results_val.page_TDYK.text |
|||
* If an exception is thrown (most likely because of server connection issues), the bot will wait |
|||
* at least 5 seconds before attempting again |
|||
* The time between attempts increases by 1 second each attempt, up to a maximum of 10 attempts |
|||
* @param page to delete |
|||
* @param reason for deletion |
|||
*/ |
|||
protected void deleteContent(String pageName, String reason) { |
|||
boolean errorThrown = false; |
|||
int loopCounter = 0; |
|||
do { |
|||
try { |
|||
enBot.performAction(new PostDeleteWithReason(pageName, |
|||
reason, enBot.getSiteinfo(), enBot.getUserinfo())); |
|||
log("Deleting " + pageName); |
|||
errorThrown = false; |
|||
sleep(timeBetweenEdits * 1000); |
|||
long latestDelId = getLastDelId(); |
|||
if (latestDelId <= lastDelId) { // the delete didn't go through |
|||
log("Delete didn't process correctly, attempting again"); |
|||
throw new ActionException(); |
|||
} else { |
|||
lastDelId = latestDelId; |
|||
} |
|||
return; |
|||
} catch (Exception e) { |
|||
sleep(timeBetweenEdits * 1000); |
|||
long latestDelId = getLastDelId(); |
|||
if (latestDelId > lastDelId) { // the delete did go through |
|||
log("Delete processed correctly, continuing"); |
|||
lastDelId = latestDelId; |
|||
errorThrown = false; |
|||
return; |
|||
} // else |
|||
errorThrown = true; |
|||
loopCounter++; |
|||
log("Delete exception caught, #" + loopCounter); |
|||
checkifLoggedIn(); //make sure we're logged in |
|||
// wait at least 5 seconds and at most an hour before attempting another delete |
|||
sleep(Math.min(5000 + (1000 * loopCounter), 3600000)); |
|||
} |
|||
} while (errorThrown && loopCounter < 10); |
|||
} |
|||
# make sure T:DYK has <!--Hooks--> and <!--HooksEnd--> and find hooks |
|||
/** |
|||
results_val.hooks_outgoing = DYKUpdateBotUtils.extract_hooks(str_tdyk) |
|||
* Unprotects a Wikipedia page |
|||
if results_val.hooks_outgoing is None: |
|||
* If an exception is thrown (most likely because of server connection issues), the bot will wait |
|||
self._log_error(results_val.rgstr_errors, '[[' + DYKUpdateBot.TDYK_LOC + ']] is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>') |
|||
* at least 5 seconds before attempting again |
|||
return results_val |
|||
* The time between attempts increases by 1 second each attempt, up to a maximum of 10 attempts |
|||
* @param page to unprotect |
|||
* @param reason for unprotection |
|||
*/ |
|||
protected void unprotectContent(String pageName, String reason) { |
|||
boolean errorThrown = false; |
|||
int loopCounter = 0; |
|||
do { |
|||
try { |
|||
enBot.performAction(new PostUnprotectWithReason(pageName, reason)); |
|||
log("Unprotecting " + pageName); |
|||
errorThrown = false; |
|||
sleep(timeBetweenEdits * 1000); |
|||
return; |
|||
} catch (Exception e) { |
|||
errorThrown = true; |
|||
loopCounter++; |
|||
log("Unprotect exception caught, #" + loopCounter); |
|||
checkifLoggedIn(); //make sure we're logged in |
|||
// wait at least 5 seconds and at most an hour before attempting another unprotect
|||
sleep(Math.min(5000 + (1000 * loopCounter), 3600000)); |
|||
} |
|||
} while (errorThrown && loopCounter < 10); |
|||
} |
|||
/** |
|||
* Gets the log ID of the last delete action by the bot |
|||
* This function is used to make sure that the bot really has deleted when it thinks it has |
|||
* @return last deletion log ID |
|||
*/ |
|||
protected long getLastDelId() { |
|||
String apiURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query&list=logevents" + |
|||
"&letype=delete&leprop=ids&lelimit=1&leuser=" + userName; |
|||
Document doc = fetchUsingSAXBuilder(apiURL); |
|||
Element itemInfo = doc.getRootElement().getChild("query", ns).getChild("logevents", ns).getChild("item", ns); |
|||
return Long.parseLong(itemInfo.getAttributeValue("logid")); |
|||
} |
|||
/** |
|||
* Normalizes a page title so Mediawiki will like it |
|||
* @param title to be normalized, e.g. "1922–23 Nelson F.C. season" |
|||
* @return normalized title, e.g. "1922–23 Nelson F.C. season" |
|||
*/ |
|||
protected String normalizeTitle(String pageName) { |
|||
pageName = pageName.replaceAll("&amp;", "&"); // un-escape ampersands before querying the API
|||
String apiURL = BaseEnWikiAPIURL + "api.php?format=xml&action=query&titles=" + |
|||
MediaWiki.encode(pageName); |
|||
Document doc = fetchUsingSAXBuilder(apiURL); |
|||
Element normalized = doc.getRootElement().getChild("query", ns).getChild("normalized", ns); |
|||
if (normalized == null) return pageName; |
|||
return normalized.getChild("n", ns).getAttributeValue("to"); |
|||
} |
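// Example (illustrative): a title passed in as "fish &amp; chips sign" has its ampersand
// un-escaped and comes back from the API as "Fish & chips sign" (MediaWiki capitalizes
// the first letter during normalization).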
|||
return results_val |
|||
/** |
|||
* Expands templates in the given wikitext |
|||
* @param wikitext the wikitext to be expanded
|||
* @return expanded wikitext |
|||
*/ |
|||
protected String expandTemplates(String wikitext) { |
|||
String apiURL = BaseEnWikiAPIURL + "api.php?format=xml&action=expandtemplates&text=" + |
|||
MediaWiki.encode(wikitext); |
|||
Document doc = fetchUsingSAXBuilder(apiURL); |
|||
return doc.getRootElement().getChildText("expandtemplates", ns); |
|||
} |
|||
/** |
|||
* Pause for the time given |
|||
* @param time to wait, in milliseconds |
|||
*/ |
|||
protected void sleep(long milliseconds) { |
|||
do { |
|||
try { |
|||
this.wait(milliseconds); |
|||
return; |
|||
} catch (InterruptedException e1) { |
|||
log ("Interrupted exception caught"); |
|||
} |
|||
} while (true); |
|||
} |
|||
protected void log(String message) { |
|||
System.out.println(message); |
|||
} |
|||
/** |
|||
* Initializes the various loggers that the JWBF uses |
|||
*/ |
|||
protected static void initializeLoggers() { |
|||
BasicConfigurator.configure(); |
|||
Logger.getLogger("org.apache.commons.httpclient").setLevel(Level.FATAL); |
|||
Logger.getLogger("httpclient.wire").setLevel(Level.FATAL); |
|||
Logger.getLogger("net.sourceforge.jwbf").setLevel(Level.FATAL); |
|||
Logger.getLogger(PostModifyContentWithEditConflicts.class).setLevel(Level.FATAL); |
|||
} |
|||
} |
|||
</source> |
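The expandTemplates helper in the EnWikiBot class above calls the expandtemplates API by hand. Under Pywikibot the same expansion is available from the site object; a minimal sketch, assuming Pywikibot's Site.expand_text method (not used elsewhere in this code):

<source lang="python">
import pywikibot

def expand_templates(wikitext: str) -> str:
    # Expand templates in the given wikitext via the expandtemplates API,
    # the rough equivalent of EnWikiBot.expandTemplates above.
    return pywikibot.Site().expand_text(wikitext)
</source>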
|||
def update_dyk(self, time_update, results) -> None: |
|||
if results.rgstr_errors: |
|||
return |
|||
str_link_to_queue = DYKUpdateBotUtils.wikilink_to_queue(results.num_queue, False) |
|||
# replace old hooks with new hooks |
|||
<source lang="java"> |
|||
results.page_TDYK.text = results.page_TDYK.text.replace(results.hooks_outgoing, results.hooks_incoming) |
|||
import java.util.Date; |
|||
self._edit(results.page_TDYK, 'Bot automatically updating DYK template with hooks copied from ' + str_link_to_queue) |
|||
# purge the Main Page |
|||
public class DYKFile { |
|||
pywikibot.Page(pywikibot.Site(), 'Main Page').purge() |
|||
private final String filename; |
|||
private final String type; |
|||
private Date dykDate; |
|||
private final String rolloverText; |
|||
private boolean cuploaded; |
|||
private String croppedFrom; |
|||
// for backwards-compatibility with other packages |
|||
public DYKFile(String filename, String type, String rolloverText) { |
|||
this.filename = filename; |
|||
this.type = type; |
|||
this.rolloverText = rolloverText; |
|||
this.cuploaded = false; |
|||
} |
|||
public DYKFile(String filename, String type) { |
|||
this.filename = filename; |
|||
this.type = type; |
|||
this.rolloverText = null; |
|||
this.cuploaded = false; |
|||
} |
|||
public String getFilename() { |
|||
return filename; |
|||
} |
|||
public String getType() { |
|||
return type; |
|||
} |
|||
public void setDYKDate(Date dykDate) { |
|||
this.dykDate = dykDate; |
|||
} |
|||
public Date getDYKDate() { |
|||
return dykDate; |
|||
} |
|||
public void setCuploaded(boolean cuploaded) { |
|||
this.cuploaded = cuploaded; |
|||
} |
|||
public boolean getCuploaded() { |
|||
return cuploaded; |
|||
} |
|||
public void setCroppedFrom(String filename) { |
|||
croppedFrom = filename; |
|||
} |
|||
public String getCroppedFrom() { |
|||
return croppedFrom; |
|||
} |
|||
public String toStatsString() { |
|||
if (type.equals("sound")) { |
|||
return "{{DYK Listen|" + filename + "|" + rolloverText + "}}"; |
|||
} else if (type.equals("video")) { |
|||
return "{{DYK Watch|" + filename + "|" + rolloverText + "}}"; |
|||
} else { |
|||
return "[[File:" + filename + "|100x100px|" + rolloverText + "]]"; |
|||
} |
|||
} |
|||
public String toString() { |
|||
return null; // unused code |
|||
} |
|||
} |
|||
</source> |
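For reference, the Main Page markup that DYKFile.toStatsString() above produces for each file type, as a Python sketch (the helper name is illustrative and is not defined by the bot):

<source lang="python">
def file_markup(filename: str, file_type: str, rollover_text: str) -> str:
    # Mirrors DYKFile.toStatsString(): sound and video files use the {{DYK Listen}} and
    # {{DYK Watch}} templates, everything else becomes a plain 100x100px thumbnail link.
    if file_type == 'sound':
        return '{{{{DYK Listen|{0}|{1}}}}}'.format(filename, rollover_text)
    if file_type == 'video':
        return '{{{{DYK Watch|{0}|{1}}}}}'.format(filename, rollover_text)
    return '[[File:{0}|100x100px|{1}]]'.format(filename, rollover_text)
</source>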
|||
# set last update time |
|||
time_update = time_update.replace(second=0, microsecond=0) |
|||
num_minutes_drift = self._calculate_drift(time_update, results.timedelta_between_updates) |
|||
time_update_with_drift = time_update + timedelta(minutes=num_minutes_drift) |
|||
page_last_update_time = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.LAST_UPDATE_TIME_LOC) |
|||
page_last_update_time.text = time_update_with_drift.isoformat() |
|||
self._edit(page_last_update_time, 'Resetting the clock' + (', with drift' if num_minutes_drift != 0 else '')) |
|||
# archive outgoing hooks |
|||
<source lang="java"> |
|||
page_archive = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.ARCHIVE_LOC) |
|||
import net.sourceforge.jwbf.actions.Post; |
|||
page_archive.text = DYKUpdateBotUtils.archive(page_archive.text, time_update, results.hooks_outgoing) |
|||
import net.sourceforge.jwbf.actions.mw.HttpAction; |
|||
self._edit(page_archive, 'Archiving latest set') |
|||
import net.sourceforge.jwbf.actions.mw.util.MWAction; |
|||
import net.sourceforge.jwbf.actions.mw.util.ProcessException; |
|||
# credits - article talk, user talk |
|||
public class PostPurge extends MWAction { |
|||
rgcredits = self._parse_and_populate_credits(results.page_queue, results.hooks_incoming, results.file_incoming, results.rgstr_warnings) |
|||
private final Post msg; |
|||
self._tag_articles(rgcredits, time_update) |
|||
self._give_user_credits(rgcredits, results.str_dykbotdo_signature) |
|||
public PostPurge(final String title) { |
|||
super(); |
|||
Post pm = new Post("/api.php?action=purge&format=xml"); |
|||
pm.addParam("titles", title); |
|||
msg = pm; |
|||
} |
|||
public String processAllReturningText(final String s) throws ProcessException { |
|||
return s; |
|||
} |
|||
public HttpAction getNextMessage() { |
|||
return msg; |
|||
} |
|||
# clear queue |
|||
} |
|||
results.page_queue.text = '{{User:DYKUpdateBot/REMOVE THIS LINE}}' |
|||
</source> |
|||
self._edit(results.page_queue, 'Update is done, removing the hooks') |
|||
# update next queue number |
|||
num_next_queue = (results.num_queue % DYKUpdateBot.NUM_QUEUES) + 1 |
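# e.g. promoting from queue 7 gives (7 % 7) + 1 == 1, so the pointer wraps back around to queue 1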
|||
page_next_queue_num = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.NEXT_UPDATE_QUEUE_LOC) |
|||
page_next_queue_num.text = str(num_next_queue) |
|||
self._edit(page_next_queue_num, 'Next queue is ' + DYKUpdateBotUtils.wikilink_to_queue(num_next_queue, False)) |
|||
# tag outgoing file |
|||
<source lang="java"> |
|||
self._tag_outgoing_file(results.hooks_outgoing, time_update) |
|||
public class DYKResetException extends RuntimeException { |
|||
private static final long serialVersionUID = 6465485908664532508L; |
|||
} |
|||
</source> |
|||
def _post_almost_late_message_to_WTDYK(self, time_set_leaving, num_next_queue) -> None: |
|||
str_timestamp = time_set_leaving.isoformat() |
|||
page_wtdyk = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.WTDYK_LOC) |
|||
if str_timestamp in page_wtdyk.text: |
|||
return # bot already posted an "almost late" message for this update, don't post again |
|||
with open(str(pathlib.Path(__file__).parent / 'almostLate.txt'), 'r', encoding='utf-8') as f: |
|||
<source lang="java"> |
|||
str_almost_late = f.read() |
|||
public class EditConflictException extends RuntimeException { |
|||
private static final long serialVersionUID = 7595756569739191727L; |
|||
} |
|||
</source> |
|||
str_almost_late = str_almost_late.replace('queueNum', str(num_next_queue)) |
|||
str_almost_late = str_almost_late.replace('hoursLeft', 'two hours') |
|||
str_almost_late = str_almost_late.replace('uniqueSetIdentifier', str_timestamp) |
|||
self._append_and_edit(DYKUpdateBot.WTDYK_LOC, str_almost_late, 'DYK is almost late') |
|||
<source lang="java"> |
|||
import org.apache.log4j.Logger; |
|||
def _calculate_drift(self, time_update, timedelta_between_updates) -> int: |
|||
import net.sourceforge.jwbf.actions.Post; |
|||
num_max_advance_minutes = 0 |
|||
import net.sourceforge.jwbf.actions.mw.HttpAction; |
|||
num_max_delay_minutes = 0 |
|||
import net.sourceforge.jwbf.actions.mw.MediaWiki; |
|||
page_drift = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.DRIFT_LOC) |
|||
import net.sourceforge.jwbf.actions.mw.editing.PostDelete; |
|||
for str_line in page_drift.text.split('\n'): |
|||
import net.sourceforge.jwbf.actions.mw.util.ProcessException; |
|||
try: |
|||
import net.sourceforge.jwbf.contentRep.mw.Siteinfo; |
|||
num_minutes_parsed = int(str_line[str_line.find(':') + 1:]) |
|||
import net.sourceforge.jwbf.contentRep.mw.Userinfo; |
|||
if 'advance' in str_line: |
|||
num_max_advance_minutes = num_minutes_parsed |
|||
elif 'delay' in str_line: |
|||
num_max_delay_minutes = num_minutes_parsed |
|||
except: |
|||
DYKUpdateBotUtils.log('Couldn\'t parse drift') |
|||
return 0 |
|||
return DYKUpdateBotUtils.calculate_drift_core(time_update, |
|||
timedelta_between_updates, |
|||
num_max_advance_minutes, |
|||
num_max_delay_minutes) |
|||
def _parse_and_populate_credits(self, page_queue, hooks_incoming, file_incoming, rgstr_warnings) -> list:
|||
public class PostDeleteWithReason extends PostDelete { |
|||
rgcredits = DYKUpdateBotUtils.parse_credits(page_queue.text) |
|||
protected static final Logger LOG = Logger.getLogger(PostDelete.class); |
|||
fn_log_warning = partial(self._log_warning, rgstr_warnings)
|||
protected final String reason; |
|||
DYKUpdateBotUtils.validate_credits_articles(rgcredits, fn_log_warning) |
|||
protected final String title; |
|||
DYKUpdateBotUtils.validate_credits_users(rgcredits, fn_log_warning) |
|||
DYKUpdateBotUtils.populate_hooks_and_file(rgcredits, hooks_incoming, file_incoming.title(with_ns=False)) |
|||
for credit in rgcredits: |
|||
if credit.str_hook is None: |
|||
self._log_warning(rgstr_warnings, 'Couldn\'t find hook for [[{0}]], was the hook pulled or moved to a different set?'.format(credit.str_article))
|||
return rgcredits |
|||
def _tag_articles(self, rgcredits, time_update) -> None: |
|||
set_tagged = set() |
|||
for credit in rgcredits: |
|||
if credit.str_article in set_tagged: |
|||
continue |
|||
str_edit_summary = None |
|||
public PostDeleteWithReason(String title, String reason, Siteinfo si, Userinfo ui) |
|||
page_talk = pywikibot.Page(pywikibot.Site(), 'Talk:' + credit.str_article) |
|||
throws ProcessException { |
|||
page_talk.text, str_edit_summary = DYKUpdateBotUtils.tag_article_history(page_talk.text, credit, time_update) |
|||
super(title, si, ui); |
|||
if not str_edit_summary: |
|||
this.reason = reason; |
|||
str_dyktalk_tag, str_edit_summary = DYKUpdateBotUtils.build_dyktalk_tag(credit, time_update) |
|||
this.title = title; |
|||
page_talk.text = DYKUpdateBotUtils.add_template_to_talk(page_talk.text, str_dyktalk_tag) |
|||
} |
|||
self._edit(page_talk, str_edit_summary) |
|||
/** |
|||
* This method is copied from PostDelete, with the reason added into the URL |
|||
*/ |
|||
@Override |
|||
protected HttpAction getSecondRequest() { |
|||
HttpAction msg = null; |
|||
if (getToken() == null || getToken().length() == 0) { |
|||
throw new IllegalArgumentException( |
|||
"The argument 'token' must not be \"" |
|||
+ String.valueOf(getToken()) + "\""); |
|||
} |
|||
if (LOG.isTraceEnabled()) { |
|||
LOG.trace("enter PostDelete.generateDeleteRequest(String)"); |
|||
} |
|||
set_tagged.add(credit.str_article) |
|||
String uS = "/api.php" + "?action=delete" + "&title=" + MediaWiki.encode(title) + |
|||
"&token=" + MediaWiki.encode(getToken()) + |
|||
"&reason=" + MediaWiki.encode(reason) + "&format=xml"; |
|||
if (LOG.isDebugEnabled()) { |
|||
LOG.debug("delete url: \"" + uS + "\""); |
|||
} |
|||
Post pm = new Post(uS); |
|||
msg = pm; |
|||
def _give_user_credits(self, rgcredits, str_dykbotdo_signature) -> None: |
|||
return msg; |
|||
str_promoting_admin = DYKUpdateBotUtils.find_user_link(str_dykbotdo_signature) |
|||
} |
|||
for credit in rgcredits: |
|||
} |
|||
if not credit.str_user_talk: |
|||
</source> |
|||
continue |
|||
str_message, str_edit_summary = DYKUpdateBotUtils.build_user_talk_credit(credit, str_dykbotdo_signature, str_promoting_admin) |
|||
self._append_and_edit(credit.str_user_talk, str_message, str_edit_summary) |
|||
def _tag_outgoing_file(self, hooks_outgoing, time_update) -> None: |
|||
file_outgoing = DYKUpdateBotUtils.find_file(hooks_outgoing) |
|||
if file_outgoing: |
|||
file_outgoing_commons = pywikibot.FilePage(pywikibot.Site().image_repository(), file_outgoing.title()) |
|||
if file_outgoing.exists() or file_outgoing_commons.exists(): |
|||
str_dykfile_tag = '{{{{DYKfile|{d.day} {d:%B}|{d.year}}}}}'.format(d=time_update) |
|||
file_outgoing.text = DYKUpdateBotUtils.add_template_to_talk(file_outgoing.text, str_dykfile_tag) |
|||
self._edit(file_outgoing, 'File appeared on [[WP:Did you know|DYK]] on {d.day} {d:%B} {d.year}'.format(d=time_update)) |
|||
if ('m-cropped' in file_outgoing.text.lower()) or ('c-uploaded' in file_outgoing.text.lower()): |
|||
DYKUpdateBotUtils.log('Outgoing file "{0}" tagged with {{m-cropped}} or {{c-uploaded}}'.format(file_outgoing.title())) |
|||
else: |
|||
DYKUpdateBotUtils.log('Special case (possible bug?): Outgoing file "{0}" doesn\'t exist'.format(file_outgoing.title())) |
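# Example of the tag built in _tag_outgoing_file above (illustrative): for an update on
# 1 May 2023 the format string produces '{{DYKfile|1 May|2023}}'.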
|||
def _post_errors(self, rgstr_warnings, rgstr_errors) -> None: |
|||
<source lang="java"> |
|||
str_output = '' |
|||
//this is almost a straight copy & paste of revision 260 of JWBF's PostLogin |
|||
str_edit_summary = 'No errors or warnings; clear' |
|||
/* |
|||
* Copyright 2007 Thomas Stock. |
|||
* |
|||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not |
|||
* use this file except in compliance with the License. You may obtain a copy of |
|||
* the License at |
|||
* |
|||
* http://www.apache.org/licenses/LICENSE-2.0 |
|||
* |
|||
* Unless required by applicable law or agreed to in writing, software |
|||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
|||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
|||
* License for the specific language governing permissions and limitations under |
|||
* the License. |
|||
* |
|||
* Contributors: |
|||
* Philipp Kohl |
|||
* Carlos Valenzuela |
|||
*/ |
|||
if rgstr_warnings: |
|||
import java.io.IOException; |
|||
str_warnings = 'Bot warnings:\n' |
|||
import java.io.Reader; |
|||
str_warnings += '\n'.join('* {0}'.format(str_warning) for str_warning in rgstr_warnings) |
|||
import java.io.StringReader; |
|||
str_output = str_warnings + '\n\n' + str_output |
|||
str_edit_summary = 'Posting latest warnings' |
|||
if rgstr_errors: |
|||
import net.sourceforge.jwbf.actions.mw.login.PostLogin; |
|||
str_errors = 'Errors blocking the bot from updating DYK:\n' |
|||
import net.sourceforge.jwbf.actions.mw.util.ProcessException; |
|||
str_errors += '\n'.join('* {0}'.format(str_error) for str_error in rgstr_errors) |
|||
import net.sourceforge.jwbf.actions.mw.util.MWAction; |
|||
str_output = str_errors + '\n\n' + str_output |
|||
str_edit_summary = 'Bot is blocked from updating DYK, posting latest errors' |
|||
page_errors = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.ERROR_OUTPUT_LOC) |
|||
import org.apache.log4j.Logger; |
|||
if page_errors.text.strip() == str_output.strip(): |
|||
import org.jdom.DataConversionException; |
|||
return # if the errors are already on the page, don't post again |
|||
import org.jdom.Document; |
|||
page_errors.text = str_output.strip() |
|||
import org.jdom.Element; |
|||
self._edit(page_errors, str_edit_summary) |
|||
import org.jdom.JDOMException; |
|||
import org.jdom.input.SAXBuilder; |
|||
import org.xml.sax.InputSource; |
|||
/** |
|||
* |
|||
* @author Thomas Stock |
|||
*/ |
|||
public class PostLoginNew extends MWAction { |
|||
private final Logger log = Logger.getLogger(PostLogin.class); |
|||
private net.sourceforge.jwbf.actions.Post msg; |
|||
# --------------------------------------------- |
|||
# Core editing |
|||
# --------------------------------------------- |
|||
# Edge cases we're handling: |
|||
private final String success = "Success"; |
|||
# * {{nobots}} |
|||
private final String wrongPass = "WrongPass"; |
|||
# * Redirects |
|||
private final String notExists = "NotExists"; |
|||
# * Page doesn't exist |
|||
private final String needToken = "NeedToken"; |
|||
# * Edit conflicts |
|||
private net.sourceforge.jwbf.bots.util.LoginData login = null; |
|||
# * Protected page |
|||
private boolean reTry = false; |
|||
def _append_and_edit(self, str_title, str_message, str_edit_summary) -> None: |
|||
private boolean reTryLimit = true; |
|||
page_to_edit = pywikibot.Page(pywikibot.Site(), str_title) |
|||
private final String username; |
|||
if page_to_edit.isRedirectPage(): |
|||
private final String pw; |
|||
page_to_edit = page_to_edit.getRedirectTarget() |
|||
if not page_to_edit.botMayEdit(): |
|||
# Attempting to save the page when botMayEdit() is False will throw an OtherPageSaveError |
|||
DYKUpdateBotUtils.log('Couldn\'t edit ' + page_to_edit.title() + ' due to {{bots}} or {{nobots}}') |
|||
return |
|||
retry = True |
|||
/** |
|||
while retry: |
|||
* |
|||
retry = False |
|||
* @param username the |
|||
try: |
|||
* @param pw password |
|||
if not page_to_edit.text.isspace(): |
|||
* @param domain a |
|||
page_to_edit.text += '\n\n' |
|||
* @param login a |
|||
page_to_edit.text += str_message |
|||
*/ |
|||
self._edit(page_to_edit, str_edit_summary) |
|||
public PostLoginNew(final String username, final String pw, net.sourceforge.jwbf.bots.util.LoginData login) { |
|||
except pywikibot.EditConflict: |
|||
super(); |
|||
retry = True |
|||
this.login = login; |
|||
DYKUpdateBotUtils.log('Edit conflicted on ' + page_to_edit.title() + ' will retry after a short nap') |
|||
this.username = username; |
|||
pywikibot.sleep(10) # sleep for 10 seconds |
|||
this.pw = pw; |
|||
page_to_edit = pywikibot.Page(pywikibot.Site(), page_to_edit.title()) |
|||
msg = getLoginMsg(username, pw, null); |
|||
def _is_on(self) -> bool: |
|||
} |
|||
with open(str(pathlib.Path(__file__).parent / 'UpdateBotSwitch.txt'), 'r', encoding='utf-8') as f: |
|||
str_file_switch = f.read() |
|||
is_file_switch_on = str_file_switch.strip().lower() == 'on' |
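# UpdateBotSwitch.txt is expected to contain just the word "on" (case-insensitive, surrounding
# whitespace ignored); anything else makes this check fail and shuts the bot down.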
|||
if not is_file_switch_on: |
|||
DYKUpdateBotUtils.log('Text file switch is not "on", exiting...') |
|||
return is_file_switch_on |
|||
def _edit(self, page_to_edit, str_edit_summary) -> None: |
|||
private net.sourceforge.jwbf.actions.Post getLoginMsg(final String username, final String pw, |
|||
DYKUpdateBotUtils.log('Editing ' + page_to_edit.title()) |
|||
final String token) { |
|||
if (not page_to_edit.exists()) and DYKUpdateBotUtils.check_if_salted(page_to_edit): |
|||
net.sourceforge.jwbf.actions.Post pm = new net.sourceforge.jwbf.actions.Post("/api.php?action=login&format=xml"); |
|||
DYKUpdateBotUtils.log('Special case: ' + page_to_edit.title() + ' is salted, skipping...') |
|||
pm.addParam("lgname", username); |
|||
return |
|||
try: |
|||
page_to_edit.save(str_edit_summary, minor=False) |
|||
# For a dry run where the bot outputs to local files, comment out the above line and uncomment the lines below |
|||
} |
|||
# DYKUpdateBotUtils.log('Edit summary: ' + str_edit_summary) |
|||
return pm; |
|||
# filename = ''.join(character for character in page_to_edit.title() if character not in '\/:*?<>|"') + '.txt' |
|||
} |
|||
# with open(str(pathlib.Path(__file__).parent / 'TestResources' / filename), 'w', encoding='utf-8') as file_write: |
|||
# file_write.write(page_to_edit.text) |
|||
except pywikibot.exceptions.LockedPage: # I'm not sure it's possible to hit this with an adminbot... |
|||
DYKUpdateBotUtils.log('Special case: ' + page_to_edit.title() + ' is protected, skipping...') |
|||
def _log_error(self, rgstr_errors, str_error) -> None: |
|||
/** |
|||
rgstr_errors.append(str_error) |
|||
* {@inheritDoc} |
|||
DYKUpdateBotUtils.log('Error: ' + str_error) |
|||
*/ |
|||
@Override |
|||
public String processAllReturningText(final String s) throws ProcessException { |
|||
SAXBuilder builder = new SAXBuilder(); |
|||
Element root = null; |
|||
try { |
|||
Reader i = new StringReader(s); |
|||
Document doc = builder.build(new InputSource(i)); |
|||
def _log_warning(self, rgstr_warnings, str_warning) -> None: |
|||
root = doc.getRootElement(); |
|||
rgstr_warnings.append(str_warning) |
|||
findContent(root); |
|||
DYKUpdateBotUtils.log('Warning: ' + str_warning) |
|||
} catch (JDOMException e) { |
|||
log.error(e.getClass().getName() + e.getLocalizedMessage()); |
|||
} catch (IOException e) { |
|||
log.error(e.getClass().getName() + e.getLocalizedMessage()); |
|||
} catch (NullPointerException e) { |
|||
log.error(e.getClass().getName() + e.getLocalizedMessage()); |
|||
throw new ProcessException("No regular content was found, check your api\n::" + s); |
|||
} catch (Exception e) { |
|||
log.error(e.getClass().getName() + e.getLocalizedMessage()); |
|||
throw new ProcessException(e.getLocalizedMessage()); |
|||
} |
|||
# Set of methods broken out for easier unit testability |
|||
# Unless otherwise noted, these methods don't make network calls |
|||
# Do Not edit the wiki from within these methods, otherwise unit tests will edit the wiki! |
|||
return s; |
|||
} |
|||
/** |
|||
* |
|||
* @param startElement the, where the search begins |
|||
* @throws ProcessException if problems with login |
|||
*/ |
|||
private void findContent(final Element startElement) throws ProcessException { |
|||
class DYKUpdateBotUtils(): |
|||
Element loginEl = startElement.getChild("login", EnWikiBot.ns); |
|||
@staticmethod |
|||
String result = loginEl.getAttributeValue("result"); |
|||
def wikilink_to_queue(num_queue, capitalize) -> str: |
|||
if (result.equalsIgnoreCase(success)) { |
|||
return '[[{0}{1}|{2}ueue {1}]]'.format(DYKUpdateBot.QUEUE_ROOT_LOC, |
|||
try { |
|||
num_queue, |
|||
login.setup(loginEl.getAttribute("lguserid").getIntValue() |
|||
'Q' if capitalize else 'q') |
|||
, loginEl.getAttributeValue("lgusername"), "0", true); |
|||
} catch (DataConversionException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
} else if (result.equalsIgnoreCase(needToken) && reTryLimit ) { |
|||
msg = getLoginMsg(username, pw, loginEl.getAttributeValue("token")); |
|||
reTry = true; |
|||
reTryLimit = false; |
|||
} else if (result.equalsIgnoreCase(wrongPass)) { |
|||
throw new ProcessException("Wrong Password"); |
|||
} else if (result.equalsIgnoreCase(notExists)) { |
|||
throw new ProcessException("No sutch User"); |
|||
} |
|||
# Returns a tuple: |
|||
} |
|||
# * First value is True if dykbotdo was found, False if not |
|||
/** |
|||
# * Second value is the admin signature in dykbotdo, or None if not found |
|||
* {@inheritDoc} |
|||
@staticmethod |
|||
*/ |
|||
def parse_dykbotdo(str_queue) -> (bool, str): |
|||
public net.sourceforge.jwbf.actions.mw.HttpAction getNextMessage() { |
|||
templates_in_queue = mwparserfromhell.parse(str_queue, skip_style_tags=True).filter_templates() |
|||
return msg; |
|||
for template in templates_in_queue: |
|||
} |
|||
if template.name.matches('DYKbotdo'): |
|||
return True, str(template.get(1)) if template.has(1) else None |
|||
return False, None |
|||
# Returns: |
|||
# * Hooks if <!--Hooks--> and <!--HooksEnd--> tags are in order |
|||
* @see net.sourceforge.jwbf.mediawiki.actions.util.MWAction#hasMoreMessages() |
|||
* |
# * None if not |
||
@ |
@staticmethod |
||
def extract_hooks(str_queue_or_tdyk) -> str: |
|||
public boolean hasMoreMessages() { |
|||
idx_hooks_tag = str_queue_or_tdyk.find('<!--Hooks-->') |
|||
boolean temp = super.hasMoreMessages() || reTry; |
|||
idx_hooksend_tag = str_queue_or_tdyk.find('<!--HooksEnd-->', max(idx_hooks_tag, 0)) |
|||
reTry = false; |
|||
if min(idx_hooks_tag, idx_hooksend_tag) == -1: |
|||
return temp; |
|||
return None |
|||
} |
|||
return str_queue_or_tdyk[idx_hooks_tag + 12:idx_hooksend_tag].strip() |
|||
} |
|||
</source> |
|||
# Returns: |
|||
# * pywikibot.FilePage of the file in the DYK set if detected |
|||
# * None if not |
|||
@staticmethod |
|||
def find_file(str_hooks) -> pywikibot.FilePage: |
|||
templates_in_hooks = mwparserfromhell.parse(str_hooks, skip_style_tags=True).filter_templates() |
|||
for template in templates_in_hooks: |
|||
if template.name.matches('Main page image/DYK'): |
|||
# Note it's fine whether the parameter is File:XYZ.jpg, Image:XYZ.jpg, or XYZ.jpg |
|||
# all three formats will create the same FilePage object returning File:XYZ.jpg from title() |
|||
str_file = str(template.get('image').value) |
|||
if '{{!}}' in str_file: |
|||
DYKUpdateBotUtils.log('Special case: Stripping everything after pipe from filename "{0}"'.format(str_file)) |
|||
str_file = str_file[:str_file.find('{{!}}')] |
|||
return pywikibot.FilePage(pywikibot.Site(), str_file) |
|||
return None |
|||
# This method makes network calls to the Wikipedia API (read-only) |
|||
<source lang="java"> |
|||
# Returns: |
|||
// this is JWBF's PostModifyContent (rev 178) modified for edit conflicts and new(er) edit token requirements |
|||
# * None if protection looks good |
|||
/* |
|||
# * A string describing the issue if not |
|||
* Copyright 2007 Thomas Stock. |
|||
# Cases to validate if changing this function (leverage the unit tests!): |
|||
* |
|||
# * File that doesn't exist |
|||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not |
|||
# * File:Nlksjdkfjskdljflkdsjfame.jpg |
|||
* use this file except in compliance with the License. You may obtain a copy of |
|||
# * Fully not-protected file |
|||
* the License at |
|||
# * en:File:Emmelie de Forest Hunter & Prey.png and commons:File:Novo Selo TE 01.JPG |
|||
* |
|||
# * Fully not-protected file on Commons with an enwiki description page |
|||
* http://www.apache.org/licenses/LICENSE-2.0 |
|||
# * en:File:MET Breuer (48377070386).jpg |
|||
* |
|||
# * Semi-protected file |
|||
* Unless required by applicable law or agreed to in writing, software |
|||
# * en:File:Amy Barlow.jpg and commons:File:Flag of Palestine.svg |
|||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
|||
# * Fully protected file indefinitely protected |
|||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
|||
# * en:File:George Floyd neck knelt on by police officer.png and commons:File:Name.jpg |
|||
* License for the specific language governing permissions and limitations under |
|||
# * Fully protected file via cascading protection |
|||
* the License. |
|||
# * en:File:WPVG icon 2016.svg and commons:File:Wikitech-2020-logo.svg |
|||
* |
|||
# * Fully protected file with protection expiring before set leaves the Main Page |
|||
* Contributors: |
|||
# * Use the API to find examples: |
|||
* |
|||
# * https://commons.wikimedia.org/w/api.php?action=query&list=allpages&apnamespace=6&apprtype=edit&apprexpiry=definite&apprlevel=sysop&aplimit=500 |
|||
*/ |
|||
# * Fully protected file with protection expiring after set leaves the Main Page |
|||
import java.text.ParseException; |
|||
# * see URL above |
|||
import java.text.SimpleDateFormat; |
|||
@staticmethod |
|||
import java.util.Hashtable; |
|||
def check_if_protected(filepage, time_set_leaving) -> str: |
|||
str_file_for_output = filepage.title(as_link=True, textlink=True) |
|||
filepage_commons = pywikibot.FilePage(pywikibot.Site().image_repository(), filepage.title()) |
|||
if not (filepage.exists() or filepage_commons.exists()): |
|||
return str_file_for_output + ' does not exist' |
|||
on_commons = filepage.file_is_shared() |
|||
if on_commons: |
|||
filepage = filepage_commons |
|||
edit_protections = filepage.protection().get('edit') |
|||
if edit_protections is None: |
|||
if on_commons: |
|||
return str_file_for_output + ' is not protected; either 1) Upload the file to en.wiki, or 2) protect the file at Commons' |
|||
else: # on enwiki |
|||
return str_file_for_output + ' is not protected' |
|||
if edit_protections[0] != 'sysop': |
|||
return str_file_for_output + ' is not fully protected' |
|||
str_prot_end = edit_protections[1] |
|||
if str_prot_end == 'infinity': |
|||
return None |
|||
time_prot_end = pywikibot.Timestamp.fromISOformat(str_prot_end).replace(tzinfo=timezone.utc) |
|||
if time_prot_end < time_set_leaving: |
|||
return 'The protection for ' + str_file_for_output + ' will expire before or while it\'s on the Main Page' |
|||
return None # protection expires after set leaves the Main Page |
|||
@staticmethod |
|||
import net.sourceforge.jwbf.actions.Post; |
|||
def calculate_drift_core(time_update, timedelta_between_updates, minutes_max_advance, minutes_max_delay) -> int: |
|||
import net.sourceforge.jwbf.actions.mw.HttpAction; |
|||
seconds_per_day = 60 * 60 * 24 |
|||
import net.sourceforge.jwbf.actions.mw.MediaWiki; |
|||
seconds_least_difference_from_0000 = 60 * 60 * 24 |
|||
import net.sourceforge.jwbf.actions.mw.util.MWAction; |
|||
set_seconds_differences = set() |
|||
import net.sourceforge.jwbf.actions.mw.util.ProcessException; |
|||
time_iter = time_update |
|||
import net.sourceforge.jwbf.contentRep.mw.ContentAccessable; |
|||
while True: |
|||
import net.sourceforge.jwbf.contentRep.mw.SimpleArticle; |
|||
current_difference_from_0000 = int(time_iter.timestamp()) % seconds_per_day |
|||
if current_difference_from_0000 > (seconds_per_day / 2): |
|||
current_difference_from_0000 = -(seconds_per_day - current_difference_from_0000) |
|||
if abs(seconds_least_difference_from_0000) > abs(current_difference_from_0000): |
|||
seconds_least_difference_from_0000 = current_difference_from_0000 |
|||
if seconds_least_difference_from_0000 == 0: |
|||
break |
|||
if (current_difference_from_0000 in set_seconds_differences) or (len(set_seconds_differences) >= 24): |
|||
break |
|||
set_seconds_differences.add(current_difference_from_0000) |
|||
time_iter = time_iter + timedelta_between_updates |
|||
if seconds_least_difference_from_0000 > 0: |
|||
import org.apache.log4j.Logger; |
|||
return -min(minutes_max_advance, seconds_least_difference_from_0000 // 60) |
|||
elif seconds_least_difference_from_0000 < 0: |
|||
return min(minutes_max_delay, -seconds_least_difference_from_0000 // 60) |
|||
else: |
|||
return 0 |
|||
# This method makes network calls to the Wikipedia API (read-only) |
|||
/** |
|||
@staticmethod |
|||
* |
|||
def check_if_salted(page) -> bool: |
|||
* |
|||
create_protections = page.protection().get('create') |
|||
* Writes an article. |
|||
return create_protections and (create_protections[0] == 'sysop') |
|||
* |
|||
* |
|||
* TODO no api use. |
|||
* @author Thomas Stock |
|||
* @supportedBy MediaWiki 1.9.x, 1.10.x, 1.11.x, 1.12.x, 1.13.x, 1.14.x |
|||
* |
|||
*/ |
|||
public class PostModifyContentWithEditConflicts extends MWAction { |
|||
@staticmethod |
|||
protected static final SimpleDateFormat WpTimestampFormat = new SimpleDateFormat("yyyyMMddHHmmss"); |
|||
def archive(str_archive, time_update, hooks_outgoing) -> str: |
|||
private int numMessagesSent = 0; |
|||
str_section_heading = '==={d.day} {d:%B} {d.year}==='.format(d=time_update) |
|||
private final ContentAccessable article; |
|||
str_set_heading = '*\'\'\'\'\'{d:%H}:{d:%M}, {d.day} {d:%B} {d.year} (UTC)\'\'\'\'\''.format(d=time_update) |
|||
private static final Logger LOG = Logger.getLogger(PostModifyContentWithEditConflicts.class); |
|||
idx_this_date = str_archive.find(str_section_heading) # check if there is a section heading already for today |
|||
private Hashtable<String, String> table = new Hashtable<String, String>(); |
|||
if idx_this_date == -1: # if there isn't, create a new section heading |
|||
idx_insert_section = str_archive.find('\n', str_archive.find('<!--BOTPOINTER-->')) + 1 |
|||
str_archive = DYKUpdateBotUtils._insert_str(str_archive, idx_insert_section, str_section_heading + '\n') |
|||
idx_this_date = idx_insert_section |
|||
idx_this_date = str_archive.find('\n', idx_this_date) + 1 |
|||
return DYKUpdateBotUtils._insert_str(str_archive, idx_this_date, str_set_heading + '\n' + hooks_outgoing + '\n\n') |
|||
@staticmethod |
|||
/** |
|||
def parse_credits(str_queue) -> []: |
|||
* |
|||
templates_in_queue = mwparserfromhell.parse(str_queue, skip_style_tags=True).filter_templates() |
|||
* @param a |
|||
rgcredits = [] |
|||
for template in templates_in_queue: |
|||
*/ |
|||
if template.name.matches('DYKmake') or template.name.matches('DYKnom'): |
|||
public PostModifyContentWithEditConflicts(final ContentAccessable a) { |
|||
if not (template.has(1) and template.has(2)): |
|||
this.article = a; |
|||
continue |
|||
} |
|||
credit = DYKCredit() |
|||
credit.str_article = html.unescape(str(template.get(1).value)) |
|||
credit.str_user = html.unescape(str(template.get(2).value)) |
|||
credit.is_dykmake = template.name.matches('DYKmake') |
|||
if template.has('subpage'): |
|||
str_subpage = html.unescape(str(template.get('subpage').value)) |
|||
if str_subpage != '': |
|||
credit.str_nompage = 'Template:Did you know nominations/' + str_subpage |
|||
# sanitize |
|||
if (credit.str_article == 'Example' or credit.str_article == '' or |
|||
public HttpAction getNextMessage() { |
|||
credit.str_user == '' or credit.str_user == 'Editor' or credit.str_user == 'Nominator'): |
|||
++numMessagesSent; |
|||
continue |
|||
credit.str_article = credit.str_article.replace('[[', '').replace(']]', '') |
|||
Post postMessage = new Post("/index.php?title=" + MediaWiki.encode(article.getLabel()) + "&action=submit"); |
|||
rgcredits.append(credit) |
|||
return rgcredits |
|||
if (numMessagesSent == 1) { |
|||
return postMessage; // send off first request to grab edit token from the response |
|||
} |
|||
# This method makes network calls to the Wikipedia API (read-only) |
|||
postMessage.addParam("wpSave", "Save"); |
|||
# As "output", sets str_article on valid credits & deletes credits for nonexistent articles |
|||
@staticmethod |
|||
def validate_credits_articles(rgcredits, fn_log_warning) -> None: |
|||
# Articles: |
|||
# * expand any templates in the article name |
|||
# * delete credits for nonexistent articles |
|||
# * follow redirects |
|||
# * normalize titles |
|||
dict_processed = {} |
|||
for idx_credit in reversed(range(len(rgcredits))): |
|||
str_article_orig = rgcredits[idx_credit].str_article |
|||
if str_article_orig in dict_processed: |
|||
rgcredits[idx_credit].str_article = dict_processed[str_article_orig].str_article |
|||
continue |
|||
str_article_processed = str_article_orig |
|||
postMessage.addParam("wpUltimateParam", table.get("wpUltimateParam")); |
|||
if '}}' in str_article_processed: |
|||
str_article_processed = pywikibot.Site().expand_text(text=str_article_processed) |
|||
DYKUpdateBotUtils.log('Special case: Credit article title contains template "{0}"->"{1}"'.format(str_article_orig, str_article_processed)) |
|||
page_article = pywikibot.Page(pywikibot.Site(), str_article_processed) |
|||
if page_article.isRedirectPage(): |
|||
page_article = page_article.getRedirectTarget() |
|||
if not page_article.exists(): |
|||
fn_log_warning('Article [[{0}]] does not exist'.format(str_article_orig)) |
|||
del rgcredits[idx_credit] |
|||
continue |
|||
str_article_processed = page_article.title() |
|||
rgcredits[idx_credit].str_article = str_article_processed |
|||
dict_processed[str_article_orig] = rgcredits[idx_credit] |
|||
# This method makes network calls to the Wikipedia API (read-only) |
|||
postMessage.addParam("wpUnicodeCheck", table.get("wpUnicodeCheck")); |
|||
# As "output", sets str_user_talk on valid credits |
|||
@staticmethod |
|||
def validate_credits_users(rgcredits, fn_log_warning) -> None: |
|||
# Users: |
|||
# * expand any templates in the username |
|||
# * check for nonexistent users |
|||
# * follow redirects |
|||
# * normalize titles |
|||
dict_processed = {} |
|||
for credit in rgcredits: |
|||
str_user_orig = credit.str_user |
|||
if str_user_orig in dict_processed: |
|||
credit.str_user_talk = dict_processed[str_user_orig].str_user_talk |
|||
continue |
|||
str_user_processed = str_user_orig |
|||
postMessage.addParam("wpStarttime", table.get("wpStarttime")); |
|||
if '}}' in str_user_processed: |
|||
str_user_processed = pywikibot.Site().expand_text(text=str_user_processed) |
|||
DYKUpdateBotUtils.log('Special case: Credit username contains template "{0}"->"{1}"'.format(str_user_orig, str_user_processed)) |
|||
user = pywikibot.User(pywikibot.Site(), str_user_processed) |
|||
is_valid_user = user.isRegistered() or (user.isAnonymous() and user.last_edit) |
|||
if not is_valid_user: |
|||
# was the user recently renamed? |
|||
# example API call: https://en.wikipedia.org/w/api.php?action=query&list=logevents&letype=renameuser&letitle=User:Carrot%20official&lelimit=1 |
|||
for entry in pywikibot.Site().logevents('renameuser', page=user.title(), total=1): |
|||
if entry['params']['olduser'] == user.username: |
|||
user = pywikibot.User(pywikibot.Site(), entry['params']['newuser']) |
|||
DYKUpdateBotUtils.log('Special case: User listed in credit was renamed "{0}"->"{1}"'.format(str_user_orig, user.username)) |
|||
is_valid_user = user.isRegistered() or (user.isAnonymous() and user.last_edit) |
|||
if is_valid_user: |
|||
postMessage.addParam("wpEditToken", table.get("wpEditToken")); |
|||
page_usertalk = user.getUserTalkPage() |
|||
if page_usertalk.isRedirectPage(): |
|||
try { |
|||
DYKUpdateBotUtils.log('Special case: User talk is a redirect "{0}"'.format(page_usertalk.title())) |
|||
if (WpTimestampFormat.parse(table.get("wpEdittime")).getTime() > |
|||
page_usertalk = page_usertalk.getRedirectTarget() |
|||
((SimpleArticle) article).getEditTimestamp().getTime()) { |
|||
if page_usertalk.isTalkPage(): |
|||
throw new EditConflictException(); |
|||
# no funny business - the redirect above shouldn't make the bot, eg, tag the Main Page with a DYK credit |
|||
} |
|||
credit.str_user_talk = page_usertalk.title() |
|||
} catch (ParseException e) {} // impossible |
|||
else: |
|||
postMessage.addParam("wpEdittime", table.get("wpEdittime")); |
|||
fn_log_warning('The username \'{0}\' is invalid'.format(str_user_orig)) |
|||
dict_processed[str_user_orig] = credit |
|||
postMessage.addParam("wpTextbox1", article.getText()); |
|||
# This method makes network calls to the Wikipedia API (read-only) if: |
|||
String editSummaryText = article.getEditSummary(); |
|||
# * There's a template within the hooks |
|||
if (editSummaryText != null && editSummaryText.length() > 200) { |
|||
# * There's no string match between the article listed in the credit and the hooks - redirect search |
|||
editSummaryText = editSummaryText.substring(0, 200); |
|||
# As "output", sets str_hook and (if first hook) str_file on credits |
|||
} |
|||
@staticmethod |
|||
def populate_hooks_and_file(rgcredits, str_hooks, str_file) -> None: |
|||
# remove stuff at the top that isn't hooks (eg image) |
|||
str_hooks = str_hooks[str_hooks.rfind('\n', 0, str_hooks.find('...')):].strip() |
|||
# expand templates |
|||
postMessage.addParam("wpSummary", editSummaryText); |
|||
str_hooks_normalized = str_hooks |
|||
if (article.isMinorEdit()) { |
|||
if '}}' in str_hooks_normalized: |
|||
postMessage.addParam("wpMinoredit", "1"); |
|||
str_hooks_normalized = pywikibot.Site().expand_text(text=str_hooks_normalized) |
|||
} |
|||
# unescape HTML and replace non-breaking spaces with normal spaces |
|||
LOG.info("WRITE: " + article.getLabel()); |
|||
str_hooks_normalized = html.unescape(str_hooks_normalized).replace(html.unescape(' '), ' ') |
|||
return postMessage; |
|||
} |
|||
rghooks_orig = str_hooks.split('\n') |
|||
@Override |
|||
rghooks_normalized = str_hooks_normalized.lower().split('\n') |
|||
public boolean hasMoreMessages() { |
|||
return numMessagesSent < 2; |
|||
} |
|||
# remove any lines without '...' and trim any leading characters, like * |
|||
@Override |
|||
for idx_hook in reversed(range(len(rghooks_orig))): |
|||
public String processReturningText(String returnedHTML, HttpAction action) |
|||
str_hook = rghooks_orig[idx_hook] |
|||
throws ProcessException { |
|||
idx_that = str_hook.find('...') |
|||
if (numMessagesSent == 1) { |
|||
if idx_that == -1: |
|||
parseWpValues(returnedHTML); |
|||
del rghooks_orig[idx_hook] |
|||
LOG.debug(table); |
|||
del rghooks_normalized[idx_hook] |
|||
} |
|||
else: |
|||
return returnedHTML; |
|||
rghooks_orig[idx_hook] = str_hook[idx_that:] |
|||
} |
|||
# search for the hook for each article |
|||
/** |
|||
dict_processed = {} |
|||
* |
|||
for credit in rgcredits: |
|||
* @param text |
|||
if credit.str_article in dict_processed: |
|||
credit.str_hook = dict_processed[credit.str_article].str_hook |
|||
* @param table |
|||
credit.str_file = dict_processed[credit.str_article].str_file |
|||
* table with required values |
|||
continue |
|||
*/ |
|||
private void parseWpValues(final String text) { |
|||
String[] tParts = text.split("\n"); |
|||
// System.out.println(tParts.length); |
|||
for (int i = 0; i < tParts.length; i++) { |
|||
if (tParts[i].indexOf("wpEditToken") > 0) { |
|||
// \<input type='hidden' value=\"(.*?)\" name=\"wpEditToken\" |
|||
int begin = tParts[i].indexOf("value") + 7; |
|||
int end = tParts[i].indexOf("name") - 2; |
|||
// System.out.println(line.substring(begin, end)); |
|||
// System.out.println("read wp token:" + tParts[i]); |
|||
table.put("wpEditToken", tParts[i].substring(begin, end)); |
|||
idx_found_hook = DYKUpdateBotUtils._find_hook(credit.str_article, rghooks_normalized) |
|||
} else if (tParts[i].indexOf("wpEdittime") > 0) { |
|||
if idx_found_hook == -1: # maybe the hook links to a page that redirects to str_article? |
|||
// value="(\d+)" name=["\']wpEdittime["\'] |
|||
page_article = pywikibot.Page(pywikibot.Site(), credit.str_article) |
|||
int begin = tParts[i].indexOf("value") + 7; |
|||
for page_redirect in page_article.getReferences(filter_redirects=True, namespaces=pywikibot.site.Namespace.MAIN): |
|||
int end = tParts[i].indexOf("name") - 2; |
|||
idx_found_hook = DYKUpdateBotUtils._find_hook(page_redirect.title(), rghooks_normalized) |
|||
// System.out.println( "read wp edit: " + |
|||
if idx_found_hook != -1: |
|||
// tParts[i].substring(begin, end)); |
|||
DYKUpdateBotUtils.log('Special case: Hook matches redirect to article "{0}"'.format(credit.str_article)) |
|||
break # got a hit! no need to keep iterating through redirects |
|||
if idx_found_hook >= 0: |
|||
table.put("wpEdittime", tParts[i].substring(begin, end)); |
|||
credit.str_hook = rghooks_orig[idx_found_hook] |
|||
if idx_found_hook == 0: |
|||
credit.str_file = str_file |
|||
dict_processed[credit.str_article] = credit |
|||
} else if (tParts[i].indexOf("wpStarttime") > 0) { |
|||
// value="(\d+)" name=["\']wpStarttime["\'] |
|||
int begin = tParts[i].indexOf("value") + 7; |
|||
int end = tParts[i].indexOf("name") - 2; |
|||
// System.out.println("read wp start:" + tParts[i]); |
|||
@staticmethod |
|||
table.put("wpStarttime", tParts[i].substring(begin, end)); |
|||
def _find_hook(str_article, rghooks_normalized) -> int: |
|||
str_article_lower = str_article.lower() |
|||
for idx_hook, str_hook_normalized in enumerate(rghooks_normalized): |
|||
if str_article_lower in str_hook_normalized: |
|||
return idx_hook |
|||
return -1 |
|||
@staticmethod |
|||
} else if (tParts[i].indexOf("wpUnicodeCheck") > 0) { |
|||
def tag_article_history(str_talk, credit, time_update) -> (str, str): |
|||
// \<input type='hidden' value=\"(.*?)\" name=\"wpUnicodeCheck\" |
|||
template_ah = None |
|||
int begin = tParts[i].indexOf("value") + 7; |
|||
templates_on_talk = mwparserfromhell.parse(str_talk, skip_style_tags=True).filter_templates() |
|||
int end = tParts[i].indexOf("name", begin) - 2; |
|||
for template in templates_on_talk: |
|||
// System.out.println(line.substring(begin, end)); |
|||
tname = template.name |
|||
// System.out.println("read wp token:" + tParts[i]); |
|||
if (tname.matches('Article history') or tname.matches('Articlehistory') or |
|||
table.put("wpUnicodeCheck", tParts[i].substring(begin, end)); |
|||
tname.matches('Article History') or tname.matches('ArticleHistory') or |
|||
} else if (tParts[i].indexOf("wpUltimateParam") > 0) { |
|||
tname.matches('Article milestones') or tname.matches('Articlemilestones')): |
|||
// \<input type='hidden' value=\"(.*?)\" name=\"wpUltimateParam\" |
|||
template_ah = template |
|||
int begin = tParts[i].indexOf("value") + 7; |
|||
break |
|||
int end = tParts[i].indexOf("name", begin) - 2; |
|||
// System.out.println(line.substring(begin, end)); |
|||
// System.out.println("read wp token:" + tParts[i]); |
|||
table.put("wpUltimateParam", tParts[i].substring(begin, end)); |
|||
} |
|||
} |
|||
str_edit_summary = None |
|||
} |
|||
if template_ah: |
|||
str_edit_summary = ('Article appeared on [[WP:Did you know|DYK]] on {d.day} {d:%B} {d.year}' |
|||
', adding to {{{{[[Template:Article history|Article history]]}}}}'.format(d=time_update)) |
|||
str_article_history_orig = str(template_ah) |
|||
# According to documentation at Template:Article_history, DYK params go between |currentstatus and |topic |
|||
param_topic = template_ah.get('topic') if template_ah.has('topic') else None |
|||
template_ah.add('dykdate', '{d.day} {d:%B} {d.year}'.format(d=time_update), before=param_topic) |
|||
if credit.str_hook: |
|||
template_ah.add('dykentry', credit.str_hook, before=param_topic) |
|||
if credit.str_nompage: |
|||
template_ah.add('dyknom', credit.str_nompage, before=param_topic) |
|||
str_talk = str_talk.replace(str_article_history_orig, str(template_ah)) |
|||
return str_talk, str_edit_summary |
|||
# Returns a tuple: |
|||
} |
|||
# * First value is the dyktalk tag |
|||
</source> |
|||
# * Second value is the edit summary |
|||
@staticmethod |
|||
def build_dyktalk_tag(credit, time_update) -> (str, str): |
|||
str_tag = '\n{{{{DYK talk|{d.day} {d:%B}|{d.year}{str_image_param}{str_hook_param}{str_nompage_param}}}}}'.format( |
|||
d=time_update, |
|||
str_image_param=('|image=' + credit.str_file) if credit.str_file else '', |
|||
str_hook_param=('|entry=' + credit.str_hook) if credit.str_hook else '', |
|||
str_nompage_param=('|nompage=' + credit.str_nompage) if credit.str_nompage else '') |
|||
str_edit_summary = ('Article appeared on [[WP:Did you know|DYK]] on {d.day} {d:%B} {d.year}' |
|||
', adding {{{{[[Template:DYK talk|DYK talk]]}}}}'.format(d=time_update)) |
|||
return str_tag, str_edit_summary |
|||
@staticmethod |
|||
def add_template_to_talk(str_talk, str_tag) -> str: |
|||
idx_first_section = str_talk.find('==') |
|||
if idx_first_section == -1: |
|||
idx_first_section = len(str_talk) |
|||
str_header = str_talk[:idx_first_section] |
|||
idx_last_template = DYKUpdateBotUtils._last_template_index(str_header) |
|||
if (idx_last_template < len(str_talk)) and (str_talk[idx_last_template] != '\n'): |
|||
str_tag = str_tag + '\n' |
|||
return DYKUpdateBotUtils._insert_str(str_talk, idx_last_template, str_tag).strip() |
|||
@staticmethod |
|||
<source lang="java"> |
|||
def _last_template_index(str_header) -> int: |
|||
import java.io.IOException; |
|||
# To a human reader, GA / DYK etc discussions aren't templates, they're part of the content |
|||
import java.io.StringReader; |
|||
# so detect and remove them from what we consider the header |
|||
# GA discussion transclusion example from Talk:Icos: {{Talk:Icos/GA1}} |
|||
# DYK discussion transclusion example from Special:Diff/873606519: {{Did you know nominations/Bishop John Carroll (statue)}} |
|||
match = search('\{\{\s*([Tt]alk:|[Tt]emplate talk:|([Tt]emplate:\s*)?[Dd]id you know nominations/)', str_header) |
|||
if match: |
|||
str_header = str_header[:match.start()] |
|||
idx_last_template = str_header.rfind('}}') |
|||
if idx_last_template == -1: |
|||
idx_last_template = 0 |
|||
else: |
|||
idx_last_template += 2 |
|||
return idx_last_template |
|||
# Returns username if one was found, None if not |
|||
import org.jdom.Document; |
|||
@staticmethod |
|||
import org.jdom.JDOMException; |
|||
def find_user_link(str_dykbotdo_signature) -> str: |
|||
import org.jdom.input.SAXBuilder; |
|||
links_in_sig = mwparserfromhell.parse(str_dykbotdo_signature, skip_style_tags=True).filter_wikilinks() |
|||
import org.xml.sax.InputSource; |
|||
for link in links_in_sig: |
|||
str_title = str(link.title) |
|||
idx_user_or_usertalk = max(str_title.find('User:'), str_title.find('User talk:')) |
|||
if idx_user_or_usertalk != -1: |
|||
str_user = str_title[str_title.find(':', idx_user_or_usertalk) + 1:] |
|||
idx_trailing = max(str_user.find('#'), str_user.find('/')) |
|||
if idx_trailing != -1: |
|||
str_user = str_user[:idx_trailing] |
|||
return str_user |
|||
return None |
|||
# Returns a tuple: |
|||
import net.sourceforge.jwbf.actions.Get; |
|||
# * First value is the message on the talk page (section + credit + signature) |
|||
import net.sourceforge.jwbf.actions.Post; |
|||
# * Second value is the edit summary |
|||
import net.sourceforge.jwbf.actions.mw.HttpAction; |
|||
@staticmethod |
|||
import net.sourceforge.jwbf.actions.mw.MediaWiki; |
|||
def build_user_talk_credit(credit, str_dykbotdo_signature, str_promoting_admin) -> (str, str): |
|||
import net.sourceforge.jwbf.actions.mw.util.MWAction; |
|||
str_message = ('==DYK for {str_article}==\n' |
|||
'{{{{subst:Template:{str_template} |article={str_article} {str_hook_param} ' |
|||
'{str_nompage_param} |optional= }}}} {str_sig}' |
|||
.format(str_article=credit.str_article, |
|||
str_template='DYKmake/DYKmakecredit' if credit.is_dykmake else 'DYKnom/DYKnomcredit', |
|||
str_hook_param=('|hook=' + credit.str_hook) if credit.str_hook else '', |
|||
str_nompage_param=('|nompage=' + credit.str_nompage) if credit.str_nompage else '', |
|||
str_sig=(str_dykbotdo_signature + ' ~~~~~') if str_dykbotdo_signature else '~~~~')) |
|||
str_edit_summary = 'Giving DYK credit for [[{str_article}]]'.format(str_article=credit.str_article) |
|||
if str_promoting_admin: |
|||
str_edit_summary += ' on behalf of [[User:{str_username}|{str_username}]]'.format(str_username=str_promoting_admin) |
|||
return str_message, str_edit_summary |
|||
@staticmethod |
|||
public class PostUnprotectWithReason extends MWAction { |
|||
def _insert_str(str_target, idx, str_insert) -> str: |
|||
private final String title; |
|||
return str_target[:idx] + str_insert + str_target[idx:] |
|||
private final String reason; |
|||
private final Get tokenRequest; |
|||
private String token; |
|||
private boolean inHandshake = true; |
|||
private boolean finished = false; |
|||
public PostUnprotectWithReason(String title, String reason) throws JDOMException, IOException { |
|||
this.title = title; |
|||
this.reason = reason; |
|||
if (title == null || title.length() == 0) { |
|||
throw new IllegalArgumentException("The argument 'title' must not be null or empty"); |
|||
} |
|||
// URL to fetch a protect token from the API |
|||
String url = "/api.php?format=xml&action=query&prop=info&titles=" + |
|||
MediaWiki.encode(title) + "&intoken=protect"; |
|||
tokenRequest = new Get(url); |
|||
} |
|||
@Override |
|||
public String processReturningText(String s, HttpAction response) { |
|||
if (response.getRequest().equals(tokenRequest.getRequest())) { |
|||
Document tokenPage; |
|||
try { |
|||
tokenPage = new SAXBuilder().build(new InputSource(new StringReader(s))); |
|||
token = tokenPage.getRootElement().getChild("query", EnWikiBot.ns).getChild("pages", EnWikiBot.ns) |
|||
.getChild("page", EnWikiBot.ns).getAttributeValue("protecttoken"); |
|||
} catch (JDOMException e) { |
|||
throw new UnprotectException(); |
|||
} catch (IOException e) { |
|||
throw new UnprotectException(); |
|||
} |
|||
} |
|||
return ""; |
|||
} |
|||
protected HttpAction getSecondRequest() { |
|||
HttpAction unprotectRequest = null; |
|||
if (token == null || token.length() == 0) { |
|||
throw new IllegalArgumentException( |
|||
"The argument 'token' must not be \"" |
|||
+ token + "\""); |
|||
} |
|||
String bar = MediaWiki.encode("|"); |
|||
@staticmethod |
|||
String unprotectURL = "/api.php?format=xml&action=protect" + |
|||
def log(str_to_log) -> None: |
|||
"&protections=edit=all" + bar + "move=all" + bar + "upload=all" + |
|||
print(str_to_log, flush=True) |
|||
"&title=" + MediaWiki.encode(title) + |
|||
"&token=" + MediaWiki.encode(token) + |
|||
"&reason=" + MediaWiki.encode(reason); |
|||
unprotectRequest = new Post(unprotectURL); |
|||
return unprotectRequest; |
|||
} |
|||
class ValidationResults(): |
|||
@Override |
|||
def __init__(self) -> None: |
|||
public HttpAction getNextMessage() { |
|||
self.rgstr_errors = [] |
|||
if (inHandshake) { |
|||
self.rgstr_warnings = [] |
|||
inHandshake = false; |
|||
self.page_TDYK = None |
|||
return tokenRequest; |
|||
self.page_queue = None |
|||
} else { |
|||
self.num_queue = 0 |
|||
finished = true; |
|||
self.file_incoming = None |
|||
return getSecondRequest(); |
|||
self.hooks_incoming = None |
|||
} |
|||
self.hooks_outgoing = None |
|||
} |
|||
self.str_dykbotdo_signature = None |
|||
self.timedelta_between_updates = None |
|||
@Override |
|||
public boolean hasMoreMessages() { |
|||
return !finished; |
|||
class DYKCredit(): |
|||
} |
|||
def __init__(self) -> None: |
|||
self.str_article = None |
|||
public class UnprotectException extends RuntimeException { |
|||
self.str_user = None |
|||
private static final long serialVersionUID = 1L; |
|||
self.str_user_talk = None |
|||
} |
|||
self.str_nompage = None |
|||
} |
|||
self.is_dykmake = True |
|||
self.str_hook = None |
|||
self.str_file = None |
|||
def __str__(self): |
|||
return 'DYKCredit! article:{0}, user:{1}, nompage:{2}, is_dykmake:{3}, hook:{4}, file:{5}'.format( |
|||
self.str_article, self.str_user, self.str_nompage, self.is_dykmake, self.str_hook, self.str_file) |
|||
def main() -> None: |
|||
bot = DYKUpdateBot() |
|||
bot.run() |
|||
if __name__ == '__main__': |
|||
main() |
|||
</source> |
</source> |
Revision as of 05:01, 10 May 2021
Below is the code for DYKUpdateBot. The bot runs on WP:Pywikibot.
import os
import pathlib
import pywikibot
import mwparserfromhell
import html
from datetime import datetime, timedelta, timezone
from functools import partial
from re import search
class DYKUpdateBot():
TDYK_LOC = 'Template:Did you know'
NEXT_UPDATE_QUEUE_LOC = 'Template:Did you know/Queue/Next'
LAST_UPDATE_TIME_LOC = 'Template:Did you know/Next update/Time'
TIME_BETWEEN_UPDATES_LOC = 'User:DYKUpdateBot/Time Between Updates'
QUEUE_ROOT_LOC = 'Template:Did you know/Queue/'
WTDYK_LOC = 'Wikipedia talk:Did you know'
ARCHIVE_LOC = 'Wikipedia:Recent additions'
ERROR_OUTPUT_LOC = 'User:DYKUpdateBot/Errors'
DRIFT_LOC = 'User:DYKUpdateBot/ResyncDrift'
SECONDS_BETWEEN_STATUS_CHECKS = 600
NUM_QUEUES = 7
def run(self) -> None:
DYKUpdateBotUtils.log('PID: {0}'.format(os.getpid()))
pywikibot.Site().login()
while self._is_on() and pywikibot.Site().logged_in():
DYKUpdateBotUtils.log(datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S %Z'))
results = ValidationResults()
seconds_until_next_update = DYKUpdateBot.SECONDS_BETWEEN_STATUS_CHECKS # placeholder
time_next_update, time_next_update_leaving = self._calculate_next_update_time(results.rgstr_errors)
if not results.rgstr_errors:
time_now = pywikibot.Site().server_time().replace(tzinfo=timezone.utc)
seconds_until_next_update = int((time_next_update - time_now).total_seconds())
DYKUpdateBotUtils.log('Seconds left until next update: {0}'.format(seconds_until_next_update))
if seconds_until_next_update < 7200:
self.validate_before_update(results, time_next_update_leaving)
if seconds_until_next_update <= 0:
results.timedelta_between_updates = time_next_update_leaving - time_next_update
self.update_dyk(time_now, results)
self._post_errors(results.rgstr_warnings, results.rgstr_errors)
results = None
seconds_to_sleep = DYKUpdateBot.SECONDS_BETWEEN_STATUS_CHECKS
if seconds_until_next_update > 0:
seconds_to_sleep = min(seconds_to_sleep, seconds_until_next_update)
pywikibot.sleep(seconds_to_sleep)
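# Note on cadence: the loop above sleeps at most SECONDS_BETWEEN_STATUS_CHECKS (600 seconds) between
# checks, starts validating the next queue once the update is less than 7200 seconds (two hours) away,
# and performs the actual update on the first wake-up at or after the scheduled time.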
def _calculate_next_update_time(self, rgstr_errors) -> (pywikibot.Timestamp, pywikibot.Timestamp):
page_last_update_time = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.LAST_UPDATE_TIME_LOC)
time_next_update = datetime.now(timezone.utc) # placeholder
try:
time_next_update = pywikibot.Timestamp.fromISOformat(page_last_update_time.text.strip()).replace(tzinfo=timezone.utc)
except:
self._log_error(rgstr_errors, 'Time at [[' + DYKUpdateBot.LAST_UPDATE_TIME_LOC +
']] is not formatted correctly')
return time_next_update, time_next_update
page_time_between_updates = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.TIME_BETWEEN_UPDATES_LOC)
seconds_between_updates = 0 # placeholder
try:
seconds_between_updates = int(page_time_between_updates.text)
except ValueError:
self._log_error(rgstr_errors, 'Time between updates at [[' + DYKUpdateBot.TIME_BETWEEN_UPDATES_LOC +
']] is not formatted correctly')
return time_next_update, time_next_update
time_next_update = time_next_update + timedelta(seconds=seconds_between_updates)
return time_next_update, time_next_update + timedelta(seconds=seconds_between_updates)
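# Worked example (illustrative values): if the page at LAST_UPDATE_TIME_LOC holds 2021-05-10T00:00:00
# (the time of the previous update) and TIME_BETWEEN_UPDATES_LOC holds 43200 (12 hours), this returns
# 2021-05-10 12:00 UTC as the next update time and 2021-05-11 00:00 UTC as the time that set is
# expected to leave the Main Page.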
# Returns:
# * Int of the next queue number, parsed from NEXT_UPDATE_QUEUE_LOC
# * 0 if NEXT_UPDATE_QUEUE_LOC doesn't parse to an int
def _find_next_queue_number(self) -> int:
page = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.NEXT_UPDATE_QUEUE_LOC)
num_next_queue = 0
try:
num_next_queue = int(page.text)
except ValueError:
pass
return num_next_queue
def validate_before_update(self, results_val, time_set_leaving):
# figure out which queue to update from
results_val.num_queue = self._find_next_queue_number()
if results_val.num_queue == 0:
self._log_error(results_val.rgstr_errors, 'Could not parse [[{0}]]; check if it\'s a number 1-{1}'
.format(DYKUpdateBot.NEXT_UPDATE_QUEUE_LOC, DYKUpdateBot.NUM_QUEUES))
return results_val
# get the wikitext of the queue
results_val.page_queue = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.QUEUE_ROOT_LOC + str(results_val.num_queue))
str_queue = results_val.page_queue.text
str_link_to_queue = DYKUpdateBotUtils.wikilink_to_queue(results_val.num_queue, True)
# make sure all curly braces are matched
if str_queue.count('{{') != str_queue.count('}}'):
self._log_error(results_val.rgstr_errors, 'Unmatched left <nowiki>("{{") and right ("}}")</nowiki> curly braces in ' + str_link_to_queue)
return results_val
# make sure the queue has {{DYKbotdo}}
has_dykbotdo, results_val.str_dykbotdo_signature = DYKUpdateBotUtils.parse_dykbotdo(str_queue)
if not has_dykbotdo:
self._post_almost_late_message_to_WTDYK(time_set_leaving, results_val.num_queue)
self._log_error(results_val.rgstr_errors, str_link_to_queue + ' is not tagged with {{tl|DYKbotdo}}')
return results_val
# make sure the queue has <!--Hooks--> and <!--HooksEnd--> and find hooks
results_val.hooks_incoming = DYKUpdateBotUtils.extract_hooks(str_queue)
if results_val.hooks_incoming is None:
self._log_error(results_val.rgstr_errors, str_link_to_queue + ' is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>')
return results_val
# make sure the image/file is protected
results_val.file_incoming = DYKUpdateBotUtils.find_file(results_val.hooks_incoming)
if results_val.file_incoming:
str_protection_error = DYKUpdateBotUtils.check_if_protected(results_val.file_incoming, time_set_leaving)
if str_protection_error:
self._log_error(results_val.rgstr_errors, str_protection_error)
else:
self._log_warning(results_val.rgstr_warnings, 'Can\'t find the image / file for incoming DYK set\n')
# fetch T:DYK
results_val.page_TDYK = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.TDYK_LOC)
str_tdyk = results_val.page_TDYK.text
# make sure T:DYK has <!--Hooks--> and <!--HooksEnd--> and find hooks
results_val.hooks_outgoing = DYKUpdateBotUtils.extract_hooks(str_tdyk)
if results_val.hooks_outgoing is None:
self._log_error(results_val.rgstr_errors, '[[' + DYKUpdateBot.TDYK_LOC + ']] is missing a <nowiki><!--Hooks--> or <!--HooksEnd--></nowiki>')
return results_val
return results_val
def update_dyk(self, time_update, results) -> None:
if results.rgstr_errors:
return
str_link_to_queue = DYKUpdateBotUtils.wikilink_to_queue(results.num_queue, False)
# replace old hooks with new hooks
results.page_TDYK.text = results.page_TDYK.text.replace(results.hooks_outgoing, results.hooks_incoming)
self._edit(results.page_TDYK, 'Bot automatically updating DYK template with hooks copied from ' + str_link_to_queue)
# purge the Main Page
pywikibot.Page(pywikibot.Site(), 'Main Page').purge()
# set last update time
time_update = time_update.replace(second=0, microsecond=0)
num_minutes_drift = self._calculate_drift(time_update, results.timedelta_between_updates)
time_update_with_drift = time_update + timedelta(minutes=num_minutes_drift)
page_last_update_time = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.LAST_UPDATE_TIME_LOC)
page_last_update_time.text = time_update_with_drift.isoformat()
self._edit(page_last_update_time, 'Resetting the clock' + (', with drift' if num_minutes_drift != 0 else ''))
# archive outgoing hooks
page_archive = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.ARCHIVE_LOC)
page_archive.text = DYKUpdateBotUtils.archive(page_archive.text, time_update, results.hooks_outgoing)
self._edit(page_archive, 'Archiving latest set')
# credits - article talk, user talk
rgcredits = self._parse_and_populate_credits(results.page_queue, results.hooks_incoming, results.file_incoming, results.rgstr_warnings)
self._tag_articles(rgcredits, time_update)
self._give_user_credits(rgcredits, results.str_dykbotdo_signature)
# clear queue
results.page_queue.text = '{{User:DYKUpdateBot/REMOVE THIS LINE}}'
self._edit(results.page_queue, 'Update is done, removing the hooks')
# update next queue number
num_next_queue = (results.num_queue % DYKUpdateBot.NUM_QUEUES) + 1
page_next_queue_num = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.NEXT_UPDATE_QUEUE_LOC)
page_next_queue_num.text = str(num_next_queue)
self._edit(page_next_queue_num, 'Next queue is ' + DYKUpdateBotUtils.wikilink_to_queue(num_next_queue, False))
# tag outgoing file
self._tag_outgoing_file(results.hooks_outgoing, time_update)
def _post_almost_late_message_to_WTDYK(self, time_set_leaving, num_next_queue) -> None:
str_timestamp = time_set_leaving.isoformat()
page_wtdyk = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.WTDYK_LOC)
if str_timestamp in page_wtdyk.text:
return # bot already posted an "almost late" message for this update, don't post again
with open(str(pathlib.Path(__file__).parent / 'almostLate.txt'), 'r', encoding='utf-8') as f:
str_almost_late = f.read()
str_almost_late = str_almost_late.replace('queueNum', str(num_next_queue))
str_almost_late = str_almost_late.replace('hoursLeft', 'two hours')
str_almost_late = str_almost_late.replace('uniqueSetIdentifier', str_timestamp)
self._append_and_edit(DYKUpdateBot.WTDYK_LOC, str_almost_late, 'DYK is almost late')
def _calculate_drift(self, time_update, timedelta_between_updates) -> int:
num_max_advance_minutes = 0
num_max_delay_minutes = 0
page_drift = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.DRIFT_LOC)
for str_line in page_drift.text.split('\n'):
try:
num_minutes_parsed = int(str_line[str_line.find(':') + 1:])
if 'advance' in str_line:
num_max_advance_minutes = num_minutes_parsed
elif 'delay' in str_line:
num_max_delay_minutes = num_minutes_parsed
except:
DYKUpdateBotUtils.log('Couldn\'t parse drift')
return 0
return DYKUpdateBotUtils.calculate_drift_core(time_update,
timedelta_between_updates,
num_max_advance_minutes,
num_max_delay_minutes)
def _parse_and_populate_credits(self, page_queue, hooks_incoming, file_incoming, rgstr_warnings) -> []:
rgcredits = DYKUpdateBotUtils.parse_credits(page_queue.text)
fn_log_warning = partial(self._log_warning, rgstr_warnings)
DYKUpdateBotUtils.validate_credits_articles(rgcredits, fn_log_warning)
DYKUpdateBotUtils.validate_credits_users(rgcredits, fn_log_warning)
DYKUpdateBotUtils.populate_hooks_and_file(rgcredits, hooks_incoming, file_incoming.title(with_ns=False) if file_incoming else None)
for credit in rgcredits:
if credit.str_hook is None:
self._log_warning(rgstr_warnings, 'Couldn\'t find hook for [[{0}]], was the hook pulled or moved to a different set?'.format(credit.str_article))
return rgcredits
def _tag_articles(self, rgcredits, time_update) -> None:
set_tagged = set()
for credit in rgcredits:
if credit.str_article in set_tagged:
continue
str_edit_summary = None
page_talk = pywikibot.Page(pywikibot.Site(), 'Talk:' + credit.str_article)
page_talk.text, str_edit_summary = DYKUpdateBotUtils.tag_article_history(page_talk.text, credit, time_update)
if not str_edit_summary:
str_dyktalk_tag, str_edit_summary = DYKUpdateBotUtils.build_dyktalk_tag(credit, time_update)
page_talk.text = DYKUpdateBotUtils.add_template_to_talk(page_talk.text, str_dyktalk_tag)
self._edit(page_talk, str_edit_summary)
set_tagged.add(credit.str_article)
def _give_user_credits(self, rgcredits, str_dykbotdo_signature) -> None:
str_promoting_admin = DYKUpdateBotUtils.find_user_link(str_dykbotdo_signature)
for credit in rgcredits:
if not credit.str_user_talk:
continue
str_message, str_edit_summary = DYKUpdateBotUtils.build_user_talk_credit(credit, str_dykbotdo_signature, str_promoting_admin)
self._append_and_edit(credit.str_user_talk, str_message, str_edit_summary)
def _tag_outgoing_file(self, hooks_outgoing, time_update) -> None:
file_outgoing = DYKUpdateBotUtils.find_file(hooks_outgoing)
if file_outgoing:
file_outgoing_commons = pywikibot.FilePage(pywikibot.Site().image_repository(), file_outgoing.title())
if file_outgoing.exists() or file_outgoing_commons.exists():
str_dykfile_tag = '{{{{DYKfile|{d.day} {d:%B}|{d.year}}}}}'.format(d=time_update)
file_outgoing.text = DYKUpdateBotUtils.add_template_to_talk(file_outgoing.text, str_dykfile_tag)
self._edit(file_outgoing, 'File appeared on [[WP:Did you know|DYK]] on {d.day} {d:%B} {d.year}'.format(d=time_update))
if ('m-cropped' in file_outgoing.text.lower()) or ('c-uploaded' in file_outgoing.text.lower()):
DYKUpdateBotUtils.log('Outgoing file "{0}" tagged with {{m-cropped}} or {{c-uploaded}}'.format(file_outgoing.title()))
else:
DYKUpdateBotUtils.log('Special case (possible bug?): Outgoing file "{0}" doesn\'t exist'.format(file_outgoing.title()))
def _post_errors(self, rgstr_warnings, rgstr_errors) -> None:
str_output = ''
str_edit_summary = 'No errors or warnings; clear'
if rgstr_warnings:
str_warnings = 'Bot warnings:\n'
str_warnings += '\n'.join('* {0}'.format(str_warning) for str_warning in rgstr_warnings)
str_output = str_warnings + '\n\n' + str_output
str_edit_summary = 'Posting latest warnings'
if rgstr_errors:
str_errors = 'Errors blocking the bot from updating DYK:\n'
str_errors += '\n'.join('* {0}'.format(str_error) for str_error in rgstr_errors)
str_output = str_errors + '\n\n' + str_output
str_edit_summary = 'Bot is blocked from updating DYK, posting latest errors'
page_errors = pywikibot.Page(pywikibot.Site(), DYKUpdateBot.ERROR_OUTPUT_LOC)
if page_errors.text.strip() == str_output.strip():
return # if the errors are already on the page, don't post again
page_errors.text = str_output.strip()
self._edit(page_errors, str_edit_summary)
# ---------------------------------------------
# Core editing
# ---------------------------------------------
# Edge cases we're handling:
# * {{nobots}}
# * Redirects
# * Page doesn't exist
# * Edit conflicts
# * Protected page
def _append_and_edit(self, str_title, str_message, str_edit_summary) -> None:
page_to_edit = pywikibot.Page(pywikibot.Site(), str_title)
if page_to_edit.isRedirectPage():
page_to_edit = page_to_edit.getRedirectTarget()
if not page_to_edit.botMayEdit():
# Attempting to save the page when botMayEdit() is False will throw an OtherPageSaveError
DYKUpdateBotUtils.log('Couldn\'t edit ' + page_to_edit.title() + ' due to {{bots}} or {{nobots}}')
return
retry = True
while retry:
retry = False
try:
if not page_to_edit.text.isspace():
page_to_edit.text += '\n\n'
page_to_edit.text += str_message
self._edit(page_to_edit, str_edit_summary)
except pywikibot.EditConflict:
retry = True
DYKUpdateBotUtils.log('Edit conflicted on ' + page_to_edit.title() + ' will retry after a short nap')
pywikibot.sleep(10) # sleep for 10 seconds
page_to_edit = pywikibot.Page(pywikibot.Site(), page_to_edit.title())
def _is_on(self) -> bool:
with open(str(pathlib.Path(__file__).parent / 'UpdateBotSwitch.txt'), 'r', encoding='utf-8') as f:
str_file_switch = f.read()
is_file_switch_on = str_file_switch.strip().lower() == 'on'
if not is_file_switch_on:
DYKUpdateBotUtils.log('Text file switch is not "on", exiting...')
return is_file_switch_on
def _edit(self, page_to_edit, str_edit_summary) -> None:
DYKUpdateBotUtils.log('Editing ' + page_to_edit.title())
if (not page_to_edit.exists()) and DYKUpdateBotUtils.check_if_salted(page_to_edit):
DYKUpdateBotUtils.log('Special case: ' + page_to_edit.title() + ' is salted, skipping...')
return
try:
page_to_edit.save(str_edit_summary, minor=False)
# For a dry run where the bot outputs to local files, comment out the above line and uncomment the lines below
# DYKUpdateBotUtils.log('Edit summary: ' + str_edit_summary)
# filename = ''.join(character for character in page_to_edit.title() if character not in '\/:*?<>|"') + '.txt'
# with open(str(pathlib.Path(__file__).parent / 'TestResources' / filename), 'w', encoding='utf-8') as file_write:
# file_write.write(page_to_edit.text)
except pywikibot.exceptions.LockedPage: # I'm not sure it's possible to hit this with an adminbot...
DYKUpdateBotUtils.log('Special case: ' + page_to_edit.title() + ' is protected, skipping...')
def _log_error(self, rgstr_errors, str_error) -> None:
rgstr_errors.append(str_error)
DYKUpdateBotUtils.log('Error: ' + str_error)
def _log_warning(self, rgstr_warnings, str_warning) -> None:
rgstr_warnings.append(str_warning)
DYKUpdateBotUtils.log('Warning: ' + str_warning)
# Set of methods broken out for easier unit testability
# Unless otherwise noted, these methods don't make network calls
# Do Not edit the wiki from within these methods, otherwise unit tests will edit the wiki!
class DYKUpdateBotUtils():
@staticmethod
def wikilink_to_queue(num_queue, capitalize) -> str:
return '[[{0}{1}|{2}ueue {1}]]'.format(DYKUpdateBot.QUEUE_ROOT_LOC,
num_queue,
'Q' if capitalize else 'q')
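# For example, wikilink_to_queue(3, True) returns '[[Template:Did you know/Queue/3|Queue 3]]' and
# wikilink_to_queue(3, False) returns '[[Template:Did you know/Queue/3|queue 3]]'.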
# Returns a tuple:
# * First value is True if dykbotdo was found, False if not
# * Second value is the admin signature in dykbotdo, or None if not found
@staticmethod
def parse_dykbotdo(str_queue) -> (bool, str):
templates_in_queue = mwparserfromhell.parse(str_queue, skip_style_tags=True).filter_templates()
for template in templates_in_queue:
if template.name.matches('DYKbotdo'):
return True, str(template.get(1)) if template.has(1) else None
return False, None
# Returns:
# * Hooks if <!--Hooks--> and <!--HooksEnd--> tags are in order
# * None if not
@staticmethod
def extract_hooks(str_queue_or_tdyk) -> str:
idx_hooks_tag = str_queue_or_tdyk.find('<!--Hooks-->')
idx_hooksend_tag = str_queue_or_tdyk.find('<!--HooksEnd-->', max(idx_hooks_tag, 0))
if min(idx_hooks_tag, idx_hooksend_tag) == -1:
return None
return str_queue_or_tdyk[idx_hooks_tag + 12:idx_hooksend_tag].strip()
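# For example, extract_hooks('<!--Hooks-->\n* ... that foo?\n<!--HooksEnd-->') returns '* ... that foo?';
# if either marker is missing, or <!--HooksEnd--> comes before <!--Hooks-->, the method returns None.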
# Returns:
# * pywikibot.FilePage of the file in the DYK set if detected
# * None if not
@staticmethod
def find_file(str_hooks) -> pywikibot.FilePage:
templates_in_hooks = mwparserfromhell.parse(str_hooks, skip_style_tags=True).filter_templates()
for template in templates_in_hooks:
if template.name.matches('Main page image/DYK'):
# Note it's fine whether the parameter is File:XYZ.jpg, Image:XYZ.jpg, or XYZ.jpg
# all three formats will create the same FilePage object returning File:XYZ.jpg from title()
str_file = str(template.get('image').value)
if '{{!}}' in str_file:
DYKUpdateBotUtils.log('Special case: Stripping everything after pipe from filename "{0}"'.format(str_file))
str_file = str_file[:str_file.find('{{!}}')]
return pywikibot.FilePage(pywikibot.Site(), str_file)
return None
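# For example, a hook set whose image line contains {{Main page image/DYK|image=Example.jpg|caption=...}}
# (an illustrative filename) yields a FilePage whose title() is 'File:Example.jpg'.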
# This method makes network calls to the Wikipedia API (read-only)
# Returns:
# * None if protection looks good
# * A string describing the issue if not
# Cases to validate if changing this function (leverage the unit tests!):
# * File that doesn't exist
# * File:Nlksjdkfjskdljflkdsjfame.jpg
# * Fully not-protected file
# * en:File:Emmelie de Forest Hunter & Prey.png and commons:File:Novo Selo TE 01.JPG
# * Fully not-protected file on Commons with an enwiki description page
# * en:File:MET Breuer (48377070386).jpg
# * Semi-protected file
# * en:File:Amy Barlow.jpg and commons:File:Flag of Palestine.svg
# * Fully protected file indefinitely protected
# * en:File:George Floyd neck knelt on by police officer.png and commons:File:Name.jpg
# * Fully protected file via cascading protection
# * en:File:WPVG icon 2016.svg and commons:File:Wikitech-2020-logo.svg
# * Fully protected file with protection expiring before set leaves the Main Page
# * Use the API to find examples:
# * https://commons.wikimedia.org/w/api.php?action=query&list=allpages&apnamespace=6&apprtype=edit&apprexpiry=definite&apprlevel=sysop&aplimit=500
# * Fully protected file with protection expiring after set leaves the Main Page
# * see URL above
@staticmethod
def check_if_protected(filepage, time_set_leaving) -> str:
str_file_for_output = filepage.title(as_link=True, textlink=True)
filepage_commons = pywikibot.FilePage(pywikibot.Site().image_repository(), filepage.title())
if not (filepage.exists() or filepage_commons.exists()):
return str_file_for_output + ' does not exist'
on_commons = filepage.file_is_shared()
if on_commons:
filepage = filepage_commons
edit_protections = filepage.protection().get('edit')
if edit_protections is None:
if on_commons:
return str_file_for_output + ' is not protected; either 1) Upload the file to en.wiki, or 2) protect the file at Commons'
else: # on enwiki
return str_file_for_output + ' is not protected'
if edit_protections[0] != 'sysop':
return str_file_for_output + ' is not fully protected'
str_prot_end = edit_protections[1]
if str_prot_end == 'infinity':
return None
time_prot_end = pywikibot.Timestamp.fromISOformat(str_prot_end).replace(tzinfo=timezone.utc)
if time_prot_end < time_set_leaving:
return 'The protection for ' + str_file_for_output + ' will expire before or while it\'s on the Main Page'
return None # protection expires after set leaves the Main Page
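# Summary of the checks above: a missing file or an unprotected / semi-protected file yields an error
# string, indefinite full protection passes, and timed full protection passes only when it expires
# after time_set_leaving (roughly when the set is rotated off the Main Page).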
@staticmethod
def calculate_drift_core(time_update, timedelta_between_updates, minutes_max_advance, minutes_max_delay) -> int:
seconds_per_day = 60 * 60 * 24
seconds_least_difference_from_0000 = 60 * 60 * 24
set_seconds_differences = set()
time_iter = time_update
while True:
current_difference_from_0000 = int(time_iter.timestamp()) % seconds_per_day
if current_difference_from_0000 > (seconds_per_day / 2):
current_difference_from_0000 = -(seconds_per_day - current_difference_from_0000)
if abs(seconds_least_difference_from_0000) > abs(current_difference_from_0000):
seconds_least_difference_from_0000 = current_difference_from_0000
if seconds_least_difference_from_0000 == 0:
break
if (current_difference_from_0000 in set_seconds_differences) or (len(set_seconds_differences) >= 24):
break
set_seconds_differences.add(current_difference_from_0000)
time_iter = time_iter + timedelta_between_updates
if seconds_least_difference_from_0000 > 0:
return -min(minutes_max_advance, seconds_least_difference_from_0000 // 60)
elif seconds_least_difference_from_0000 < 0:
return min(minutes_max_delay, -seconds_least_difference_from_0000 // 60)
else:
return 0
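# Worked example (illustrative): with updates every 24 hours currently landing at 23:40 UTC, the
# closest approach to 00:00 is 20 minutes early, so with minutes_max_delay >= 20 this returns 20
# (delay the next update by 20 minutes); updates landing at 00:20 would instead return -20 (advance),
# capped by minutes_max_advance.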
# This method makes network calls to the Wikipedia API (read-only)
@staticmethod
def check_if_salted(page) -> bool:
create_protections = page.protection().get('create')
return create_protections and (create_protections[0] == 'sysop')
@staticmethod
def archive(str_archive, time_update, hooks_outgoing) -> str:
str_section_heading = '==={d.day} {d:%B} {d.year}==='.format(d=time_update)
str_set_heading = '*\'\'\'\'\'{d:%H}:{d:%M}, {d.day} {d:%B} {d.year} (UTC)\'\'\'\'\''.format(d=time_update)
idx_this_date = str_archive.find(str_section_heading) # check if there is a section heading already for today
if idx_this_date == -1: # if there isn't, create a new section heading
idx_insert_section = str_archive.find('\n', str_archive.find('<!--BOTPOINTER-->')) + 1
str_archive = DYKUpdateBotUtils._insert_str(str_archive, idx_insert_section, str_section_heading + '\n')
idx_this_date = idx_insert_section
idx_this_date = str_archive.find('\n', idx_this_date) + 1
return DYKUpdateBotUtils._insert_str(str_archive, idx_this_date, str_set_heading + '\n' + hooks_outgoing + '\n\n')
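# For example, an update at 12:00 UTC on 10 May 2021 is archived under a '===10 May 2021===' section
# (created just below the <!--BOTPOINTER--> marker if it does not exist yet), with the set heading
# "*'''''12:00, 10 May 2021 (UTC)'''''" followed by the outgoing hooks.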
@staticmethod
def parse_credits(str_queue) -> []:
templates_in_queue = mwparserfromhell.parse(str_queue, skip_style_tags=True).filter_templates()
rgcredits = []
for template in templates_in_queue:
if template.name.matches('DYKmake') or template.name.matches('DYKnom'):
if not (template.has(1) and template.has(2)):
continue
credit = DYKCredit()
credit.str_article = html.unescape(str(template.get(1).value))
credit.str_user = html.unescape(str(template.get(2).value))
credit.is_dykmake = template.name.matches('DYKmake')
if template.has('subpage'):
str_subpage = html.unescape(str(template.get('subpage').value))
if str_subpage != '':
credit.str_nompage = 'Template:Did you know nominations/' + str_subpage
# sanitize
if (credit.str_article == 'Example' or credit.str_article == '' or
credit.str_user == '' or credit.str_user == 'Editor' or credit.str_user == 'Nominator'):
continue
credit.str_article = credit.str_article.replace('[[', '').replace(']]', '')
rgcredits.append(credit)
return rgcredits
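# For example, {{DYKmake|Some article|Example User|subpage=Some article}} (illustrative values) becomes
# a DYKCredit with str_article='Some article', str_user='Example User', is_dykmake=True and
# str_nompage='Template:Did you know nominations/Some article'; placeholder credits such as
# {{DYKmake|Example|Editor}} are skipped.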
# This method makes network calls to the Wikipedia API (read-only)
# As "output", sets str_article on valid credits & deletes credits for nonexistent articles
@staticmethod
def validate_credits_articles(rgcredits, fn_log_warning) -> None:
# Articles:
# * expand any templates in the article name
# * delete credits for nonexistent articles
# * follow redirects
# * normalize titles
dict_processed = {}
for idx_credit in reversed(range(len(rgcredits))):
str_article_orig = rgcredits[idx_credit].str_article
if str_article_orig in dict_processed:
rgcredits[idx_credit].str_article = dict_processed[str_article_orig].str_article
continue
str_article_processed = str_article_orig
if '}}' in str_article_processed:
str_article_processed = pywikibot.Site().expand_text(text=str_article_processed)
DYKUpdateBotUtils.log('Special case: Credit article title contains template "{0}"->"{1}"'.format(str_article_orig, str_article_processed))
page_article = pywikibot.Page(pywikibot.Site(), str_article_processed)
if page_article.isRedirectPage():
page_article = page_article.getRedirectTarget()
if not page_article.exists():
fn_log_warning('Article [[{0}]] does not exist'.format(str_article_orig))
del rgcredits[idx_credit]
continue
str_article_processed = page_article.title()
rgcredits[idx_credit].str_article = str_article_processed
dict_processed[str_article_orig] = rgcredits[idx_credit]
# This method makes network calls to the Wikipedia API (read-only)
# As "output", sets str_user_talk on valid credits
@staticmethod
def validate_credits_users(rgcredits, fn_log_warning) -> None:
# Users:
# * expand any templates in the username
# * check for nonexistent users
# * follow redirects
# * normalize titles
dict_processed = {}
for credit in rgcredits:
str_user_orig = credit.str_user
if str_user_orig in dict_processed:
credit.str_user_talk = dict_processed[str_user_orig].str_user_talk
continue
str_user_processed = str_user_orig
if '}}' in str_user_processed:
str_user_processed = pywikibot.Site().expand_text(text=str_user_processed)
DYKUpdateBotUtils.log('Special case: Credit username contains template "{0}"->"{1}"'.format(str_user_orig, str_user_processed))
user = pywikibot.User(pywikibot.Site(), str_user_processed)
is_valid_user = user.isRegistered() or (user.isAnonymous() and user.last_edit)
if not is_valid_user:
# was the user recently renamed?
# example API call: https://en.wikipedia.org/w/api.php?action=query&list=logevents&letype=renameuser&letitle=User:Carrot%20official&lelimit=1
for entry in pywikibot.Site().logevents('renameuser', page=user.title(), total=1):
if entry['params']['olduser'] == user.username:
user = pywikibot.User(pywikibot.Site(), entry['params']['newuser'])
DYKUpdateBotUtils.log('Special case: User listed in credit was renamed "{0}"->"{1}"'.format(str_user_orig, user.username))
is_valid_user = user.isRegistered() or (user.isAnonymous() and user.last_edit)
if is_valid_user:
page_usertalk = user.getUserTalkPage()
if page_usertalk.isRedirectPage():
DYKUpdateBotUtils.log('Special case: User talk is a redirect "{0}"'.format(page_usertalk.title()))
page_usertalk = page_usertalk.getRedirectTarget()
if page_usertalk.isTalkPage():
# no funny business - the redirect above shouldn't make the bot, e.g., tag the Main Page with a DYK credit
credit.str_user_talk = page_usertalk.title()
else:
fn_log_warning('The username \'{0}\' is invalid'.format(str_user_orig))
dict_processed[str_user_orig] = credit
# This method makes network calls to the Wikipedia API (read-only) if:
# * There's a template within the hooks
# * There's no string match between the article listed in the credit and the hooks - redirect search
# As "output", sets str_hook and (if first hook) str_file on credits
@staticmethod
def populate_hooks_and_file(rgcredits, str_hooks, str_file) -> None:
# remove stuff at the top that isn't hooks (e.g. the image)
str_hooks = str_hooks[str_hooks.rfind('\n', 0, str_hooks.find('...')):].strip()
# expand templates
str_hooks_normalized = str_hooks
if '}}' in str_hooks_normalized:
str_hooks_normalized = pywikibot.Site().expand_text(text=str_hooks_normalized)
# unescape HTML and replace non-breaking spaces with normal spaces
str_hooks_normalized = html.unescape(str_hooks_normalized).replace(html.unescape(' '), ' ')
rghooks_orig = str_hooks.split('\n')
rghooks_normalized = str_hooks_normalized.lower().split('\n')
# remove any lines without '...' and trim any leading characters, like *
for idx_hook in reversed(range(len(rghooks_orig))):
str_hook = rghooks_orig[idx_hook]
idx_that = str_hook.find('...')
if idx_that == -1:
del rghooks_orig[idx_hook]
del rghooks_normalized[idx_hook]
else:
rghooks_orig[idx_hook] = str_hook[idx_that:]
# search for the hook for each article
dict_processed = {}
for credit in rgcredits:
if credit.str_article in dict_processed:
credit.str_hook = dict_processed[credit.str_article].str_hook
credit.str_file = dict_processed[credit.str_article].str_file
continue
idx_found_hook = DYKUpdateBotUtils._find_hook(credit.str_article, rghooks_normalized)
if idx_found_hook == -1: # maybe the hook links to a page that redirects to str_article?
page_article = pywikibot.Page(pywikibot.Site(), credit.str_article)
for page_redirect in page_article.getReferences(filter_redirects=True, namespaces=pywikibot.site.Namespace.MAIN):
idx_found_hook = DYKUpdateBotUtils._find_hook(page_redirect.title(), rghooks_normalized)
if idx_found_hook != -1:
DYKUpdateBotUtils.log('Special case: Hook matches redirect to article "{0}"'.format(credit.str_article))
break # got a hit! no need to keep iterating through redirects
if idx_found_hook >= 0:
credit.str_hook = rghooks_orig[idx_found_hook]
if idx_found_hook == 0:
credit.str_file = str_file
dict_processed[credit.str_article] = credit
@staticmethod
def _find_hook(str_article, rghooks_normalized) -> int:
str_article_lower = str_article.lower()
for idx_hook, str_hook_normalized in enumerate(rghooks_normalized):
if str_article_lower in str_hook_normalized:
return idx_hook
return -1
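# Returns a tuple:
# * First value is the (possibly updated) talk page text
# * Second value is the edit summary, or None if no {{Article history}} template was found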
@staticmethod
def tag_article_history(str_talk, credit, time_update) -> (str, str):
template_ah = None
templates_on_talk = mwparserfromhell.parse(str_talk, skip_style_tags=True).filter_templates()
for template in templates_on_talk:
tname = template.name
if (tname.matches('Article history') or tname.matches('Articlehistory') or
tname.matches('Article History') or tname.matches('ArticleHistory') or
tname.matches('Article milestones') or tname.matches('Articlemilestones')):
template_ah = template
break
str_edit_summary = None
if template_ah:
str_edit_summary = ('Article appeared on [[WP:Did you know|DYK]] on {d.day} {d:%B} {d.year}'
', adding to {{{{[[Template:Article history|Article history]]}}}}'.format(d=time_update))
str_article_history_orig = str(template_ah)
# According to documentation at Template:Article_history, DYK params go between |currentstatus and |topic
param_topic = template_ah.get('topic') if template_ah.has('topic') else None
template_ah.add('dykdate', '{d.day} {d:%B} {d.year}'.format(d=time_update), before=param_topic)
if credit.str_hook:
template_ah.add('dykentry', credit.str_hook, before=param_topic)
if credit.str_nompage:
template_ah.add('dyknom', credit.str_nompage, before=param_topic)
str_talk = str_talk.replace(str_article_history_orig, str(template_ah))
return str_talk, str_edit_summary
# Returns a tuple:
# * First value is the dyktalk tag
# * Second value is the edit summary
@staticmethod
def build_dyktalk_tag(credit, time_update) -> (str, str):
str_tag = '\n{{{{DYK talk|{d.day} {d:%B}|{d.year}{str_image_param}{str_hook_param}{str_nompage_param}}}}}'.format(
d=time_update,
str_image_param=('|image=' + credit.str_file) if credit.str_file else '',
str_hook_param=('|entry=' + credit.str_hook) if credit.str_hook else '',
str_nompage_param=('|nompage=' + credit.str_nompage) if credit.str_nompage else '')
str_edit_summary = ('Article appeared on [[WP:Did you know|DYK]] on {d.day} {d:%B} {d.year}'
', adding {{{{[[Template:DYK talk|DYK talk]]}}}}'.format(d=time_update))
return str_tag, str_edit_summary
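# Inserts the tag into the talk page header, after the last template that precedes the
# first section heading, so the DYK banner stays grouped with the other talk page banners.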
@staticmethod
def add_template_to_talk(str_talk, str_tag) -> str:
idx_first_section = str_talk.find('==')
if idx_first_section == -1:
idx_first_section = len(str_talk)
str_header = str_talk[:idx_first_section]
idx_last_template = DYKUpdateBotUtils._last_template_index(str_header)
if (idx_last_template < len(str_talk)) and (str_talk[idx_last_template] != '\n'):
str_tag = str_tag + '\n'
return DYKUpdateBotUtils._insert_str(str_talk, idx_last_template, str_tag).strip()
@staticmethod
def _last_template_index(str_header) -> int:
# To a human reader, GA / DYK etc discussions aren't templates, they're part of the content
# so detect and remove them from what we consider the header
# GA discussion transclusion example from Talk:Icos: {{Talk:Icos/GA1}}
# DYK discussion transclusion example from Special:Diff/873606519: {{Did you know nominations/Bishop John Carroll (statue)}}
match = search(r'\{\{\s*([Tt]alk:|[Tt]emplate talk:|([Tt]emplate:\s*)?[Dd]id you know nominations/)', str_header)
if match:
str_header = str_header[:match.start()]
idx_last_template = str_header.rfind('}}')
if idx_last_template == -1:
idx_last_template = 0
else:
idx_last_template += 2
return idx_last_template
# Returns username if one was found, None if not
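# For illustration only, a hypothetical signature '[[User:Example|Example]]
# ([[User talk:Example|talk]]) 00:00, 1 January 2024 (UTC)' yields 'Example'.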
@staticmethod
def find_user_link(str_dykbotdo_signature) -> str:
links_in_sig = mwparserfromhell.parse(str_dykbotdo_signature, skip_style_tags=True).filter_wikilinks()
for link in links_in_sig:
str_title = str(link.title)
idx_user_or_usertalk = max(str_title.find('User:'), str_title.find('User talk:'))
if idx_user_or_usertalk != -1:
str_user = str_title[str_title.find(':', idx_user_or_usertalk) + 1:]
idx_trailing = max(str_user.find('#'), str_user.find('/'))
if idx_trailing != -1:
str_user = str_user[:idx_trailing]
return str_user
return None
# Returns a tuple:
# * First value is the message on the talk page (section + credit + signature)
# * Second value is the edit summary
@staticmethod
def build_user_talk_credit(credit, str_dykbotdo_signature, str_promoting_admin) -> (str, str):
str_message = ('==DYK for {str_article}==\n'
'{{{{subst:Template:{str_template} |article={str_article} {str_hook_param} '
'{str_nompage_param} |optional= }}}} {str_sig}'
.format(str_article=credit.str_article,
str_template='DYKmake/DYKmakecredit' if credit.is_dykmake else 'DYKnom/DYKnomcredit',
str_hook_param=('|hook=' + credit.str_hook) if credit.str_hook else '',
str_nompage_param=('|nompage=' + credit.str_nompage) if credit.str_nompage else '',
str_sig=(str_dykbotdo_signature + ' ~~~~~') if str_dykbotdo_signature else '~~~~'))
str_edit_summary = 'Giving DYK credit for [[{str_article}]]'.format(str_article=credit.str_article)
if str_promoting_admin:
str_edit_summary += ' on behalf of [[User:{str_username}|{str_username}]]'.format(str_username=str_promoting_admin)
return str_message, str_edit_summary
@staticmethod
def _insert_str(str_target, idx, str_insert) -> str:
return str_target[:idx] + str_insert + str_target[idx:]
@staticmethod
def log(str_to_log) -> None:
print(str_to_log, flush=True)
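# Plain container for the results of pre-update validation: accumulated errors and
# warnings, plus the pages, hooks, image and timing data the update step needs.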
class ValidationResults():
def __init__(self) -> None:
self.rgstr_errors = []
self.rgstr_warnings = []
self.page_TDYK = None
self.page_queue = None
self.num_queue = 0
self.file_incoming = None
self.hooks_incoming = None
self.hooks_outgoing = None
self.str_dykbotdo_signature = None
self.timedelta_between_updates = None
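# One DYKmake/DYKnom credit: the article, the editor to thank, the nomination subpage,
# and (once matched) the hook and image file that ran with it.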
class DYKCredit():
def __init__(self) -> None:
self.str_article = None
self.str_user = None
self.str_user_talk = None
self.str_nompage = None
self.is_dykmake = True
self.str_hook = None
self.str_file = None
def __str__(self):
return 'DYKCredit! article:{0}, user:{1}, nompage:{2}, is_dykmake:{3}, hook:{4}, file:{5}'.format(
self.str_article, self.str_user, self.str_nompage, self.is_dykmake, self.str_hook, self.str_file)
def main() -> None:
bot = DYKUpdateBot()
bot.run()
if __name__ == '__main__':
main()