User:Theleekycauldron/sandbox.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// --- Shared script state ---
let output = []; // rows of the final stats table, as [wikitext, views-per-hour] pairs
let papp = [];   // "previous app" — see getData() for the app/papp pipeline
let api = new mw.Api();

// Month names in calendar order.
let months = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"];

// Month name -> zero-based month index as a zero-padded two-character string
// (e.g. "January" -> "00", "December" -> "11"). Built from `months` so the
// two can never drift apart; only ever used for lookups, never enumerated.
let dict = {};
for (let m = 0; m < months.length; m++) {
	dict[months[m]] = m.toString().padStart(2, "0");
}
// Issue a blocking HTTP GET to `theUrl` and return the raw response body.
// NOTE: synchronous XHR freezes the UI thread while the request runs; it is
// kept because the parsing loop in getData() depends on sequential replies.
function httpGet(theUrl) {
    const request = new XMLHttpRequest();
    request.open("GET", theUrl, false); // third argument false = synchronous
    request.send(null);
    return request.responseText;
}
// Upper-case the first character of `string`, leaving the rest untouched.
// An empty string is returned unchanged.
function capitalizeFirstLetter(string) {
  const head = string.charAt(0);
  const tail = string.slice(1);
  return head.toUpperCase() + tail;
}
function getData(wikitext, t, jitter) {
    // Parses a WP:Recent additions page into "apps" — one per hook set — and
    // feeds consecutive pairs of sets to processData().
    //
    // An "app" codifies one set of hooks: its removal timestamp, the set's
    // image filename, then one [article, views, views, views, hook] entry per
    // bolded article. The page is iterated top-to-bottom (newest set first),
    // so the previously-seen app ("papp") left the main page when the current
    // app's timestamp was written; subtracting the two timestamps gives how
    // long the papp ran — which is why it is always the papp that gets
    // processed, never the app itself.
    //
    // wikitext: full page wikitext; t: views-per-hour threshold passed through
    // to processData(); jitter: force cache-busting on pageviews API queries.
    const splitLines = str => str.split(/\r?\n/); // split a string into lines
    let wikiarr = splitLines(wikitext); // was an implicit global; nothing else reads it
    let date;         // [end, start] yyyymmddhh bounds for the pageviews query
    let multis = [];  // article-name arrays for multi-hooks found in the current set
    let app = [];     // the set currently being parsed
    let pmultis = []; // multis of the previous (chronologically later) set
    papp = [];        // global: the previous set — the one actually processed
    let d;
    let setNum = 0;   // how many sets (timestamps) we've seen so far
    for (var i = 0; i < wikiarr.length; i++) {
        // Only three line shapes matter: set timestamps, image templates, hooks.
        if (wikiarr[i].includes(" (UTC)")) { // a signature timestamp closes each set
            setNum++;
            // Find the next timestamp down: it marks when this set went ON the
            // main page (i.e. when the set below it came off).
            let j = i + 1;
            while (j < wikiarr.length && !wikiarr[j].includes(" (UTC)")) {
                j++;
            }
            if (j == wikiarr.length) {
                // No later timestamp: the bottom set belongs to the previous
                // month, so fall back to this set's own timestamp.
                // (The bounds check above also fixes an out-of-range read when
                // a timestamp was the very last line.)
                j = i;
            }
            d = convertToDate2(wikiarr[j].substring(6,wikiarr[j].length-11)); // strip the "*'''''" prefix and "''''' (UTC)" suffix
            date = [goBack(d),goBack(d.addDays(-2))]; // query window: DYK day back to two days prior
            if (app.length > 0) { // a new set begins — ship the finished pair off
                if (papp.length > 0) { // need both a papp and an app to compare
                    processData(papp,app,pmultis,t);
                }
                // Rotate: app becomes papp, freeing app/multis for the new set.
                pmultis = JSON.parse(JSON.stringify(multis));
                papp = JSON.parse(JSON.stringify(app));
                app = [];
                multis = [];
            }
            app.push(wikiarr[i].substring(6,wikiarr[i].length-11)); // the set's removal timestamp
        } else if (wikiarr[i].includes("{{main page image/DYK")) {
            // The set's image template; keep the filename for the stats table.
            let sub = wikiarr[i].split("|")[1]; // second pipe-delimited field holds the filename
            if (sub.includes("File:")) { // best-effort strip; stray prefixes are scrubbed again later
                sub = sub.substring(5);
            }
            app.push(sub.substring(sub.indexOf("=") + 1));
        } else if (wikiarr[i].substring(0,6) == "* ... ") { // a hook line
            let re = /('{3}[^']*?\[\[.*?]].*?'{3})|(\[\[.*'{3}.*?'{3}]])/g; // bolded links: '''[[Article]]''' or [['''Article''']]
            let matches = wikiarr[i].match(re); // all bolded articles in the hook
            let multi = []; // articles of this hook if it turns out to be a multi
            // Hoisted so the catch block can log them — they used to be
            // block-scoped inside the loop, which made the catch itself throw
            // a ReferenceError instead of reporting the failure.
            let url;
            let result;
            try {
                for (var j = 0; j < matches.length; j++) {
                    // Reduce each match to a bare article title for the query.
                    matches[j] = matches[j].match(/\[\[.*?(\||]])/mg)[0]; // isolate "[[Target" (up to a pipe or closing brackets)
                    if (matches[j].includes("|")) { // piped link: keep the target, not the label
                        matches[j] = matches[j].substring(2,matches[j].indexOf("|"));
                    } else {
                        matches[j] = matches[j].substring(2,matches[j].lastIndexOf("]")-1);
                    }
                    if (matches[j].includes("#")) { // section link: drop the fragment
                        matches[j] = matches[j].substring(0,matches[j].indexOf("#"));
                    }
                    matches[j] = capitalizeFirstLetter(matches[j]); // in case the bold target was written lowercase
                    if (matches.length > 1) { // multi-hook: remember all its articles
                        multi.push(matches[j]);
                    }
                    // Query the Wikimedia pageviews API: user views only, title
                    // underscored and sanitized, window taken from `date`. On
                    // the live page (or when jitter is set) the first couple of
                    // sets carry a random parameter to defeat API caching.
                    url = "https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/"+matches[j].replace(/ /g,"_").replace(/&nbsp;/g,"_").replace("/","%2F").replace("?","%3F")+"/daily/"+date[1]+"/"+date[0]+((((mw.config.get('wgPageName')=="Wikipedia:Recent_additions")||jitter)&&(setNum<3))?("?max-age="+randint(1000)):"");
                    result = httpGet(url);
                    result = JSON.parse(result);
                    let parsed = [matches[j]]; // becomes [article, views x3, hook]
                    result.items.forEach((item) => parsed.push(item.views));
                    while (parsed.length != 4) { // zero-pad if the API returned fewer days
                        parsed.splice(1,0,0);
                    }
                    parsed.push(wikiarr[i].substring(2)); // the hook text itself
                    app.push(parsed);
                }
            } catch(err) { // log enough context to debug a bad hook or API response
                console.log(err);
                console.log(url);
                console.log("error",wikiarr[i],result,matches);
            }
            if (multi.length > 0) { // record this hook's articles for the whole set
                multis.push(multi);
            }
        }
    }
    if (app.length > 0) { // flush: process the final papp against the last app
        if (papp.length > 0) {
            // BUG FIX: `t` was previously omitted here, so every hook in the
            // month's last processed set was compared against undefined and
            // silently dropped from the table.
            processData(papp,app,pmultis,t);
        }
        papp = JSON.parse(JSON.stringify(app));
        app = [];
        multis = [];
    }
}
// Shared failure handler for api.get() calls: just record that something
// went wrong; callers continue with whatever text they already have.
function doSomethingInCaseOfError() {
    console.log( 'err' );
}

// Format a Date as "yyyymmdd00" (local time) — the shape the Wikimedia
// pageviews API expects for its start/end parameters.
// obj: a Date; returns the formatted string.
function goBack(obj){
	const month = obj.getMonth() + 1; // getMonth() is 0-based; `month` was an implicit global before
	return obj.getFullYear().toString()+month.toString().padStart(2,"0")+obj.getDate().toString().padStart(2,"0")+"00";
}

// Return a NEW Date `days` days after this one (negative values go back);
// the receiver is never modified. NOTE: extends the native Date prototype,
// which this script relies on in getData().
Date.prototype.addDays = function (days) {
	const shifted = new Date(this.valueOf());
	shifted.setDate(shifted.getDate() + days);
	return shifted;
};

// Uniform random integer in [0, max) — note `max` itself is never returned;
// used to salt pageview-API URLs so cached responses are bypassed.
function randint(max) {
  const scaled = Math.random() * max;
  return Math.floor(scaled);
}

// Entry point behind the "Get views!" button. Fetches the wikitext of the
// DYK archive page being viewed and — for a past month — also the tail of
// the FOLLOWING month's page, whose bottom set belongs to this month. The
// combined text is parsed by getData(), the resulting rows are sorted by
// views-per-hour, wrapped in {{DYK stats table}}, and written out.
// t: views-per-hour threshold; jitter: force cache-busting on API queries.
async function getDYKViews(t, jitter) {
	console.log(mw.config.get('wgPageName'));
	let t1 = ""; // tail of the next month's page (stays empty on the live page)
	let t2 = ""; // wikitext of the page being viewed
	let v2 = api.get( {
		prop: 'revisions',
		rvprop: 'content',
		rvlimit: 1,
		indexpageids: true,
		titles: mw.config.get('wgPageName')
	} )
	.then( function ( data ) {
		var q = data.query,
			id = q && q.pageids && q.pageids[0],
			pg = id && q.pages && q.pages[ id ],
			rv = pg && pg.revisions;
		if ( rv && rv[0] && rv[0]['*'] ) {
			t2 = rv[0]['*'];
		}
	} ).fail( doSomethingInCaseOfError );

	if (mw.config.get('wgPageName') != "Wikipedia:Recent_additions") {
		// Archive page, e.g. "Wikipedia:Recent_additions/2021/October".
		let s = mw.config.get('wgPageName').split("/");
		// dict[] is 0-based, so +1 lands on the FOLLOWING month; its page's
		// bottom set is the one removed at this month's boundary.
		let d = new Date(parseInt(s[1]), parseInt(dict[s[2]]) + 1, 1);
		let d1 = Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), 1);
		let cd = new Date();
		let cd1 = Date.UTC(cd.getUTCFullYear(), cd.getUTCMonth(), 1);
		let str = "";
		if (cd1 == d1) {
			str = "Wikipedia:Recent_additions"; // the following month is the current one
		} else {
			str = "Wikipedia:Recent_additions/" + d.getFullYear() + "/" + months[d.getMonth()];
		}
		// `v1` and `te` were implicit globals before; now properly scoped.
		let v1 = api.get( {
			prop: 'revisions',
			rvprop: 'content',
			rvlimit: 1,
			indexpageids: true,
			titles: str
		} )
		.then( function ( data ) {
			var q = data.query,
				id = q && q.pageids && q.pageids[0],
				pg = id && q.pages && q.pages[ id ],
				rv = pg && pg.revisions;
			if ( rv && rv[0] && rv[0]['*'] ) {
				let te = rv[0]['*'];
				te = te.substring(te.lastIndexOf("*'''''")); // keep only the last set (from its timestamp line down)
				te = "===1 " + months[d.getMonth()] + " " + d.getFullYear() + "===\n" + te;
				t1 = te;
			}
		} ).fail( doSomethingInCaseOfError );
		await Promise.all([v1, v2]);
	} else {
		await v2;
	}
	let tt = t1 + t2; // was an implicit global
	getData(tt, t, jitter);
	output.sort(function (a, b) {
		return b[1] - a[1]; // descending by views-per-hour
	});
	let pr = "{{DYK stats table|";
	output.forEach((item) => pr += "\n" + item[0]);
	pr += "\n}}";
	pr = pr.replace(/File:/g, ""); // scrub any "File:" prefixes that survived parsing
	writeToStats(pr);
}
// Turns one parsed set ("papp") into stats-table rows pushed onto the global
// `output` array.
// papp:   the set to score — [timestamp, imageFilename?, entry...] where each
//         entry is [article, views(day-2), views(day-1), views(DYK day), hook];
// app:    the chronologically-later set; only its timestamp is used (papp ran
//         on the main page from app's timestamp until its own);
// multis: arrays of article names for the multi-article hooks in papp;
// t:      views-per-hour threshold a hook must clear to be listed.
// Returns null early for a malformed set, otherwise undefined.
function processData(papp, app, multis, t) {
    if (papp[2].length != 5) { // first hook entry isn't [article, v, v, v, hook] — malformed set
        return null;
    }
    const hours = subtract(app[0], papp[0]); // time on the main page; was an implicit global
    // Row templates. basefile/baseviewsurl are currently unused but kept as
    // documentation of planned output shapes (see the commented-out multi
    // templates that previously sat alongside them).
    let basefile = "[[File:{image}|100x100px]]";
    let baseviewsurl = "https://pageviews.toolforge.org/?project=en.wikipedia.org&platform=all-access&agent=user&redirects&start={startdate}&end={enddate}&pages={page_}";
    let base = "{{DYK stats table row|{page}|{file}|{date}|{views}|{vph}|{hook}{|b}}}";
    let hookNames = []; // currently unused, retained from the original
    papp.forEach((item) => hookNames.push(item[0]));
    for (let i = 2; i < papp.length; i++) {
        let str = base;
        // Is this article part of a multi-hook? (-1 = no)
        let a = -1;
        for (let j = 0; j < multis.length; j++) {
            if (multis[j].includes(papp[i][0])) {
                a = j;
                break;
            }
        }
        if (a != -1) { // multi hook handling
            // rs/vphs/vs/strs below were implicit globals; now locals.
            let rs = [];
            for (let j = i; j < i + multis[a].length; j++) { // score every article of the multi
                rs.push(calculateViews(papp[j], hours, t));
            }
            let vphs = [];
            rs.forEach((item) => vphs.push(item[1]));
            let vs = [];
            rs.forEach((item) => vs.push(item[0]));
            const reducer = (previousValue, currentValue) => previousValue + currentValue;
            let sumh = vphs.reduce(reducer); // combined views-per-hour
            let sum = vs.reduce(reducer);    // combined views
            console.log(multis[a], vphs, (Math.round(sumh * 10) / 10).toFixed(1), (sumh >= t) ? ":)" : null);
            if (sumh >= t) { // the multi qualifies as a whole
                // One "multi" row per article, bracketed by begin/total rows.
                let strs = Array.apply(null, Array(multis[a].length + 1)).map(_ => "{{DYK stats table multi|");
                strs[0] = "{{DYK stats table multi begin|";
                strs[strs.length - 1] = "{{DYK stats table multi total|";
                strs[0] += papp[i][0] + "|";
                if (i == 2) { // the set's first hook carries the set's image
                    strs[0] += multis[a].length + "|" + papp[1] + "|";
                } else {
                    strs[0] += multis[a].length + "||";
                }
                let date = convertToDate2(papp[0]);
                let datestring = date.getUTCFullYear() + "-" + ("0" + (date.getUTCMonth() + 1)).slice(-2) + "-" + ("0" + date.getUTCDate()).slice(-2);
                strs[0] += datestring + "|";
                strs[0] += numberWithCommas(rs[0][0]) + "|";
                strs[0] += numberWithCommas((Math.round(rs[0][1] * 10) / 10).toFixed(1)) + ((rs[0][2]) ? ("|b=" + numberWithCommas((Math.round(rs[0][3] * 10) / 10).toFixed(1))) : "") + "|";
                strs[0] += papp[i][papp[i].length - 1] + "}}";
                for (let j = i + 1; j < i + multis[a].length; j++) { // the remaining articles of the multi
                    strs[j - i] += papp[j][0] + "|";
                    strs[j - i] += datestring + "|";
                    strs[j - i] += numberWithCommas(rs[j - i][0]) + "|";
                    strs[j - i] += numberWithCommas((Math.round(rs[j - i][1] * 10) / 10).toFixed(1)) + ((rs[j - i][2]) ? ("|b=" + numberWithCommas((Math.round(rs[j - i][3] * 10) / 10).toFixed(1))) : "") + "}}";
                }
                strs[strs.length - 1] += numberWithCommas(sum) + "|" + numberWithCommas((Math.round(sumh * 10) / 10).toFixed(1)) + "}}";
                output.push([strs.join("\n"), sumh]);
            }
            i += multis[a].length - 1; // skip the entries we just consumed
        } else { // single-article hook
            let r = calculateViews(papp[i], hours, t);
            console.log(papp[i][0] + ":", (Math.round(r[1] * 10) / 10).toFixed(1), (r[1] >= t) ? ":)" : null);
            if (r[1] >= t) { // fill in the row template field by field
                str = str.replace("{page}", papp[i][0]); // the article, already wiki-linked upstream
                if (i == 2) {
                    str = str.replace("{file}", papp[1]); // the set's image goes on its first hook
                } else {
                    str = str.replace("{file}", "");
                }
                let date = convertToDate2(papp[0]);
                let datestring = date.getUTCFullYear() + "-" + ("0" + (date.getUTCMonth() + 1)).slice(-2) + "-" + ("0" + date.getUTCDate()).slice(-2);
                str = str.replace("{date}", datestring);
                str = str.replace("{views}", numberWithCommas(r[0]));
                str = str.replace("{vph}", numberWithCommas((Math.round(r[1] * 10) / 10).toFixed(1))); // views per hour
                str = str.replace("{hook}", papp[i][papp[i].length - 1]); // text of the hook
                if (r[2]) { // baseline was subtracted and flagged — record it via |b=
                    str = str.replace("{|b}", "|b=" + numberWithCommas((Math.round(r[3] * 10) / 10).toFixed(1)));
                } else {
                    str = str.replace("{|b}", "");
                }
                output.push([str, r[1]]);
            }
        }
    }
}
// Parse a signature-style timestamp "HH:MM, D Month YYYY" into a Date
// (local time). Month names are resolved through the global `dict`
// (name -> zero-based index string), which the Date constructor coerces.
function convertToDate2(str) {
    const [time, dmy] = str.split(", ");
    const [hh, mm] = time.split(":");
    const [day, monthName, year] = dmy.split(" ");
    return new Date(year, dict[monthName], day, hh, mm);
}
// Hours elapsed from timestamp `stra` to timestamp `strb` (both in the
// "HH:MM, D Month YYYY" format convertToDate2 accepts). When the two fall on
// different calendar days, `strb` is clamped back to its own midnight before
// differencing — presumably to align set boundaries; TODO confirm intent.
function subtract(stra, strb) {
	const db = convertToDate2(strb);
	const da = convertToDate2(stra);
	if (da.getDate() != db.getDate()) {
		db.setHours(0);
		db.setMinutes(0);
	}
	const millis = db.getTime() - da.getTime();
	return millis / 3600000; // ms per hour
}

// Index of the largest element of `arr`, or -1 for an empty array.
// On ties the earliest index wins.
function indexOfMax(arr) {
    if (arr.length === 0) {
        return -1;
    }
    // BUG FIX: `max` was seeded with arr[1] while maxIndex pointed at 0, so
    // when arr[0] held the maximum the function could report a wrong index
    // (e.g. [5, 3, 4] returned 2 instead of 0).
    let max = arr[0];
    let maxIndex = 0;
    for (let i = 1; i < arr.length; i++) {
        if (arr[i] > max) {
            maxIndex = i;
            max = arr[i];
        }
    }
    return maxIndex;
}
// Score one hook entry r = [article, viewsDay-2, viewsDay-1, viewsDykDay, hook].
// h: hours the hook spent on the main page; t: views-per-hour threshold.
// Returns [views, viewsPerHour, flagged, baseline], where `baseline` is the
// average of the two pre-DYK days. The baseline is subtracted from the DYK-day
// count when it is high (>1024/day) or when subtracting it drops the hook
// below the threshold; `flagged` marks rows that still clear the threshold
// after subtraction (rendered with |b= in the table).
function calculateViews(r, h, t) {
    const baseline = (r[1] + r[2]) / 2;
    let v = 0;
    let s = false;
    // NOTE: the inner re-check deliberately repeats the second condition; the
    // two-step assignment is kept verbatim because comparisons against a
    // missing/NaN `t` behave differently from a single combined expression.
    if (baseline > 1024 || (r[3] - baseline) / h < t) {
        s = true;
        if ((r[3] - baseline) / h < t) {
            s = false; // fell below threshold once baseline was removed — no flag
        }
        v = r[3] - baseline;
    } else {
        v = r[3];
    }
    return [v, v / h, s, baseline];
}
// Insert thousands separators into the integer part of a number or numeric
// string: 1234567 -> "1,234,567", "12345.6789" -> "12,345.6789".
function numberWithCommas(x) {
    // BUG FIX: the regex previously ran over the entire string, so long
    // fractional parts also got commas ("1234567.8912" -> "1,234,567.8,912").
    // Group only the integer part and reattach everything after the point.
    const [intPart, ...fracParts] = x.toString().split(".");
    const grouped = intPart.replace(/\B(?=(\d{3})+(?!\d))/g, ",");
    return fracParts.length ? grouped + "." + fracParts.join(".") : grouped;
}

// Write the rendered stats-table wikitext `res` to the stats page: fetch the
// page's current text; if a "====Month Year====" section for the relevant
// month already exists, splice `res` in as that section's body; then save
// via the API. The month is taken from the parsed data (live page) or from
// the archive page title.
async function writeToStats(res) {
	let statspagetext;
	const statspage = api.get( { // was an implicit global
			prop: 'revisions',
			rvprop: 'content',
			rvlimit: 1,
			indexpageids: true,
			titles: "User:Theleekycauldron/sandbox"
		}).then( function (data) {
		var q = data.query,
			id = q && q.pageids && q.pageids[0],
			pg = id && q.pages && q.pages[ id ],
			rv = pg && pg.revisions;
		if ( rv && rv[0] && rv[0]['*'] ) {
			statspagetext = rv[0]['*'];
		}
	});
	await statspage;
	let monthyear;
	if (mw.config.get('wgPageName') == "Wikipedia:Recent_additions") {
		if (papp.length == 0) { // nothing was parsed this month — bail out loudly
			throw new Error("Howdy there, pardner! Looks like you've tried to run a this ol' horse on a page with no manure to shovel for this month, and that just don't make jack. Check back tomorruh, maybe we can higgle over something from the hoosegow. Now, pull in your horns and light a shuck, wouldya?");
		} else {
			let date = convertToDate2(papp[0]);
			monthyear = months[date.getUTCMonth()] + " " + date.getUTCFullYear();
		}
	} else {
		// Archive title "Wikipedia:Recent_additions/YYYY/Month" -> "Month YYYY".
		let s = mw.config.get('wgPageName').split("/");
		monthyear = s[2] + " " + s[1];
	}
	console.log(monthyear);
	// NOTE(review): if the fetch above failed, statspagetext is undefined and
	// .includes() below throws — consider guarding and aborting earlier.
	if (statspagetext.includes("====" + monthyear + "====")) {
		let sptsplit = statspagetext.split("\n"); // was an implicit global
		// Find the month's heading (scanning from the bottom) ...
		let i = sptsplit.length - 1;
		while (!sptsplit[i].includes("====" + monthyear + "====")) { i--; }
		// ... and the start of the next section (or end of page).
		let j = i + 2;
		while (!sptsplit[j].includes("====")) {
			j++;
			if (j == sptsplit.length) { break; }
		}
		console.log(i, j);
		// Replace everything between the heading and the next section.
		let resplit = res.split("\n");
		sptsplit = sptsplit.slice(0, i + 1).concat(resplit, sptsplit.slice(j, sptsplit.length));
		statspagetext = sptsplit.join("\n");
	} else {
		// either we're going to the archives, or we're now creating a new table
	}
	var params = {
			action: 'edit',
			title: 'User:Theleekycauldron/sandbox',
			text: statspagetext,
			format: 'json'
		};
	api.postWithToken( 'csrf', params ).done( function ( data ) {
		console.log( data );
	} );
}

// Entry point: on the live Recent additions page or any monthly archive,
// prepend a button that builds the month's stats table. The inline onclick
// string requires getDYKViews to be a global, which it is when this script
// is loaded as a plain user script. 416 + 2/3 views/hour works out to
// exactly 10,000 views over a 24-hour run — presumably the DYK stats
// cutoff; confirm against the stats page's listing criteria.
if (mw.config.get('wgPageName').includes("Wikipedia:Recent_additions")){
    $('#bodyContent').prepend('<button onclick="getDYKViews(416 + 2/3,false)">Get views!</button>');
}