Jump to content

User:Enterprisey/unreliable.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// WARNING: DO NOT USE. TESTING ONLY

// Instructions available at [[User:Headbomb/unreliable]]
// Feel free to request tweaks or additional sources to be covered on the talk page

// Adapted from https://en.wikipedia.org/w/index.php?title=Wikipedia:User_scripts/Requests&diff=939432100&oldid=939403363 by [[User:SD0001]]
// Updated from https://en.wikipedia.org/w/index.php?title=User:Jorm/unreliable.js&oldid=940556311 by [[User:Jorm]]
// Updated from https://en.wikipedia.org/w/index.php?title=User:SD0001/unreliabe.js&oldid=941114456 by [[User:SD0001]]
// Updated from https://en.wikipedia.org/w/index.php?title=User:SD0001/unreliable.js&action=edit by [[User:SD0001]]
// Updated from https://en.wikipedia.org/w/index.php?title=User:Creffett/unreliable.js&oldid=957395306 by [[User:Creffett]]
// Updated to use JSON by [[User:Enterprisey]]

// Unreliable websites based on [[WP:RSPSOURCES]] and [[WP:NPPSG]] (mostly)
// Unreliable publishers/journals based on [[WP:CITEWATCH]] (mostly)

$.when(
	$.ready,
	mw.loader.using([ 'mediawiki.api' ])
).then(function () {

	// Load the shared rule list: read the current revision of the on-wiki JSON page
	// through the action API and parse the content of its main slot.
	// TODO localStorage
	var mainRulesQuery = {
		action: 'query',
		prop: 'revisions',
		titles: 'Wikipedia:TESTING-DONT-USE-unreliable.json',
		rvslots: '*',
		rvprop: 'content',
		formatversion: '2',
		uselang: 'content', // needed for caching
		smaxage: '86400', // cache for 1 day
		maxage: '86400' // cache for 1 day
	};
	var mainRulesPromise = new mw.Api().get(mainRulesQuery).then(function(response) {
		var firstPage = response.query.pages[0];
		var mainSlot = firstPage.revisions[0].slots.main;
		return JSON.parse(mainSlot.content);
	});
	
	// Promise for the user's own rules. It always resolves with an array: the valid
	// entries of window.unreliableCustomRules when custom rules are enabled and could be
	// loaded, otherwise an empty array.
	var customRulesPromise;

	if (window.unreliableUseCustomRules) {

		// Dynamically load a user's custom rules from User:USERNAME/unreliable-rules.js
		customRulesPromise = mw.loader.getScript('/w/index.php?title=User:' + encodeURIComponent(mw.config.get('wgUserName')) +
			'/unreliable-rules.js&action=raw&ctype=text/javascript')
		.then( function () {
			// Script loaded; it is expected to have set window.unreliableCustomRules
			if (!Array.isArray(window.unreliableCustomRules)) {
				return [];
			}
			// Keep only well-formed rules: a RegExp in `regex` and a non-null object in `css`
			return window.unreliableCustomRules.filter(function(customRule) {
				var isValid = !!customRule &&
					(customRule.regex instanceof RegExp) &&
					(typeof customRule.css === 'object') &&
					customRule.css !== null;
				if (!isValid) {
					mw.log.warn("Error parsing custom unreliable links rule: ", customRule);
				}
				return isValid;
			});
		}, function(e) {
			// Something's gone very wrong
			mw.log.error("Error retrieving your unreliable-rules.js");
			// More detailed error in the console if someone feels nice enough to file a bug report
			console.log("Error getting local unreliable-rules.js: " + (e && e.message));
			// Recover with an empty list (returned as a promise so that it is adopted as a
			// resolution) so the main rules are still applied
			return $.when([]);
		});
	} else {
		customRulesPromise = $.when([]);
	}

	// Register one stylesheet rule per highlight class, after the mediawiki.util
	// module has been loaded so that mw.util.addCSS can be called.
	mw.loader.using('mediawiki.util').then(function() {
		var highlightStyles = [
			'.unreliable-borderline { background-color: #fffdd0; }',
			'.unreliable-unreliable { background-color: #ffdddd; }',
			'.unreliable-predatory  { background-color: #ffbbbb; text-decoration: underline; text-decoration-style: wavy; }',
			'.unreliable-blacklist  { background-color: #dddddd; text-decoration: underline; text-decoration-style: wavy; text-decoration-color: #cc0000; }'
		];
		highlightStyles.forEach(function(styleRule) {
			mw.util.addCSS(styleRule);
		});
	});

	/**
	 * Find the first rule that matches a URL or a piece of text.
	 *
	 * The input is lower-cased, percent-encoded slashes are decoded, and the first
	 * "https://" and "http://" are removed. The full normalized string is tested against
	 * each rule's `regex`. Everything before the first "/" (or the whole string when it
	 * contains no "/") is treated as the host name and looked up in each rule's `sets`.
	 *
	 * @param {Object[]} rules Rules to try, in order. A rule may carry `namespaces`
	 *   (array of namespace numbers it is limited to), `regex` (a RegExp) and `sets`
	 *   (map from dot count to a map whose keys are domains with that many dots).
	 * @param {string} originalText URL or plain text to check.
	 * @return {Object|undefined} The first matching rule, or undefined when none matches.
	 */
	function tryRules(rules, originalText) {
		// The text is lower-cased first, so the escape has to be matched as "%2f"
		var normalized = originalText.toLowerCase()
			.replace(/%2f/g, '/')
			.replace('https://', '')
			.replace('http://', '');
		var slashIndex = normalized.indexOf('/');
		// Without a slash the whole string is the host candidate (not an empty string)
		var host = slashIndex < 0 ? normalized : normalized.substring(0, slashIndex);
		var hostDots = host.split('.').length - 1;
		// For example, if host was a.b.c.d, subdomains would be ['c.d', 'b.c.d', 'a.b.c.d'],
		// so subdomains[n - 1] is the suffix of the host that contains n dots.
		// This is so that we can look up each "subdomain" in the appropriate "set".
		var subdomains = [host];
		for (var i = 0; i < hostDots - 1; i++) {
			subdomains.unshift(subdomains[0].substring(subdomains[0].indexOf('.') + 1));
		}
		var hasOwn = Object.prototype.hasOwnProperty;
		for (var r = 0; r < rules.length; r++) {
			var rule = rules[r];
			// A rule limited to other namespaces is skipped; later rules must still be tried
			if (typeof rule.namespaces !== 'undefined' && rule.namespaces.indexOf(mw.config.get('wgNamespaceNumber')) < 0) {
				continue;
			}
			if (rule.regex && rule.regex.test(normalized)) {
				return rule;
			}
			if (rule.sets) {
				var dotCounts = Object.keys(rule.sets);
				for (var k = 0; k < dotCounts.length; k++) {
					var candidate = subdomains[dotCounts[k] - 1];
					var domains = rule.sets[dotCounts[k]];
					// Own-property check so that inherited keys such as "constructor" never match
					if (typeof candidate === 'string' && hasOwn.call(domains, candidate) && domains[candidate]) {
						return rule;
					}
				}
			}
		}
		return undefined;
	}

	// Once both rule lists are available, preprocess them and highlight every matching
	// external link and link-less list item / reference on the page.
	$.when(
		mainRulesPromise,
		customRulesPromise
	).then(function(mainRules, customRules) {
		// Tolerate a missing list so that concat() does not insert an `undefined` entry
		var allRules = (mainRules || []).concat(customRules || []).filter(function(rule) {
			return !!rule && typeof rule === 'object';
		});

		// Preprocess the rules to make checking faster
		var rules = allRules.map(function(rule) {
			if (rule.regex) {
				// A rule may carry either a pattern string or a RegExp object; concatenating
				// a RegExp would embed its "/.../flags" form, so use its source instead
				var pattern = rule.regex instanceof RegExp ? rule.regex.source : rule.regex;
				rule.regex = new RegExp('\\b(?:' + pattern + ')\\b', 'i');
			}
			if (rule.list) {
				// Divide the domains by the number of dots in them, so that all we need to do for link checks is to do a "in this set?" check instead of a substring
				rule.sets = {};
				rule.list.forEach(function(domain) {
					var numDots = domain.split('.').length - 1;
					// Prototype-less maps, so that lookups only ever see listed domains
					if (!rule.sets[numDots]) rule.sets[numDots] = Object.create(null);
					rule.sets[numDots][domain] = true;
				});
			}
			return rule;
		});

		// Mark an element as matching `rule`: class from `kind`, inline styles from `css`
		// (when the rule has them), and the rule's comment as tooltip.
		function highlight(element, rule) {
			var $element = $(element);
			if (rule.kind) {
				$element.addClass('unreliable-' + rule.kind);
			}
			if (rule.css && typeof rule.css === 'object') {
				$element.css(rule.css);
			}
			$element.attr('title', rule.comment || '');
		}

		// Check each external link on the page against each rule
		$('.mw-parser-output a.external').each(function(_, link) {
			var rule = tryRules(rules, link.href);
			if (rule) {
				highlight(link, rule);
			}
		});

		// Check list items against each rule to catch further reading/bibliography items without links
		$('.mw-parser-output ul li, .mw-parser-output ol:not(.references) li, .reference-text:not(:has(a))')
		.each(function(_, li) {
			var rule = tryRules(rules, li.textContent);
			if (rule) {
				highlight(li, rule);
			}
		});
	});
});