User:Michael Devore/wiksort.js

Source: Wikipedia, the free encyclopedia.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/*
 * wiksort.js
 * Version 1.1
 * Sort out the unique words in a Wikipedia article onto a separate page
 *  pop-ups must be enabled for the wikipedia domain
 *  works best with Firefox due to its built-in spelling checker for textareas
 *
 * Written by Michael Devore
 * Comments to: http://en.wikipedia.org/wiki/User_talk:Michael_Devore
 * Released to the public domain
*/
// Settings shared by all wiks* functions in this script.
var wiksGlobals =
{
	// Minimum length of a word starting with a lower-case letter.
	regularNounCutoff: 6,

	// Minimum length of a word starting with an upper-case letter.
	properNounCutoff: 6,

	// Parse-API URL prefix; the (URL-encoded) page title is appended to it.
	baseApiString:
		mw.config.get('wgServer') +
		mw.config.get('wgScriptPath') +
		"/api.php?action=parse&format=xml&page=",

	// Label, element id and tooltip used for the "Sort words" link.
	wiksText: "Sort words",
	wiksId: "t-sort-words",
	wiksToolTip: "Sort unique article words"
};

/*
 * Entry point behind the "Sort words" link: requests the current page
 * from the parse API so that its words can be extracted and sorted.
 */
function wiksMain()
{
	// wgPageName separates words with underscores. A string pattern would
	// only replace the first one, so use a global regular expression.
	var wikiPage = mw.config.get('wgPageName').replace(/_/g, ' ');
	wiksLoadPageXML(encodeURIComponent(wikiPage));
}

/*
 * Install the "Sort words" link on the current page.
 *
 * Pages in the Special namespace get no link.  Every skin except
 * cologneblue receives a portlet link in the 'p-tb' portlet.  For
 * cologneblue the link (followed by a line break) is inserted into the
 * "quickbar" element, directly before the child whose first node is the
 * text "This page"; if no such child exists nothing is added.
 */
function wiksLoad()
{
	var linkTarget = 'javascript: wiksMain();';

	if (mw.config.get('wgCanonicalNamespace') === "Special")
	{
		return;
	}

	if (mw.config.get('skin') !== "cologneblue")
	{
		mw.util.addPortletLink(
			'p-tb',
			linkTarget,
			wiksGlobals.wiksText,
			wiksGlobals.wiksId,
			wiksGlobals.wiksToolTip
		);
		return;
	}

	// cologneblue has no portlets: look for the 'This page' quickbar section
	// so the link lands just ahead of it.
	var quickbar = document.getElementById("quickbar");
	var section;
	for (section = quickbar ? quickbar.firstChild : null; section; section = section.nextSibling)
	{
		var heading = section.firstChild;
		if (heading && heading.nodeName === "#text" && heading.nodeValue === "This page")
		{
			break;
		}
	}
	if (!section)
	{
		return;
	}

	var lineBreak = document.createElement("BR");
	var link = document.createElement("A");
	link.setAttribute("href", linkTarget);
	link.appendChild(document.createTextNode(wiksGlobals.wiksText));
	quickbar.insertBefore(link, section);
	quickbar.insertBefore(lineBreak, section);
}

/*
 * Fetch the parsed article from the MediaWiki API and hand the raw XML
 * response text to wiksProcessPage().
 *
 * wikiPage - page title, already URL-encoded by the caller; it is appended
 *            verbatim to wiksGlobals.baseApiString.
 *
 * The request is asynchronous, so nothing is returned.  A failed request
 * or an empty response is reported with an alert instead of being
 * silently dropped.
 */
function wiksLoadPageXML(wikiPage)
{
	var wikiApiString = wiksGlobals.baseApiString + wikiPage;
	var request = new XMLHttpRequest();
	request.open("GET", wikiApiString, true);
	// No User-Agent header is set from script: browsers either refuse it
	// (logging an "unsafe header" error) or would send a misleading value.
	// The browser's own User-Agent is sent automatically.
	request.setRequestHeader("Accept", "text/xml");
	request.onreadystatechange = function() {
		if (request.readyState !== 4)
		{
			// still in progress
			return;
		}
		if (request.status === 200 && request.responseText)
		{
			wiksProcessPage(request.responseText);
		}
		else
		{
			alert("wiksort could not load this page.");
		}
	};
	request.send(null);
}

/*
 * Extract the plain article text from a parse-API XML response and pass
 * it to wiksFilterAndShow().
 *
 * pText - raw XML text of the API response.
 *
 * Returns true when the text was extracted and handed on, false (after an
 * alert) when the response could not be parsed or holds no <text> element,
 * e.g. because the API answered with an error.
 */
function wiksProcessPage(pText)
{
	var pDoc;
	var isIE = false;
	try //Internet Explorer
	{
		pDoc = new ActiveXObject("Microsoft.XMLDOM");
		// boolean, so that loadXML() below completes synchronously
		pDoc.async = false;
		pDoc.loadXML(pText);
		isIE = true;
	}
	catch(e)
	{
		try //Firefox, Mozilla, Opera, etc.
		{
			var dParser = new DOMParser();
			pDoc = dParser.parseFromString(pText, "application/xml");
		}
		catch(e)
		{
			alert("wiksort could not process this page.");
			return false;
		}
	}

	// The rendered HTML is the character data of the first <text> element.
	// Without this guard a response lacking that element raised a TypeError.
	var tElements = pDoc.getElementsByTagName("text");
	var tElem = tElements ? tElements[0] : null;
	if (!tElem)
	{
		alert("wiksort could not process this page.");
		return false;
	}

	var htmlContent = "";
	var cNode = tElem.firstChild;
	while (cNode)
	{
		htmlContent += cNode.nodeValue;
		cNode = cNode.nextSibling;
	}

	// Let the browser parse the HTML inside a div that is never attached to
	// the page, then read the div's text to get the content without markup.
	var d = document.createElement("div");
	d.innerHTML = htmlContent;
	var theText;
	if (isIE)
	{
		theText = d.innerText;
	}
	else
	{
		theText = d.textContent;
	}
	wiksFilterAndShow(theText);
	return true;
}

/*
 * Collect the long words of the article text, sort them without regard to
 * case, count how often each one occurs and pass both the sorted list and
 * the counts to wiksShowResults().
 *
 * articleText - plain text of the article (string).
 *
 * A word is a run of the letters a-z, A-Z and é.  Words starting with an
 * upper-case letter need at least wiksGlobals.properNounCutoff characters,
 * words starting with a lower-case letter at least
 * wiksGlobals.regularNounCutoff characters.
 *
 * When no word qualifies an alert is shown and wiksShowResults() is not
 * called.
 */
function wiksFilterAndShow(articleText)
{
	var regNoun = new RegExp("\\b[a-z][a-zA-Zé]{" + (wiksGlobals.regularNounCutoff - 1) + ",}", "g");
	// declared with var: it used to leak as an implicit global
	var propNoun = new RegExp("\\b[A-Z][a-zA-Zé]{" + (wiksGlobals.properNounCutoff - 1) + ",}", "g");

	// match() yields null when nothing matches; normalise to empty lists
	var properWords = articleText.match(propNoun) || [];
	var regularWords = articleText.match(regNoun) || [];
	var result = properWords.concat(regularWords);
	if (result.length === 0)
	{
		alert("wiksort found no sortable words.");
		return;
	}

	result.sort(function(x, y) {
		var a = String(x).toUpperCase();
		var b = String(y).toUpperCase();
		if (a > b)
		{
			return 1;
		}
		if (a < b)
		{
			return -1;
		}
		return 0;
	} );

	// The counts live on an object WITHOUT a prototype.  On an Array (as
	// before) words such as "values", "includes", "entries" or "concat"
	// found the inherited Array method of that name and counted as NaN.
	var wordCount = Object.create(null);
	for (var i = 0; i < result.length; i++)
	{
		var lcWord = result[i].toLowerCase();
		// wiksShowResults() looks these six words up under a suffixed key,
		// so they must still be stored under that key.
		if (lcWord == "reverse" ||
			lcWord == "constructor" ||
			lcWord == "every" ||
			lcWord == "reduce" ||
			lcWord == "splice" ||
			lcWord == "filter")
		{
			lcWord += "_x_wiks";
		}
		wordCount[lcWord] = (wordCount[lcWord] || 0) + 1;
	}
	wiksShowResults(result, wordCount);
}

/*
 * Show the sorted word list in a new pop-up window.
 *
 * wordList  - array of words, sorted without regard to case so that equal
 *             words are adjacent.
 * wordCount - map from lower-cased word to its number of occurrences; the
 *             words reverse, constructor, every, reduce, splice and filter
 *             are looked up with the suffix "_x_wiks".
 *
 * Each distinct word is listed once with its count, grouped under a
 * heading per initial letter.  A word equal to the previous word plus "s"
 * is left out.  The generated page keeps the list in a hidden span and
 * copies its text into a textarea.
 *
 * If no window can be opened the user is told to enable pop-ups and
 * nothing is written.
 */
function wiksShowResults(wordList, wordCount)
{
	var feedback = "";
	var feedFormat = "<html><head><title>";
	// last path segment of the URL; empty when the URL ends in a slash
	var pageMatch = window.location.href.match(/[^\/]+$/);
	var feedTitle = "Sorted words in " + (pageMatch ? pageMatch[0] : "");
	// the title ends up in HTML, so neutralise markup characters
	feedTitle = feedTitle.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
	feedTitle = feedTitle.replace(/_/g," ");
	feedFormat += feedTitle;
	feedFormat += '</title><body>';
	feedFormat += "==" + feedTitle + '==<br><span id="wiksort">';
	var previousWord = "";
	var rowCount = 1;
	var lastLetter = "";
	for (var i = 0; i < wordList.length; i++)
	{
		var lcWord = wordList[i].toLowerCase();
		var adjustWord = lcWord;
		if (lcWord == "reverse" ||
			lcWord == "constructor" ||
			lcWord == "every" ||
			lcWord == "reduce" ||
			lcWord == "splice" ||
			lcWord == "filter")
		{
			adjustWord += "_x_wiks";
		}
		var count = wordCount[adjustWord];
		if (typeof count != "number" || !isFinite(count) || count < 1)
		{
			// Unusable count (e.g. NaN after a clash with an inherited
			// property of the map): recount the run of equal words in the
			// sorted list instead of letting NaN end the loop early.
			count = 1;
			while (i + count < wordList.length &&
				wordList[i + count].toLowerCase() == lcWord)
			{
				count++;
			}
		}
		if (lcWord !== previousWord + "s")
		{
			var startLetter = lcWord.slice(0,1);
			if (lastLetter != startLetter)
			{
				lastLetter = startLetter;
				feedFormat += "\n<br>&nbsp;== " + startLetter.toUpperCase()+" ==\n<br>";
				rowCount += 2;
			}
			feedback += wordList[i] + "(" + count + ") - " + (((i+1) % 10 == 0) ? "\n" : "");
			feedFormat += wordList[i] + "&nbsp;(" + count + ")&nbsp;\n<br>";
			rowCount++;
		}
		// skip the remaining occurrences of this word
		i += count - 1;
		previousWord = lcWord;
	}
	feedFormat += "</span>\n";
	feedFormat += '<textarea id="sorttext" wrap="hard" rows="';
	feedFormat += rowCount + '"></textarea>\n';
	feedFormat += '<script language="JavaScript">\n';
	feedFormat += 'var st=document.getElementById("sorttext");\n';
	feedFormat += 'var sortSpan=document.getElementById("wiksort");\n';
	feedFormat += 'st.style.width="340px";\n';
	feedFormat += 'st.style.fontSize="large";\n';
	feedFormat += 'st.value=(sortSpan.textContent === undefined ? sortSpan.innerText : sortSpan.textContent);\n';
	feedFormat += 'sortSpan.style.display="none";\n';
	feedFormat += '</script>';
	feedFormat += "</body></html>";

	if (feedback.length)
	{
		var feedWin = null;
		var iCounter = 0;
		while (!(feedWin = window.open()) && iCounter < 10)
		{
			iCounter++;
		}
		// Decide on the window itself, not on the retry counter, and stop
		// here: without a window there is no document to write to.
		if (!feedWin)
		{
			alert("You need to enable pop-ups for the Wikipedia site\n"
				+ " (or the browser doesn't work with wiksort)\n");
			return;
		}
		var feedDoc = feedWin.document;
		feedDoc.open();
		feedDoc.write(feedFormat);
		feedDoc.close();
	}
	else
	{
		alert("wiksort found no sortable words.");
	}
}

// Install the link once the page is ready.  addOnloadHook() is a deprecated
// legacy (wikibits) function that newer MediaWiki releases no longer
// provide; jQuery's ready handler is its documented replacement.  Waiting
// for 'mediawiki.util' first guarantees that mw.util.addPortletLink(),
// which wiksLoad() calls, has been loaded.
mw.loader.using('mediawiki.util').then(function () {
	$(wiksLoad);
});