User:Lingzhi2/reviewsourcecheck.js

Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump.
This code will be executed when previewing this page.
This user script seems to have a documentation page at User:Lingzhi2/reviewsourcecheck.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
//<nowiki>
jQuery(document).ready(function($) {
	if (document.querySelectorAll('#editform a').length > 0){
		return;
	}
    if ((mw.config.get('wgPageName').indexOf('talk:') < 0) && (mw.config.get('wgPageName').indexOf('Talk:') < 0) && (mw.config.get('wgPageName').indexOf('Special:') < 0) && (mw.config.get('wgPageName').indexOf('Wikipedia:') < 0)) {

        // spantitles is the live collection of elements with class "Z3988";
        // their title attributes are read extensively by the checks below.
        var spantitles = document.getElementsByClassName("Z3988");

        // The TOC entries supply the section headings used on the page. Each
        // one is turned into an anchor-style string ("Works cited" becomes
        // "#Works_cited") so it can be compared with the list of known
        // reference-section headers during sorting.
        var myTOC = document.getElementsByClassName("toctext");
        var myTOCarray = [];
        for (var z = 0; z < myTOC.length; z++) {
            // Every space is replaced, not just the first: with a plain string
            // pattern "Citations and notes" would become "#Citations_and notes"
            // and never match its "#Citations_and_notes" header entry.
            myTOCarray.push("#" + myTOC[z].innerText.replace(/ /g, "_"));
        }

        // The headings are reversed so that sections are handled from the
        // bottom of the page to the top while sorting. If an article has
        // stacked headings (for example Primary and Secondary references
        // listed under Works cited), this keeps the script from trying to
        // insert the Secondary entries (lower on the page) into the sorted
        // list of Primary ones (higher on the page), which would cause
        // numerous confusing false positives.
        myTOCarray.reverse();

        //var citejournals = document.getElementsByClassName("citation journal");
        // var reftext = document.getElementsByClassName("reference-text");  // all sfns
        // var bookspan = document.querySelectorAll("citation book.span title");
        //var webspan = document.querySelectorAll("citation web.span title");
        // spantitle[13].title


        // First check: page references in short footnotes.
        //   Hyphen in pg. range;
        //   P/PP error?
        //
        // For every link on the page, the text of the link's parent is split on
        // ";" and each segment is examined for a "pp." or " p." marker. Only the
        // 12 characters starting at the marker are inspected for hyphens and
        // commas, to avoid false positives such as
        // " Brennan, Heathcote & Lucas 1984, p. 9" (commas and ampersand before
        // p. 9) and "Jones 1942, p. 10, note 3" (commas irrelevant to
        // pagination).

        // Escapes page text before it is embedded in the HTML of a flag, so
        // that characters such as "<" or "&" in a footnote are shown literally.
        var escapePageRefText = function(text) {
            return String(text)
                .replace(/&/g, "&amp;")
                .replace(/</g, "&lt;")
                .replace(/>/g, "&gt;");
        };

        // Appends flag markup to a node. insertAdjacentHTML leaves the existing
        // children in place, whereas "innerHTML +=" re-parses and replaces all
        // of them (discarding any event handlers attached to those nodes).
        var flagPageRef = function(node, html) {
            node.insertAdjacentHTML("beforeend", html);
        };

        // Adds the hyphen flag at most once per parent, however many segments
        // of its text contain a hyphenated range.
        var flagHyphenOnce = function(node) {
            if (node.innerHTML.indexOf('Hyphen') < 0) {
                flagPageRef(node, " <strong class=refckErr> Hyphen in pg. range;  </strong>");
            }
        };

        var links = document.links;
        for (var i = 0; i < links.length; i++) {
            var parent = links[i].parentNode;
            var href = links[i].getAttribute('href') || '';
            var isCiteRef = href.indexOf('#CITEREF') === 0;

            // index addresses the case of sfnm, whose output places several
            // links under the same parent as siblings. Only the first of them
            // is processed; otherwise the same parent text would be examined
            // (and flagged) once per link.
            var index = Array.prototype.indexOf.call(parent.children, links[i - 1]);
            if (index >= 0) {
                continue;
            }

            var srctxt = parent.textContent;
            var spline = srctxt.split(";");
            for (var k = 0; k < spline.length; k++) {
                var commacount = (spline[k].match(/,/g) || []).length;
                var segment;
                var myPos;
                var temptxt;

                if (spline[k].indexOf('pp.') > 0) {
                    // Blank out the output of Ucucha's script so that it is
                    // not grabbed and added to this output.
                    segment = spline[k].replace("Harv error: link to", "            ");
                    myPos = segment.indexOf('pp.');
                    temptxt = segment.substring(myPos, myPos + 12);

                    if (temptxt.indexOf('-') > 0) {
                        flagHyphenOnce(parent);
                    }

                    // "pp." with nothing in the segment that suggests more
                    // than one page (no dash, ampersand, " and ", or second
                    // comma) probably should have been "p.".
                    if (isCiteRef &&
                        (segment.indexOf('–') < 0) &&
                        (segment.indexOf('&') < 0) &&
                        (commacount < 2) &&
                        (segment.indexOf('-') < 0) &&
                        (segment.indexOf(' and ') < 0)) {
                        flagPageRef(parent,
                            " <strong class=refckErr> P/PP error? " +
                            escapePageRefText(temptxt) + "; </strong>");
                    }
                } else if (isCiteRef && (spline[k].indexOf(' p.') > 0)) {
                    segment = spline[k].replace("Harv error: link to", "            ");
                    myPos = segment.indexOf(' p.');
                    temptxt = segment.substring(myPos, myPos + 12);
                    var snippetCommas = (temptxt.match(/,/g) || []).length;

                    // "p." followed by a range or a list of pages probably
                    // should have been "pp.".
                    if ((temptxt.indexOf('–') > 0) || (snippetCommas > 0) ||
                        (temptxt.indexOf('-') > 0) || (temptxt.indexOf('&ndash;') > 0)) {

                        // Not an error: p. 23, note 7; p. 23, n. 7;
                        // p.23, citing Smith 1989
                        if ((temptxt.indexOf(', not') < 0) &&
                            (temptxt.indexOf(', n.') < 0) &&
                            (temptxt.indexOf(', cit') < 0)) {
                            flagPageRef(parent,
                                " <strong class=refckErr> P/PP error? " +
                                escapePageRefText(temptxt) + "; </strong>");
                        }
                    }
                    if (temptxt.indexOf('-') > 0) {
                        flagHyphenOnce(parent);
                    }
                }
            }
        }



        // Second check: full citations, examined through the title attribute
        // of each "Z3988" span and the text of the span's parent.
        //   Warning: Unexpected result – extra formatting in template?
        //   Caution: Missing pagenums for book chapter?
        //   Missing first name for:
        //   Missing identifier (ISSN, JSTOR, etc.);
        //   Inconsistent use of Publisher Location
        //   Missing Publisher
        //   Missing ISBN
        //   Pub. too early for ISBN, perhaps needs {{orig-year}};
        //   Missing Identifier/control number, e.g. OCLC;
        //   Missing Year/Date;
        //   Missing archive link;
        //   Missing access date;

        // Running totals used for "Inconsistent use of Publisher Location".
        var withLocsCnt = 0;
        var withoutLocsCnt = 0;

        // Labels whose presence in the citation text counts as an identifier.
        var idArray = ["arXiv", "ASIN", "Bibcode", "doi:", "ISBN", "ISSN", "JFM", "JSTOR", "LCCN", " MR ", "OCLC", " OL ", "OSTI", "PMC", "PMID", "RFC", "SSRN", "Zbl"];

        // Escapes text before it is embedded in the HTML of a flag.
        var escapeCiteText = function(text) {
            return String(text)
                .replace(/&/g, "&amp;")
                .replace(/</g, "&lt;")
                .replace(/>/g, "&gt;");
        };

        // Appends flag markup to a node without re-parsing its existing
        // children (which "innerHTML +=" would do).
        var flagCitation = function(node, html) {
            node.insertAdjacentHTML("beforeend", html);
        };

        // Returns the text of a node with this script's own flags left out.
        // Several citations can share one parent; without this, a flag such as
        // "Missing ISBN" added for the first citation would be read back as
        // the text "ISBN" when the next citation is examined.
        var readCiteText = function(node) {
            var copy = node.cloneNode(true);
            var flags = copy.querySelectorAll(".refckErr, .refckWarn");
            for (var f = 0; f < flags.length; f++) {
                flags[f].parentNode.removeChild(flags[f]);
            }
            return copy.textContent;
        };

        // Decodes a percent-encoded value from the span title for display;
        // falls back to the raw value if it is not validly encoded.
        var decodeTitleValue = function(value) {
            try {
                return decodeURIComponent(value);
            } catch (err) {
                return value;
            }
        };

        for (var i = 0; i < spantitles.length; i++) {
            var cite = spantitles[i].parentNode;
            var title = spantitles[i].title;

            // There is nothing in the span title which indicates that a link
            // has been archived, so the parent's text is consulted as well.
            var srctxt = readCiteText(cite);

            if ((cite.nodeName === "I") || (cite.nodeName === "B")) {
                flagCitation(cite,
                    " <strong class=refckErr> Warning: Unexpected result – extra formatting in template? </strong>");
            }

            // Both an article title and a book title: a chapter in a book.
            if ((title.indexOf("rft.atitle=") > 0) && (title.indexOf("rft.btitle=") > 0)) {
                if ((srctxt.indexOf(" pp.") < 0) && (srctxt.indexOf(" p.") < 0)) {
                    flagCitation(cite,
                        " <strong class=refckErr> Caution: Missing pagenums for book chapter? </strong>");
                }
            }

            // Each piece after "rft.au=" starts with one author value; spaces
            // in it are encoded as "+", so a value without "+" is one word.
            if ((title.indexOf("rft.au=") > 0) && (srctxt.indexOf("et al.") < 0)) {
                var spline = title.split("rft.au=");
                for (var k = 1; k < spline.length; k++) {
                    if (spline[k].indexOf("+") < 0) {
                        var lastName = decodeTitleValue(spline[k].split("&")[0]);
                        flagCitation(cite,
                            " <strong class=refckErr> Missing first name for: <u>" +
                            escapeCiteText(lastName) + "</u>; </strong>");
                    }
                }
            }

            var hasID = idArray.some(function(label) {
                return srctxt.indexOf(label) > 0;
            });

            if ((title.indexOf("rft.genre=article") > 0) && !hasID) {
                flagCitation(cite,
                    " <strong class=refckErr> Missing identifier (ISSN, JSTOR, etc.); </strong>");
            }

            if (title.indexOf("rft.genre=book") > 0) {

                // These publishers are left out of the location tally.
                if ((srctxt.indexOf("Oxford University Press") < 0) && (srctxt.indexOf("University of Calcutta") < 0) && (srctxt.indexOf("Princeton University Press") < 0) && (srctxt.indexOf("Cambridge University Press") < 0)) {

                    // From the first book that breaks the pattern onwards,
                    // every counted book is flagged with the tally so far; the
                    // underlined part tells which group this book belongs to.
                    if (title.indexOf("rft.place") < 0) {
                        withoutLocsCnt += 1;
                        if (withLocsCnt > 0) {
                            flagCitation(cite,
                                "<strong class=refckErr> Inconsistent use of Publisher Location (" +
                                withLocsCnt + " with; " + withoutLocsCnt + " <u>without</u>); </strong>");
                        }
                    } else {
                        withLocsCnt += 1;
                        if (withoutLocsCnt > 0) {
                            flagCitation(cite,
                                "<strong class=refckErr> Inconsistent use of Publisher Location (" +
                                withLocsCnt + " <u>with;</u> " + withoutLocsCnt + " without); </strong>");
                        }
                    }
                }

                if (title.indexOf("rft.pub") < 0) {
                    flagCitation(cite,
                        " <strong class=refckErr> Missing Publisher; </strong>");
                }

                var datePos = title.indexOf("rft.date");
                if (datePos > 0) {
                    // The four characters after "rft.date=".
                    var myDate = title.slice(datePos + 9, datePos + 13);
                    var hasYear = /^\d{4}$/.test(myDate);
                    var pubYear = parseInt(myDate, 10);

                    if (hasYear && pubYear >= 1970) {
                        if (!hasID) {
                            flagCitation(cite,
                                " <strong class=refckErr> Missing ISBN; </strong>");
                        }
                    } else {
                        // The ") [" test is a huge kluge, but there is nothing
                        // to indicate whether origyear is populated except the
                        // text having (pubdate) [origdate], and there is little
                        // restriction on the format of the two dates.
                        // A date that does not start with a four-digit year is
                        // not claimed to be "too early".
                        if (hasYear && (title.indexOf("rft.isbn") > 0) && (srctxt.indexOf(") [") < 0)) {
                            flagCitation(cite,
                                " <strong class=refckErr> Pub. too early for ISBN, perhaps needs {{para|orig-year}}; </strong>");
                        }
                        if (!hasID) {
                            flagCitation(cite,
                                " <strong class=refckErr> Missing Identifier/control number, e.g. OCLC; </strong>");
                        }
                    }
                } else {
                    flagCitation(cite,
                        " <strong class=refckErr> Missing Year/Date; </strong>");
                }
            }

            // Online sources other than books: the access date is only
            // checked when the link has not been archived.
            if ((title.indexOf("http") > 0) && (title.indexOf("rft.genre=book") < 0)) {
                if (srctxt.indexOf("rchived") < 0) {
                    flagCitation(cite,
                        " <strong class=refckErr> Missing archive link; </strong>");
                    if ((srctxt.indexOf("Retrieved") < 0) && (title.indexOf("rft.date") < 0)) {
                        flagCitation(cite,
                            " <strong class=refckErr> Missing access date; </strong>");
                    }
                }
            }
        }

        // Third check: ordering of the citations in each reference section.
        //   sorting
        //   finding duplicate author/title,
        //   removing meaningless initial words,
        //   handling odd date formats "(April 2007)" etc.,
        //   sorting stacked reference sections
        //   (reverse TOC order and skipping citations already handled)

        var allRefheaders = ["#Books", "#Journals", "#Articles", "#Biographies", "#Bibliography", "#References", "#Citations_and_notes", "#Literature_cited", "#Works_cited", "#Book_sources", "#Primary_sources", "#Secondary_sources", "#Sources", "#Specialized_studies"];

        // refheaders: the known reference headers present on this page, in the
        // (reversed) order of myTOCarray.
        var refheaders = myTOCarray.filter(function(heading) {
            return allRefheaders.indexOf(heading) > -1;
        });

        // Escapes text before it is embedded in the HTML of a flag.
        var escapeSortText = function(text) {
            return String(text)
                .replace(/&/g, "&amp;")
                .replace(/</g, "&lt;")
                .replace(/>/g, "&gt;");
        };

        // Appends flag markup to a node without re-parsing its existing
        // children (which "innerHTML +=" would do).
        var flagSortIssue = function(node, html) {
            node.insertAdjacentHTML("beforeend", html);
        };

        // Removes a leading "A ", "An " or "The " and re-capitalises what is
        // left, so that such words do not take part in the ordering.
        var dropLeadingArticle = function(text) {
            var articles = ["A ", "An ", "The "];
            for (var a = 0; a < articles.length; a++) {
                if (text.slice(0, articles[a].length) === articles[a]) {
                    text = text.slice(articles[a].length);
                    text = text.charAt(0).toUpperCase() + text.slice(1);
                }
            }
            return text;
        };

        // Builds the sort key for a citation that has no id, from its visible
        // text: everything up to the first ")" (normally the end of the date),
        // with the year moved to the front of the parentheses, and limited to
        // the first five space-separated words.
        var sortKeyFromText = function(text, previousAuthor) {
            var key = dropLeadingArticle(text);

            // for example, in |author-mask={{long dash}}: reuse the author of
            // the preceding citation.
            if (key.charAt(0) === "—") {
                key = previousAuthor + key;
            }
            key = key.replace(/"/g, '');

            var closeParen = key.indexOf(")");
            if (closeParen > 0) {
                key = key.substring(0, closeParen) + ")";

                // handle (April 2006) or (04-11-2006) or anything not (YYYY).
                // Nothing is rewritten when no four-digit year is present.
                var year = /\d{4}/.exec(key);
                if (year !== null) {
                    key = key.replace(year[0], "");
                    key = key.replace("(", "(" + year[0] + "-");
                }
                key = key.replace(" )", ")");
            }
            if ((key.match(/\s/g) || []).length > 5) {
                key = key.split(" ", 5).join(" ");
            }
            return key;
        };

        // Builds the sort key for a citation whose id starts with "CITEREF".
        // If the first author's name (the citation text up to the first comma)
        // occurs later in the id, it is moved to the front:
        // for example, A._Sanjoy2011 --> Sanjoy_A.2011
        var sortKeyFromId = function(id, text) {
            var key = id.replace("CITEREF", "");
            var firstAuthLname = text.split(",")[0];
            if (key.indexOf(firstAuthLname) > 0) {
                key = firstAuthLname + "_" + key.replace(firstAuthLname, "");
            }
            return key;
        };

        var collator = Intl.Collator();

        // Citations already handled for a section lower on the page; they are
        // skipped when a section higher up also contains them.
        var alreadySorted = [];

        for (var r = 0; r < refheaders.length; r++) {

            var refsection = jQuery(refheaders[r]).parent().next();
            var newcites = refsection.find('.citation');

            var unsortedCites = []; // sort keys in page order
            var sortIndices = []; // index into newcites for each key
            var oldAuth = '';

            for (var h = 0; h < newcites.length; h++) {
                var cite = newcites[h];

                if (alreadySorted.indexOf(cite) > -1) {
                    continue;
                }

                // Only citations without an id, or with a CITEREF id, are
                // considered.
                var id3 = cite.getAttribute('id');
                if (id3 && id3.indexOf('CITEREF') !== 0) {
                    continue;
                }

                // Skip citations whose grandparent has a "cite_note" id.
                var holder = cite.parentNode && cite.parentNode.parentNode;
                var parentid = (holder && holder.getAttribute) ? holder.getAttribute('id') : null;
                if (parentid && parentid.indexOf('cite_note') > -1) {
                    continue;
                }

                // The text is read before any flag is appended to the
                // citation, so that flag text cannot end up in the sort key.
                var citeText = cite.innerText.replace(/"/g, '').trim();
                var mySortTxt2;

                if (!id3) {
                    flagSortIssue(cite,
                        " <strong class=refckWarn> Caution: Missing <i>ref=</i> anchor?; </strong>");

                    // nothing to sort on
                    if (citeText === '') {
                        continue;
                    }
                    mySortTxt2 = sortKeyFromText(citeText, oldAuth);
                } else {
                    mySortTxt2 = sortKeyFromId(id3, citeText);
                }

                // oldAuth is for |author-mask={{long dash}}: the key without
                // parentheses, curly braces and long dashes.
                oldAuth = mySortTxt2.replace(/[{()}]/g, '').replace(/—/g, "").trim();

                mySortTxt2 = mySortTxt2.toLowerCase().replace(/'/g, "");

                if (unsortedCites.indexOf(mySortTxt2) > -1) {
                    flagSortIssue(cite,
                        " <strong class=refckWarn> Warning: duplicate author/date: " +
                        escapeSortText(mySortTxt2) + "; </strong>");
                }
                unsortedCites.push(mySortTxt2);
                sortIndices.push(h);
                alreadySorted.push(cite);
            }

            // Compare the page order with the collated order position by
            // position, and flag every citation that is not where expected.
            var sortedCites = unsortedCites.slice().sort(collator.compare);
            for (var p = 0; p < unsortedCites.length; p++) {
                if (unsortedCites[p] != sortedCites[p]) {
                    flagSortIssue(newcites[sortIndices[p]],
                        " <strong class=refckWarn> Sort error, expected: <u>" +
                        escapeSortText(sortedCites[p]) + "</u>; </strong>");
                }
            }
        }

        // Show/hide control for the flags added above.
        // First wait for mediawiki.util to load, and the page to be ready.
        $.when(mw.loader.using('mediawiki.util'), $.ready).then(function() {
            var storageKey = 'reviewsourcecheck-state';

            // localStorage may be unavailable or may throw; in that case the
            // state simply is not remembered between page views.
            var readState = function() {
                try {
                    return localStorage.getItem(storageKey);
                } catch (e) {
                    return null;
                }
            };
            var saveState = function(state) {
                try {
                    localStorage.setItem(storageKey, state);
                } catch (e) {
                    // best effort only
                }
            };

            // Shows or hides all flags and offers the opposite action: the
            // "Show" link while the flags are hidden, the "Hide" link while
            // they are visible.
            var applyState = function(visible) {
                $(".refckErr").toggle(visible);
                $(".refckWarn").toggle(visible);
                $('#ca-showCk').toggle(!visible);
                $('#ca-hideCk').toggle(visible);
            };

            mw.util.addPortletLink('p-cactions', '#', 'Hide ref check', 'ca-hideCk', "Hide ref check");
            mw.util.addPortletLink('p-cactions', '#', 'Show ref check', 'ca-showCk', "Show ref check");

            // Restore the previous state; flags stay hidden unless the user
            // chose to show them the last time.
            applyState(readState() === 'shown');

            $('#ca-hideCk').on('click', function() {
                applyState(false);
                saveState('hidden');
                return false;
            });

            $('#ca-showCk').on('click', function() {
                applyState(true);
                saveState('shown');
                return false;
            });
        });

    }
});
//</nowiki>