User:Wmahan/despam.js

Source: Wikipedia, the free encyclopedia.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// Title of the wiki page that hosts the history scan. The onload hook looks
// for this string in the current URL and despamClick() opens
// '/wiki/' + despamPage. Note that doDespam() later overwrites this variable
// with the title of the page being scanned.
var despamPage = 'User:Wmahan/despam';
// path of the query endpoint used by fetchHistory()
var query_php = '/w/query.php';
// base URL for article links and for the raw revision fetches in wasLinkAdded()
var wiki = 'http://en.wikipedia.org/wiki/';
// index.php URL used to build diff, edit and history links
var wPage = 'http://en.wikipedia.org/w/index.php';
// prefix used to indent secondary log messages
var tab = '     ';

// number of history entries to fetch at a time
var revPage = 50;
// stop after searching this many revisions
var maxRevs = 5000;

// XMLHttpRequest shared by successive fetchHistory() calls; created in doDespam()
var http_request;
// offset of the next page of history entries to request
var rvoffset;
// State of the current scan, set by doDespam() and read by fetchHistory().
// (despamPage is already declared above; repeating it in a var statement
// does not reset its value.)
var despamPage, despamUrl, despamUserRe;
// element that log() appends its messages to
var logDiv;

// Parse the query string of the current page into a key/value map.
//
// Reads window.location.search, splits it on '&' and, for every
// "key=value" pair, stores unescape(value) under unescape(key). Pairs with
// no '=' or with an empty key are skipped. unescape() is kept because the
// links this script generates encode their values with escape(), and the
// two functions have to stay paired.
//
// Returns a plain object mapping parameter names to decoded string values.
function parseParams() {
  var query = window.location.search.substring(1);
  // A plain object rather than an Array: assigning a non-numeric string to
  // an Array's "length" key throws a RangeError, so a URL containing
  // "length=abc" used to abort the script.
  var params = {};
  var qlist = query.split('&');
  for (var i = 0; i < qlist.length; i++) {
    var pos = qlist[i].indexOf('=');
    if (pos > 0) {
      var key = unescape(qlist[i].substring(0, pos));
      var val = unescape(qlist[i].substring(pos + 1));
      params[key] = val;
    }
  }
  return params;
}

// HTML snippets that removeLink() writes to the log to report whether the
// link was removed from the edit box (green) or could not be found (red)
var removeSuccess = '<span style="color: #008000">link removed</span>\n';
var removeFailure = '<span style="color: #a00000">unable to remove link</span>\n';


// Submit the edit form through its "wpSave" button and arrange for this
// window to close itself once the page unloads.
function saveAndClose() {
  // the unload handler is installed before the click that triggers the save
  window.onunload = function closeOnUnload() {
    window.close();
  };

  var saveButton = document.getElementById('wpSave');
  saveButton.click();
}

// Trigger the edit page's "Show changes" action by clicking its
// "wpDiff" button.
function doDiff() {
  var diffButton = document.getElementById('wpDiff');
  diffButton.click();
}

// On the edit page, delete every line of the edit box that contains a
// bracketed external link starting with "url", and fill in an edit summary
// saying the link was added by "user" on "timestamp".
//
// Nothing is done unless the summary field (wpSummary) exists and is still
// empty. A log area with "save" and "show changes" buttons is appended to
// the "content" element, each deleted line is logged there, and the page is
// scrolled to the "#despam" anchor afterwards.
//
// url       - link target to remove (matched as a prefix of the link)
// user      - name used in the edit summary
// timestamp - time used in the edit summary
function removeLink(url, user, timestamp) {
  // the deleted wikitext is arbitrary page content, so it must be escaped
  // before it is written into the log as HTML
  var escapeHtml = function(text) {
    return String(text).replace(/&/g, '&amp;').replace(/</g, '&lt;')
      .replace(/>/g, '&gt;').replace(/"/g, '&quot;');
  };

  var es = document.getElementById('wpSummary');
  if (!es || es.value !== '') {
    return;
  }

  // mark the edit as minor; tolerate the checkbox not being on the page
  var minor = document.getElementById('wpMinoredit');
  if (minor) {
    minor.checked = true;
  }

  var tb = document.getElementById('wpTextbox1');

  var cont = document.getElementById('content');
  logDiv = document.createElement('div');
  logDiv.id = 'despamLog';

  // XXX
  logDiv.innerHTML = "<hr />\n<a name=\"despam\"></a>\n"
    + "<h2>Removing link</h2><br />\n"
    + "<input type='button' value='Save page and close window'"
    + " onclick='saveAndClose()' /> ***"
    + " <input type='button' value='Show changes'"
    + " onclick='doDiff()' />\n";

  cont.appendChild(logDiv);

  // Turn the url into a regex fragment. Every regex metacharacter is
  // escaped, including ( ) { } and |, which occur in real URLs and would
  // otherwise change the pattern or make it invalid.
  var urlPattern = url.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  var cb = function(a) {
    log('deleted line: <span style="color: #c00000">' + escapeHtml(a) + '</span>');
    return '';
  };

  // Match "\n", optional list bullets, anything, "[url ... ]" and the rest
  // of the line. The end of the line is a lookahead rather than a consumed
  // "\n", so the newline is still available as the start of the next
  // match and adjacent spam lines are all removed.
  var re1 = new RegExp('\\n\\**[^\\n]*\\[' + urlPattern
    + '[^\\n]*?\\][^\\n]*?(?=\\n|$)', 'g');
  var newText = tb.value.replace(re1, cb);

  if (newText != tb.value) {
    log(removeSuccess);
    tb.value = newText;
    es.value = 'rm linkspam by [[Special:Contributions/' + user + '|'
      + user + ']] on ' + timestamp;
  }
  else {
    log(removeFailure);
  }
  document.location.href = '#despam';
}


// Append "msg" to the logging area as a new <div>. The message is
// inserted as HTML. If no logging area (logDiv) has been set up, the
// message is dropped.
function log(msg) {
  var entry = document.createElement('div');
  entry.innerHTML = msg; // XXX

  if (!logDiv) {
    return;
  }
  logDiv.appendChild(entry);
}

// Fetch "url" with a synchronous (blocking) GET request and return the
// response body as text. The URL being fetched is written to the log first.
function sync_fetch(url) {
  log('fetching ' + url + '...');

  var isAsync = false; // block until the response has arrived
  var request = new XMLHttpRequest();
  request.open("GET", url, isAsync);
  request.send(null);

  return request.responseText;
}

// Check whether the link "url" is present in revision "afterid" of "page"
// but absent from revision "beforeid", i.e. whether the edit between the
// two revisions added it.
//
// Both revisions are fetched as raw wikitext with blocking requests; the
// second fetch is skipped when the link already exists in "beforeid".
//
// Returns true if the link was added, false otherwise.
function wasLinkAdded(page, url, beforeid, afterid) {
  // The first parameter used to be spelled "beforid", which made the body
  // read a global "beforeid" instead of its own argument. All working
  // variables are now locals as well.
  var rawUrl = wiki + page + '?action=raw&oldid=';

  var beforeText = sync_fetch(rawUrl + beforeid);
  if (beforeText.indexOf(url) != -1) {
    // link already existed; it was not added in the next edit
    return false;
  }

  var afterText = sync_fetch(rawUrl + afterid);

  // If the link is missing here too it was not added by this edit.
  // XXX doesn't consider vandalism (the link may have been removed and
  // re-added earlier in the history)
  return afterText.indexOf(url) != -1;
}

// Examine one page of history entries ("results", the parsed query
// response) for an edit by a user matching "userRe" that added the
// link "url" to "page".
//
// Each entry is treated as the newer revision and the entry listed after
// it as its predecessor; wasLinkAdded() is asked about every pair whose
// newer revision was made by a matching user.
//
// Returns true when the addition was found (a diff link and a "remove
// link" link are logged), false when the search is over without a match,
// and undefined when the next page of history has been requested instead.
function processHistory(results, url, page, userRe) {
  // user names and timestamps come from the server response and are
  // written to the log as HTML, so escape them first
  var escapeHtml = function(text) {
    return String(text).replace(/&/g, '&amp;').replace(/</g, '&lt;')
      .replace(/>/g, '&gt;').replace(/"/g, '&quot;');
  };
  var hasOwn = Object.prototype.hasOwnProperty;

  // take the first entry of the "pages" map
  var pages = results && results['pages'];
  var info;
  for (var pageKey in pages) {
    if (hasOwn.call(pages, pageKey)) {
      info = pages[pageKey];
      break;
    }
  }

  var revs = info && info['revisions'];
  if (!revs) {
    // nothing was returned for this title; there is nothing to search
    log('<span style="color: #aa0000">No match found!</span>');
    return false;
  }

  // collect the revision keys in order, ignoring inherited properties
  var keys = [];
  for (var revKey in revs) {
    if (hasOwn.call(revs, revKey)) {
      keys.push(revKey);
    }
  }

  var found = false;
  var curuser, afterid, beforeid, timestamp;

  for (var n = 0; n + 1 < keys.length; n++) {
    var newer = revs[keys[n]];
    var older = revs[keys[n + 1]];

    curuser = newer['user'];
    if (curuser && curuser.match(userRe)) {
      // found a possible match
      afterid = newer['revid'];
      beforeid = older['revid'];
      // NOTE(review): "beforeid" used to be an implicit global here, and
      // wasLinkAdded() may read it from the global scope rather than from
      // its argument. Keep publishing it until that is confirmed unnecessary.
      window.beforeid = beforeid;

      log('checking possible match: ' + escapeHtml(curuser) + ' on '
        + escapeHtml(newer['timestamp']));
      if (wasLinkAdded(page, url, beforeid, afterid)) {
        found = true;
        timestamp = newer['timestamp'];
        break;
      }
      log(tab + 'no match');
    }
  }

  if (found) {
    log('found addition of link by ' + escapeHtml(curuser) + ' on ' + escapeHtml(timestamp)
      + ' (<a target="_blank" href="' + wPage + '?title=' + escape(page)
      + '&diff=' + afterid + '&oldid=' + beforeid + '">diff</a>, <a href="'
      + wPage + '?title=' + escape(page) + '&action=edit&fakeaction=rmlink'
      + '&user=' + escape(curuser) + '&timestamp=' + escape(timestamp)
      + '&url=' + escape(url) + '">remove link</a>)');

    return true;
  }

  // The last entry of this page was only ever used as a predecessor, so
  // start the next page on it: advancing by a full page would skip it as
  // a candidate.
  rvoffset += (revPage > 1) ? revPage - 1 : revPage;
  if (rvoffset > maxRevs || keys.length < revPage) {
    // we reached the end without finding anything
    log('<span style="color: #aa0000">No match found!</span>');
    return false;
  }

  // go on to the next page
  fetchHistory();
}

// Set everything up and start fetching pages of history entries.
//
// url   - the link whose addition is being searched for
// page  - title of the page to scan (spaces or underscores)
// users - ':'-separated list of user names or IP addresses; '*' in an
//         entry stands for one or more digits
//
// Builds a regular expression matching any of the listed users, logs a
// heading, stores the scan state in the globals rvoffset, despamPage,
// despamUrl and despamUserRe, creates the shared XMLHttpRequest and calls
// fetchHistory(). Returns false if the list contains no usable entry;
// otherwise returns nothing.
function doDespam(url, page, users) {
  // the page title comes from the query string and is logged as HTML
  var escapeHtml = function(text) {
    return String(text).replace(/&/g, '&amp;').replace(/</g, '&lt;')
      .replace(/>/g, '&gt;').replace(/"/g, '&quot;');
  };

  // every regex metacharacter except '*', which is the wildcard
  var regexCharsExceptStar = /[.+?^${}()|[\]\\]/g;

  var patterns = [];
  var entries = users.split(':');
  for (var i = 0; i < entries.length; i++) {
    // remove extraneous spaces from each entry separately, so that
    // "A : B" still matches the users "A" and "B"
    var entry = entries[i].replace(/ +/g, ' ').replace(/(^ +| +$)/g, '');
    if (entry === '') {
      // an empty alternative would make the regex match every user
      continue;
    }
    entry = entry.replace(regexCharsExceptStar, '\\$&');
    // turn wildcards into regexes
    patterns.push(entry.replace(/\*/g, '\\d+'));
  }

  if (patterns.length === 0) {
    log('Error: invalid user list');
    log('debugging info: (' + escapeHtml(users) + ')');
    return false;
  }
  var userRe = new RegExp('(' + patterns.join('|') + ')');

  var nicePage = page.replace(/_/g, ' ');
  page = page.replace(/ /g, '_');
  log('<h2>Scanning history for <a href="'
    + wiki + escape(page) + '">' + escapeHtml(nicePage) + '</a> (<a href="'
    + wPage + '?title=' + escape(page) + '&action=history">history</a>)</h2>\n');

  rvoffset = 0;
  despamPage = page;
  despamUrl = url;
  despamUserRe = userRe;

  http_request = new XMLHttpRequest();

  fetchHistory();
}

// Request the next page of history entries for despamPage, starting at
// rvoffset, using the shared http_request object.
//
// The request is asynchronous. When it completes successfully the response
// is parsed as JSON and handed to processHistory() together with the
// current scan state (despamUrl, despamPage, despamUserRe); an HTTP error
// or an unparsable response is reported in the log instead.
//
// Always returns true once the request has been sent.
function fetchHistory() {
  log('fetching history entries #' + rvoffset + ' through #' + (rvoffset + revPage));

  // build the query URL
  var qpage = query_php + '?what=revisions&format=json&rvlimit=' + revPage
    + '&rvoffset=' + rvoffset + '&titles=' + escape(despamPage);

  http_request.open("GET", qpage, true);
  http_request.onreadystatechange = function () {
    if (http_request.readyState != 4) {
      // not finished yet
      return;
    }
    if (http_request.status != 200) {
      log('<span style="color: #aa0000">There was a problem querying the page history.</span>');
      return false;
    }

    // Parse the response as data instead of eval()-ing it as code: a
    // malformed or unexpected response is reported rather than executed
    // or left to throw inside the event handler.
    var results;
    try {
      results = JSON.parse(http_request.responseText);
    } catch (e) {
      log('<span style="color: #aa0000">The page history response could not be parsed.</span>');
      return false;
    }
    processHistory(results, despamUrl, despamPage, despamUserRe);
  };
  http_request.send(null);

  return true;
}

// Click handler for the "[despam]" links: open the despam page in a new
// window, passing it the link "url", the "page" it was found on, and the
// user list typed into the "despamUsers" input.
//
// Shows an alert and opens nothing when no user was entered. Always
// returns false so that the link that was clicked is not followed.
function despamClick(url, page) {
  var target = '/wiki/' + despamPage;

  // "users" is a local now; it used to leak into the global scope
  var input = document.getElementById('despamUsers');
  var users = input ? input.value : '';

  // treat a missing input or whitespace-only text like an empty field
  if (users.replace(/^\s+|\s+$/g, '') == '') {
    alert('No user name or IP address was entered');
    return false;
  }

  window.open(target + '?url=' + escape(url) + '&page=' + escape(page)
    + '&users=' + escape(users));

  return false;
}

// Entry point, run when the page has loaded. Depending on the current URL
// it does one of three things:
//
//  1. URL contains ":Linksearch" (or "%3ALinksearch") and "target=":
//     adds a text field for user names / IPs and appends a "[despam]"
//     link to each list entry.
//  2. URL contains despamPage and no "action=": reads url, page and users
//     from the query string and starts the history scan with doDespam().
//  3. URL contains "action=edit&fakeaction=rmlink": reads url, user and
//     timestamp from the query string and calls removeLink().
addOnloadHook( function() {

  var params;

  if ((location.href.indexOf(':Linksearch') != -1
    || location.href.indexOf('%3ALinksearch') != -1)
    && location.href.indexOf('target=') != -1)
  {
    var textLabel = 'IPs or usernames for despam (e.g. <i>SpamUser</i>, <i>192.168.0.*</i>):<br />';
    var cont = document.getElementById("content");

    var html = cont.innerHTML; // XXX un-DOM

    // one list entry: <li>, a first link, anything, a second link, </li>
    var re = new RegExp('<li>(<a href.*?>(.*?)</a>.*?<a .*?>(.*?)</a>)</li>', 'g');

    var quoteChars = new RegExp('([\'\\\\])', 'g');

    // Make link text safe inside onclick="...('text')...": backslash-escape
    // single quotes and backslashes for the JavaScript string, and encode
    // double quotes, which would otherwise end the HTML attribute early.
    var forHandler = function(text) {
      return text.replace(quoteChars, '\\$1').replace(/"/g, '&quot;');
    };

    // a = the whole <li>, b = its inner HTML, c = text of the first link,
    // d = text of the second link; c and d are handed to
    // despamClick(url, page)
    var cb = function(a, b, c, d) {
      var skipPages = new RegExp('(talk|user):', 'i');

      if (!d.match(skipPages)) {
        return '<li>' + b + '  [<a href="" onclick="return despamClick(\''
          + forHandler(c) + '\',\'' + forHandler(d)
          + '\')">despam</a>]</li>';
      }
      else {
        return a; // skip entry
      }
    };

    // these three used to be implicit globals
    var div = document.createElement('div');
    div.innerHTML = textLabel; // XXX
    var inp = document.createElement('input');
    inp.type = 'text';
    inp.id = 'despamUsers';
    inp.size = 40;
    div.appendChild(inp);

    html = html.replace(re, cb);

    cont.innerHTML = html;

    // bodyContent is looked up only after the innerHTML rewrite above,
    // which replaces the elements inside "content"
    var bc = document.getElementById('bodyContent');
    bc.insertBefore(div, document.getElementsByTagName('ol')[0]);
  }
  else if (document.location.href.indexOf(despamPage) != -1
    && document.location.href.indexOf('action=') == -1)
  {
    logDiv = document.getElementById('despamLog');

    params = parseParams();
    if (params['url'] && params['page'] && params['users']) {
      doDespam(params['url'], params['page'], params['users']);
    }
    else {
      log('<span style="color: #aa0000">The url, page, or users parameter was missing.</span>');
    }
  }
  else if (document.location.href.indexOf('action=edit&fakeaction=rmlink') != -1) {
    params = parseParams();
    if (params['url'] && params['user'] && params['timestamp']) {
      removeLink(params['url'], params['user'], params['timestamp']);
    }
  }

} );