Changeset 9373

Show
Ignore:
Timestamp:
04/13/08 02:00:55 (5 years ago)
Author:
drry
Message:

lang/javascript/userscripts/googlereaderfullfeed.user.js:

  • fixed regexps.
  • added XPaths suggested by snj14.
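  • et cetera: replaced index-based for loops with Array forEach/some, renamed the `find` flag to `found`, and added missing semicolons.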
Files:
1 modified

Legend:

Unmodified
Added
Removed
  • lang/javascript/userscripts/googlereaderfullfeed.user.js

    r8886 r9373  
    2929AAAAACwAAAAAEwATAAAETBDISWsNOOuNJf+aB4LiyJUZ0awNsqGB0RSYkAwhoAnKYaIEBm4EXJgC 
    3030QGFA1VBmUDwfJjjs6DQy2tJp5TBX0uf1mCO/xmYk2mxptyMAOw== 
    31 ]]></>.toString().replace(/\s+/g, ""); 
     31]]></>.toString() 
     32      .replace(/\s+/g, ""); 
    3233const ICON2 = <><!-- (c) id:Constellation --><![CDATA[data:image/gif;base64, 
    3334R0lGODdhEwATAPQAMUFp4YfO64fO7ofW9Yfe+LHO68XW68X3/MX3/9ne6+zn7uz/+Oz//Oz////v 
     
    3536EwATAAAFUCAgjmRZBmiqrqjIvqoLw/LM1unQQLyz4gECJIESHAwxgEqAUNhwAwZyBl0UnsqcNKCD 
    3637PKatbLGpBQeAQiJ3mwJydzxn0vZy0+1Y+s3EN4UAADs= 
    37 ]]></>.toString().replace(/\s+/g, ""); 
     38]]></>.toString() 
     39      .replace(/\s+/g, ""); 
    3840 
    3941// == [Config] ====================================================== 
     
    6567  this.itemInfo.item_body = getFirstElementByXPath(bodyXPath); 
    6668  this.state = 'wait'; 
    67  
    68   if (this.info.enc) { 
    69     this.mime = 'text/html; charset=' + this.info.enc; 
    70   } else { 
    71     this.mime = 'text/html; charset=' + document.characterSet; 
    72   } 
    73  
     69  this.mime = 'text/html; charset=' + (this.info.enc || document.characterSet); 
    7470 
    7571  this.request(); 
     
    9086    }, 
    9187    onerror: function() { 
    92       self.requestError.apply(self, ['Request Error']) 
     88      self.requestError.apply(self, ['Request Error']); 
    9389    }, 
    9490    onload: function(res) { 
    9591      if (res.status == 302) { 
    9692        try { 
    97           opt.url = res.responseHeaders.match(/\sLocation:\s+([^\s]+)/)[1]; 
     93          opt.url = res.responseHeaders.match(/\sLocation:\s+(\S+)/)[1]; //i 
    9894          window.setTimeout(GM_xmlhttpRequest, 0, opt); 
    9995          return; 
     
    10298        } 
    10399      } 
    104       self.requestLoad.apply(self, [res]) 
     100      self.requestLoad.apply(self, [res]); 
    105101    } 
    106102  }; 
     
    119115  text = text.replace(/(<[^>]+?[\s"'])on(?:(?:un)?load|(?:dbl)?click|mouse(?:down|up|over|move|out)|key(?:press|down|up)|focus|blur|submit|reset|select|change)\s*=\s*(?:"(?:\\"|[^"])*"?|'(\\'|[^'])*'?|[^\s>]+(?=[\s>]|<\w))(?=[^>]*?>|<\w|\s*$)/gi, 
    120116    "$1"); 
    121   if (REMOVE_SCRIPT) text = text.replace(/<script[^>]*>[\S\s]*?<\/script\s*>/gi, ""); 
    122   if (REMOVE_H2TAG)  text = text.replace(/<h2[^>]*>[\S\s]*?<\/h2\s*>/gi, ""); 
     117  if (REMOVE_SCRIPT) text = text.replace(/<script(?:\s[^>]+?)?>[\S\s]*?<\/script\s*>/gi, ""); 
     118  if (REMOVE_H2TAG)  text = text.replace(/<h2(?:\s[^>]+?)?>[\S\s]*?<\/h2\s*>/gi, ""); 
    123119  var htmldoc = parseHTML(text); 
    124120  removeXSSRisks(htmldoc); 
     
    128124    relativeToAbsolutePath(htmldoc, this.itemInfo.itemURL); 
    129125  } 
    130   for (var i = 0, l = FullFeed.documentFilters.length; i < l; 
    131     FullFeed.documentFilters[i++](htmldoc, this.itemInfo.itemURL, this.info)); 
     126  var self = this; 
     127  FullFeed.documentFilters.forEach(function(filter) { 
     128    filter(htmldoc, self.itemInfo.itemURL, this.info); 
     129  }); 
    132130  try { 
    133131    var entry = getElementsByXPath(this.info.xpath, htmldoc); 
     
    145143    this.removeEntry(); 
    146144    entry = this.addEntry(entry); 
    147     for (var i = 0, l = FullFeed.filters.length; i < l; 
    148       FullFeed.filters[i++](entry)); 
     145    FullFeed.filters.forEach(function(filter) { 
     146      filter(entry); 
     147    }); 
    149148    this.requestEnd(); 
    150149  } else { 
     
    189188 
    190189FullFeed.parser = function(text) { 
    191   var lines = text.split(/\r?\n|\r/); 
     190  var lines = text.split(/[\r\n]+/); 
    192191  var reg = /^([^:]+):(.*)$/; 
    193192  var trimspace = function(str) { 
     
    195194  }; 
    196195  var info = {}; 
    197   for (var i = lines.length; i --> 0; ) { 
    198     if (reg.test(lines[i])) { 
     196  lines.forEach(function(line) { 
     197    if (reg.test(line)) { 
    199198      info[RegExp.$1] = trimspace(RegExp.$2); 
    200199    } 
    201   } 
     200  }); 
    202201  var isValid = function(info) { 
    203202    var infoProp = ['url', 'xpath']; 
    204     for (var i = infoProp.length; i --> 0; ) { 
    205       if (!info[infoProp[i]]) { 
    206         return false; 
    207       } 
    208     } 
     203    if (infoProp.some(function(prop) { 
     204      return !info[prop]; 
     205    })) return false; 
    209206    try { 
    210207      new RegExp(info.url); 
     
    240237  var info = []; 
    241238  var doc = parseHTML(res.responseText); 
    242   var lists = getElementsByXPath( 
    243       '//textarea[@class="ldrfullfeed_data"]', doc); 
     239  var lists = 
     240    getElementsByXPath('//textarea[@class="ldrfullfeed_data"]', doc); 
    244241  lists.forEach(function(list) { 
    245242    var data = FullFeed.parser(list.value); 
     
    267264  if (!WIDGET) return; 
    268265  var exps = []; 
    269   for (var i = 0, l = SITE_INFO.length; i < l; exps.push(SITE_INFO[i++].url)); 
    270   for (var url in cacheInfo) { 
    271     var site = cacheInfo[url]; 
    272     for (var i = 0, l = site.info.length; i < l; exps.push(site.info[i++].url)); 
     266  SITE_INFO.forEach(function(info) { 
     267    exps.push(info.url); 
     268  }); 
     269  for each (var i in cacheInfo) { 
     270    i.info.forEach(function(info) { 
     271      exps.push(info.url); 
     272    }); 
    273273  } 
    274274  pattern = exps.join('|'); 
     
    393393  this.item_container = getFirstElementByXPath('id("current-entry")//div[contains(concat(" ",normalize-space(@class)," ")," entry-body ")]'); 
    394394  this.title = this.item.title; 
    395   this.find = false; 
     395  this.found = false; 
    396396}; 
    397397 
    398398var launchFullFeed = function(list, c) { 
    399   if (!list) return; 
    400   for (var i = 0, l = list.length; i < l; i++) { 
    401     var reg = new RegExp(list[i].url); 
     399  if (typeof list.some != "function") return; 
     400  list.some(function(i) { 
     401    var reg = new RegExp(i.url); 
    402402    if (reg.test(c.itemURL) || reg.test(c.feedURL)) { 
    403       c.find = true; 
    404       var ff = new FullFeed(list[i], c); 
    405       break; 
     403      c.found = true; 
     404      var ff = new FullFeed(i, c); 
     405      return true; 
     406    } else { 
     407      return false; 
    406408    } 
    407   } 
     409  }); 
    408410}; 
    409411 
     
    420422  launchFullFeed(SITE_INFO, c); 
    421423 
    422   if (!c.find) { 
    423     for (var i = 0, l = SITEINFO_IMPORT_URLS.length; i < l && !c.find; 
    424       launchFullFeed(cacheInfo[SITEINFO_IMPORT_URLS[i++]].info, c)); 
    425   } 
    426  
    427   if (!c.find) { 
     424  if (!c.found && !SITEINFO_IMPORT_URLS.some(function(url) { 
     425    launchFullFeed(cacheInfo[url].info, c); 
     426    return c.found; 
     427  })) { 
    428428    message('This entry is not listed on SITE_INFO'); 
    429429    if (OPEN) window.open(c.itemURL) || message('Cannot popup'); 
     
    477477 
    478478function searchEntry(htmldoc) { 
     479 
    479480  var xpath = [ 
    480481    '//*', 
    481     '[(..//h2) or (.//h3) or (.//h4) or (.//h5) or (.//h6) or (..//*[contains(concat(@id,@class,""),"title")])]', 
    482     '[not(.//form)]', 
    483     '[not((.|.//*)[contains(concat("",@class,""),"robots-nocontent")])]', 
    484     '[not((.|.//*)[contains(concat(@id,@class,""),"side")])]', 
    485     '[not((.|.//*)[contains(concat(@id,@class,""),"navi")])]', 
    486     '[not((.|.//*)[contains(concat(@id,@class,""),"footer")])]', 
    487     '[not((.|.//*)[contains(concat(@id,@class,""),"header")])]', 
     482    '[(..//h2) or (.//h3) or (.//h4) or (.//h5) or (.//h6) or (..//*[contains(concat(@id, " ", @class), "title")])]', 
     483    '[not(.//form|ancestor-or-self::form)]', 
     484    '[not(.//script|ancestor-or-self::script)]', 
     485    '[not((.|.//*|ancestor-or-self::*)[contains(@class, "robots-nocontent")])]', 
     486    '[not((.|.//*|ancestor-or-self::*)[contains(concat(@id, " ", @class), "side")])]', 
     487    '[not((.|.//*|ancestor-or-self::*)[contains(concat(@id, " ", @class), "navi")])]', 
     488    '[not((.|.//*|ancestor-or-self::*)[contains(concat(@id, " ", @class), "footer")])]', 
     489    '[not((.|.//*|ancestor-or-self::*)[contains(concat(@id, " ", @class), "header")])]', 
    488490  ].join(''); 
    489491  try { 
     
    492494    var elms = getElementsByXPath(xpath, htmldoc); 
    493495    if(!elms) return null; 
    494         // get content which has most text elements. 
     496        // get content which has most text elements. 
    495497    Array.forEach(elms, function(e) { 
    496       var n = e.textContent.length; 
     498      if(typeof e.textContent != "string") return; 
     499      var n = e.textContent.replace(/^\s+|\s+$|(?:\r?\n|\r){2,}/g, "").length; 
    497500      if(max < n){ 
    498501        max = n; 
     
    619622  var nodesSnapshot = (node.ownerDocument || node). 
    620623    evaluate(xpath, node, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); 
    621   var data = [] 
     624  var data = []; 
    622625  for (var i = 0, l = nodesSnapshot.snapshotLength; i < l; 
    623626    data.push(nodesSnapshot.snapshotItem(i++))); 
     
    634637// copied from Pagerization (c) id:ofk 
    635638function parseHTML(str) { 
    636   str = str.replace(/^[\s\S]*?<html[^>]*>|<\/html\s*>[\s\S]*$/ig, ''); 
     639  str = str.replace(/^[\s\S]*?<html(?:\s[^>]+?)?>|<\/html\s*>[\S\s]*$/ig, ''); 
    637640  var res = document.implementation.createDocument(null, 'html', null); 
    638641  var range = document.createRange(); 
    639642  range.setStartAfter(document.body); 
    640   res.documentElement.appendChild( 
    641     res.importNode(range.createContextualFragment(str), true) 
    642   ); 
     643  res.documentElement 
     644     .appendChild(res.importNode(range.createContextualFragment(str), true)); 
    643645  return res; 
    644646}