Changeset 867
- Timestamp: 10/30/07 09:35:46 (6 years ago)
- Files: 1 modified
lang/lua/LuaScraper/luascraper.lua (modified) (6 diffs)
Legend:
- Unmodified
- Added
- Removed
-
lang/lua/LuaScraper/luascraper.lua
```diff
--- lang/lua/LuaScraper/luascraper.lua (r866)
+++ lang/lua/LuaScraper/luascraper.lua (r867)
@@ -1,3 +1,4 @@
 local http = require("socket.http")
+local url = require("socket.url")
 local xml = require("xml")
 
@@ -7,6 +8,7 @@
 end
 -- not supported
-function result(p)
-  return p
+function result(self)
+  self.name = "result"
+  return self
 end
 -- return scraper structure
@@ -14,5 +16,5 @@
   self.name = "scraper"
   -- scrape method
-  function self.scrape(url, ctx)
+  function self.scrape(uri, ctx)
     -- create http session and parse HTML
     if ctx == nil then
@@ -20,5 +22,5 @@
       local b, c = http.request {
         method = "GET",
-        url = url,
+        url = uri,
         sink = ltn12.sink.table(chunk)
       }
@@ -39,5 +41,5 @@
         for k1,v1 in pairs(ctx.doc:select(v.process.xpath)) do
           local newctx = {top=ctx.top, doc=v1}
-          self.res[#(self.res)+1] = v.process.scraper.scrape(url, newctx)
+          self.res[#(self.res)+1] = v.process.scraper.scrape(uri, newctx)
         end
       else
@@ -49,5 +51,14 @@
         elseif string.sub(attr, 1, 1) == "@" then
           attr = string.sub(attr, 2)
-          self.res[v.process.name] = node[1]:attribute(attr)
+          val = node[1]:attribute(attr)
+          nname = node[1]:name()
+          if (nname == "img" and attr == "src") or (nname == "a" and attr == "href") then
+            if string.sub(uri, -1) == "/" then
+              val = url.absolute(uri, val)
+            else
+              val = url.absolute(uri + "/", val)
+            end
+          end
+          self.res[v.process.name] = val
         end
       end
```
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)