Changeset 14401 for lang/python
- Timestamp: 06/22/08 08:57:12 (5 months ago)
- Files: 1 modified
  - lang/python/incsearch/incsearch/indexer.py (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
lang/python/incsearch/incsearch/indexer.py
```diff
--- lang/python/incsearch/incsearch/indexer.py (r14277)
+++ lang/python/incsearch/incsearch/indexer.py (r14401)
@@ -7,10 +7,18 @@
 from incsearch.db import DocDbWriter
 
+TITLE = re.compile(r'<title>([^<]+)</title>')
+TITLENUM = re.compile('^[0-9\. ]+')
 FUNC = re.compile(r'<tt id[^>]*>([^<]+)</tt>')
 def extractor(cont):
+    t = TITLE.search(cont)
+    title = ""
+    if t:
+        title = unicode(t.group(1), "euc_jp", "ignore").encode("utf-8", "ignore")
+    titlehead = TITLENUM.sub("", title)
+    yield dict(pos=None, title=title, word=titlehead.lower())
     scanner = FUNC.scanner(cont)
     m = scanner.search()
     while m:
-        yield dict(pos=m.start(), title=m.group(1) , word=m.group(1))
+        yield dict(pos=m.start(), title=m.group(1)+" - "+titlehead, word=m.group(1).lower())
         m = scanner.search()
 
@@ -44,10 +52,12 @@
     pre = 0
     for i, ipoint in enumerate(extractor(whole)):
-        ipoint.update({"fname": fname})
-        db.put(ipoint["word"], "%s\t%d\t%s" % (fname, ipoint["pos"], ipoint["title"]))
-        nametag = "<a name='incsearch_%d'/>" % i
-        tagged.append(whole[pre:ipoint["pos"]])
-        tagged.append(nametag)
-        pre = ipoint["pos"]
+        if ipoint["pos"]:
+            db.put(ipoint["word"], "%s\t%d\t%s" % (fname, i, ipoint["title"]))
+            nametag = "<a name='incsearch_%d'/>" % i
+            tagged.append(whole[pre:ipoint["pos"]])
+            tagged.append(nametag)
+            pre = ipoint["pos"]
+        else:
+            db.put(ipoint["word"], "%s\t-\t%s" % (fname, ipoint["title"]))
     tagged.append(whole[pre:])
     gp = file(os.path.join(outdir, os.path.basename(fname)), "w")
```
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)