/[hyperestraier_wrappers]/trunk/examples/oogatherer.py
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/examples/oogatherer.py

Parent Directory | Revision Log


Revision 2 - (hide annotations)
Sat Sep 3 18:04:41 2005 UTC (18 years, 6 months ago) by dpavlin
File MIME type: text/x-python
File size: 933 byte(s)
make working copy
1 dpavlin 1 #!/usr/bin/env python
2     # vim:fileencoding=utf-8
3    
4     import HyperEstraier
5    
6     import os
7    
class HEGatherer:
    """Gather the files of a directory tree into a Hyper Estraier index.

    The index is opened once, when the gatherer is constructed, and kept on
    ``self.db`` for every later ``put_doc`` call.
    """

    def __init__(self, dbpath):
        """Open the index at *dbpath* with the ``DBWRITER | DBCREAT`` flags."""
        self.db = HyperEstraier.Database()
        flags = HyperEstraier.Database.DBWRITER | HyperEstraier.Database.DBCREAT
        self.db.open(dbpath, flags)

    def _put_doc(self, fname):
        """Store the file *fname* as a single document in the index.

        The path is echoed to stdout, then used for both the ``@uri``
        attribute (prefixed with ``file://``) and the ``@title`` attribute.
        The file content is decoded as ISO-2022-JP, silently dropping bytes
        that cannot be decoded, and handed to the document as UTF-8.
        """
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print(fname)

        doc = HyperEstraier.Document()

        # NOTE(review): a relative fname yields a relative "file://" URI;
        # left unchanged because the URI is what identifies stored documents.
        doc.add_attr('@uri', "file://" + fname)
        doc.add_attr('@title', fname)

        # Read raw bytes (the decoding below is explicit) and always release
        # the descriptor: a large tree would otherwise keep one file open per
        # document until the garbage collector gets to it.  try/finally is
        # used instead of ``with`` so old Python 2 interpreters still work.
        source = open(fname, 'rb')
        try:
            raw = source.read()
        finally:
            source.close()

        text = raw.decode('iso-2022-jp', 'ignore')
        doc.add_text(text.encode('utf-8'))

        self.db.put_doc(doc, HyperEstraier.Database.PDCLEAN)

    def put_dir(self, dirname):
        """Index every file below *dirname*, descending into subdirectories."""
        for root, _dirs, files in os.walk(dirname):
            for fname in files:
                # os.path.join does not double the separator when dirname
                # ends with one, so the stored @uri stays clean.
                self._put_doc(os.path.join(root, fname))
30    
if __name__ == '__main__':
    # Command-line entry point: index a directory tree into a database.
    #   argv[1] - path of the index to open
    #   argv[2] - directory whose files are gathered
    import sys

    if len(sys.argv) != 3:
        # Usage text goes to stderr, unchanged from the original wording.
        sys.stderr.write("%s dbname dirname\n" % sys.argv[0])
        # A bare sys.exit() reports success (status 0); a usage error must
        # be visible to shells and calling scripts, so exit non-zero.
        sys.exit(2)

    dbname = sys.argv[1]
    dirname = sys.argv[2]

    heg = HEGatherer(dbname)
    heg.put_dir(dirname)
43    

  ViewVC Help
Powered by ViewVC 1.1.26