/[hyperestraier_wrappers]/trunk/perl/HyperEstraierWrapper.cpp
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/perl/HyperEstraierWrapper.cpp

Parent Directory | Revision Log


Revision 2 - (hide annotations)
Sat Sep 3 18:04:41 2005 UTC (18 years, 6 months ago) by dpavlin
File size: 9407 byte(s)
make working copy
1 dpavlin 1 /**
2     * HyperEstraierWrapper.cpp - C++ wrapper for Hyper Estraier
3     */
4     #include <estraier.h>
5     #include <estmtdb.h>
6     #include <cabin.h>
7     #include <cstdlib>
8     #include <string>
9     #include <vector>
10     #include <map>
11     #include <cassert>
12    
13     namespace estraier {
14     class Condition {
15     public:
16     enum { // enumeration for options
17     SURE = ESTCONDSURE, // check every N-gram key
18     USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one
19     FAST = ESTCONDFAST, // check N-gram keys skipping by two
20     AGITO = ESTCONDAGITO, // check N-gram keys skipping by three
21     NOIDF = ESTCONDNOIDF, // without TF-IDF tuning
22     SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase
23     };
24     ESTCOND * cond;
25     Condition() {
26     /**
27     * constructor
28     */
29     cond = est_cond_new();
30     }
31     ~Condition() {
32     /**
33     * destructor
34     */
35     est_cond_delete(cond);
36     }
37     void set_phrase(const char *phrase) {
38     /**
39     * set the search phrase
40     */
41     est_cond_set_phrase(cond, phrase);
42     }
43     void add_attr(const char *expr) {
44     /**
45     * set the attribute expression
46     */
47     est_cond_add_attr(cond, expr);
48     }
49     void set_order(const char *expr) {
50     /**
51     * set the order of a condition object
52     */
53     est_cond_set_order(cond, expr);
54     }
55     void set_max(int _max) {
56     /**
57     * set the maximum number of retrieval of a condition object
58     */
59     est_cond_set_max(cond, _max);
60     }
61     void set_options(int options) {
62     /**
63     * set options of retrieval of a condition object
64     */
65     est_cond_set_options(cond, options);
66     }
67     };
68    
69     class Document {
70     private:
71     std::string text_buf;
72     public:
73     ESTDOC *doc;
74    
75     Document() {
76     /**
77     * constructor
78     */
79     doc = est_doc_new();
80     }
81     Document(const char* draft) {
82     /**
83     * constructor
84     */
85     doc = est_doc_new_from_draft(draft);
86     }
87     Document(ESTDOC *_doc) {
88     /**
89     * constructor
90     */
91     doc = _doc;
92     }
93     ~Document() {
94     /**
95     * destructor
96     */
97     est_doc_delete(doc);
98     }
99     void add_attr(const char * name, const char*value) {
100     /**
101     * add an attribute to a document object
102     */
103     est_doc_add_attr(doc, name, value);
104     }
105     void add_text(const char *text) {
106     /**
107     * add a sentence of text to a document object
108     */
109     est_doc_add_text(doc, text);
110     }
111     void add_hidden_text(const char * text) {
112     /**
113     * add a hidden sentence to a document object
114     */
115     est_doc_add_hidden_text(doc, text);
116     }
117     int id() {
118     /**
119     * get the ID number of a document object
120     */
121     return est_doc_id(doc);
122     }
123     std::vector<std::string> * attr_names() {
124     /**
125     * get a list of attribute names of a document object
126     */
127     std::vector<std::string> * vs = new std::vector<std::string>;
128     CBLIST * attr_names = est_doc_attr_names(doc);
129     for (int i=0; i < cblistnum(attr_names); i++) {
130     vs->push_back(cblistval(attr_names, i, NULL));
131     }
132     cblistclose(attr_names);
133     return vs;
134     }
135     const char * attr(const char *name) {
136     /**
137     * get the value of an attribute of a document object
138     */
139     return est_doc_attr(doc, name);
140     }
141     const char * cat_texts() {
142     /**
143     * get a list of sentences of the text of a document object
144     */
145     // return est_doc_cat_texts(doc);
146     return "This is mockup!";
147     }
148     std::vector<std::string>* texts() {
149     /**
150     * get a list of sentences of the text of a document object
151     */
152     std::vector<std::string> * vs = new std::vector<std::string>;
153     const CBLIST *texts;
154     texts = est_doc_texts(doc);
155     for(int i = 0; i < cblistnum(texts); i++) {
156     vs->push_back(cblistval(texts, i, NULL));
157     }
158     return vs;
159     }
160     const char * dump_draft() {
161     /**
162     * dump draft data of a document object
163     */
164     return est_doc_dump_draft(doc);
165     }
166     const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
167     /**
168     * make a snippet of the body text of a document object
169     */
170     CBLIST * words;
171     std::vector<std::string>::iterator iter;
172    
173     words = cblistopen();
174    
175     for (iter = _words.begin(); _words.end() != iter; iter++) {
176     cblistpush(words, iter->c_str(), -1);
177     }
178    
179     const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
180    
181     cblistclose(words);
182    
183     return result;
184     }
185     };
186    
187     class Database {
188     private:
189     ESTMTDB *db;
190     public:
191     enum { // enumeration for error codes
192     ERRNOERR = ESTENOERR, // no error
193     ERRINVAL = ESTEINVAL, // invalid argument
194     ERRACCES = ESTEACCES, // access forbidden
195     ERRLOCK = ESTELOCK, // lock failure
196     ERRDB = ESTEDB, // database problem
197     ERRIO = ESTEIO, // I/O problem
198     ERRNOITEM = ESTENOITEM, // no item
199     ERRMISC = ESTEMISC // miscellaneous
200     };
201     enum { // enumeration for open modes
202     DBREADER = ESTDBREADER, // open as a reader
203     DBWRITER = ESTDBWRITER, // open as a writer
204     DBCREAT = ESTDBCREAT, // a writer creating
205     DBTRUNC = ESTDBTRUNC, // a writer truncating
206     DBNOLCK = ESTDBNOLCK, // open without locking
207     DBLCKNB = ESTDBLCKNB, // lock without blocking
208     DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer
209     };
210     enum { // enumeration for options of document registration
211     PDCLEAN = ESTPDCLEAN // clean up dispensable regions
212     };
213     enum { // enumeration for options of document deletion
214     ODCLEAN = ESTODCLEAN // clean up dispensable regions
215     };
216     enum { // enumeration for options of optimization
217     OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted
218     OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files
219     };
220     enum { // enumeration for options of document retrieval
221     GDNOATTR = ESTGDNOATTR, // no attributes
222     GDNOTEXT = ESTGDNOTEXT // no text
223     };
224     Database() {
225     /**
226     * constructor(dummy)
227     */
228     }
229     ~Database() {
230     close();
231     }
232     bool open(const char * dbname, int mode) {
233     /**
234     * open the database
235     */
236     int ecode;
237     db = est_mtdb_open(dbname, mode, &ecode);
238     return db;
239     }
240     bool close() {
241     /**
242     * close the database
243     */
244     if (db) {
245     int ecode;
246     bool result = est_mtdb_close(db, &ecode);
247     db = NULL;
248     return result;
249     } else {
250     return false;
251     }
252     }
253     bool put_doc(Document *doc, int options) {
254     /**
255     * add a document to a database
256     */
257     return est_mtdb_put_doc(db, doc->doc, options);
258     }
259     std::vector<int> * search(Condition * cond, int options) {
260     /**
261     * search documents corresponding a condition for a database
262     */
263     int resnum;
264     int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
265     std::vector<int> *numbers = new std::vector<int>;
266     for (int i=0; i<resnum; i++) {
267     numbers->push_back(result[i]);
268     }
269     return numbers;
270     }
271     static const char * err_msg(int ecode) {
272     /**
273     * get the string of an error
274     */
275     return est_err_msg(ecode);
276     }
277     int error() {
278     /**
279     * get the last happended error code of a database
280     */
281     return est_mtdb_error(db);
282     }
283     bool fatal() {
284     /**
285     * check whether a database has a fatal error
286     */
287     return est_mtdb_fatal(db);
288     }
289     bool flush(int _max) {
290     /**
291     * flush index words in the cache of a database
292     */
293     return est_mtdb_flush(db, _max);
294     }
295     bool sync() {
296     /**
297     * synchronize updating contents of a database
298     */
299     return est_mtdb_sync(db);
300     }
301     bool optimize(int options) {
302     /**
303     * optimize a database
304     */
305     return est_mtdb_optimize(db, options);
306     }
307     bool out_doc(int id, int options) {
308     /**
309     * remove a document from a database
310     */
311     return est_mtdb_out_doc(db, id, options);
312     }
313     Document * get_doc(int id, int options) {
314     /**
315     * retrieve a document in a database
316     */
317     ESTDOC *doc = est_mtdb_get_doc(db, id, options);
318     if (!doc) {
319     throw est_err_msg(est_mtdb_error(db));
320     } else {
321     return new Document(doc);
322     }
323     }
324     int uri_to_id(const char *uri) {
325     /**
326     * get the ID of a document spacified by URI
327     */
328     return est_mtdb_uri_to_id(db, uri);
329     }
330     std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
331     /**
332     * extract keywords of a document object
333     */
334     std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
335    
336     CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
337    
338     cbmapiterinit(keys);
339     int ksiz;
340     while (const char *key = cbmapiternext(keys, &ksiz)) {
341     mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
342     }
343     return mss;
344     }
345     bool iter_init() {
346     /**
347     * initialize the iterator of a database
348     */
349     return est_mtdb_iter_init(db);
350     }
351     int iter_next() {
352     /**
353     * get the next ID of the iterator of a database
354     */
355     return est_mtdb_iter_next(db);
356     }
357     const char * name() {
358     /**
359     * get the name of a database
360     */
361     return est_mtdb_name(db);
362     }
363     int doc_num() {
364     /**
365     * get the number of documents in a database
366     */
367     return est_mtdb_doc_num(db);
368     }
369     int word_num() {
370     /**
371     * get the number of unique words in a database
372     */
373     return est_mtdb_word_num(db);
374     }
375     double size() {
376     /**
377     * get the size of a database
378     */
379     return est_mtdb_size(db);
380     }
381     void set_cache_size(size_t size, int anum, int tnum) {
382     /**
383     * set the maximum size of the cache memory of a database
384     */
385     est_mtdb_set_cache_size(db, size, anum, tnum);
386     }
387     void set_special_cache(const char *name, int num) {
388     /**
389     * Set the special cache for narrowing and sorting
390     * with document attributes
391     */
392     est_mtdb_set_special_cache(db, name, num);
393     }
394     };
395     };

  ViewVC Help
Powered by ViewVC 1.1.26