/[hyperestraier_wrappers]/trunk/perl/HyperEstraierWrapper.cpp
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/perl/HyperEstraierWrapper.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 32 - (hide annotations)
Tue Oct 11 14:04:20 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 15675 byte(s)
fix for 0.9.2 est_db_set_cache_size

1 dpavlin 1 /**
2     * HyperEstraierWrapper.cpp - C++ wrapper for Hyper Estraier
3     */
4     #include <estraier.h>
5     #include <estmtdb.h>
6     #include <cabin.h>
7     #include <cstdlib>
8     #include <string>
9     #include <vector>
10     #include <map>
11     #include <cassert>
12 dpavlin 6 #include <stdexcept>
13 dpavlin 9 #include <estnode.h>
14 dpavlin 1
15 dpavlin 4 /* backward compatibility for 0.5.4 */
16 dpavlin 32 /*
17 dpavlin 4 #ifndef ESTCONDAGITO
18     #define ESTCONDAGITO ESTCONDAGIT
19     #endif
20 dpavlin 32 */
21 dpavlin 4
22 dpavlin 1 namespace estraier {
23 dpavlin 6
/**
 * Exception type thrown by the wrapper classes in this file, for example
 * when a method is called on a database that is not open.
 */
class IOError : public std::runtime_error {
public:
    /** Build the exception from a human-readable description. */
    explicit IOError(const std::string& message)
        : std::runtime_error(message) {}
};
28    
29 dpavlin 1 class Condition {
30     public:
31     enum { // enumeration for options
32     SURE = ESTCONDSURE, // check every N-gram key
33     USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one
34     FAST = ESTCONDFAST, // check N-gram keys skipping by two
35     AGITO = ESTCONDAGITO, // check N-gram keys skipping by three
36     NOIDF = ESTCONDNOIDF, // without TF-IDF tuning
37     SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase
38     };
39     ESTCOND * cond;
40     Condition() {
41     /**
42     * constructor
43     */
44     cond = est_cond_new();
45     }
46     ~Condition() {
47     /**
48     * destructor
49     */
50     est_cond_delete(cond);
51     }
52     void set_phrase(const char *phrase) {
53     /**
54     * set the search phrase
55     */
56     est_cond_set_phrase(cond, phrase);
57     }
58     void add_attr(const char *expr) {
59     /**
60     * set the attribute expression
61     */
62     est_cond_add_attr(cond, expr);
63     }
64     void set_order(const char *expr) {
65     /**
66     * set the order of a condition object
67     */
68     est_cond_set_order(cond, expr);
69     }
70     void set_max(int _max) {
71     /**
72     * set the maximum number of retrieval of a condition object
73     */
74     est_cond_set_max(cond, _max);
75     }
76     void set_options(int options) {
77     /**
78     * set options of retrieval of a condition object
79     */
80     est_cond_set_options(cond, options);
81     }
82     };
83    
84     class Document {
85     private:
86     std::string text_buf;
87     public:
88     ESTDOC *doc;
89     Document() {
90     /**
91     * constructor
92     */
93     doc = est_doc_new();
94     }
95     Document(const char* draft) {
96     /**
97     * constructor
98     */
99     doc = est_doc_new_from_draft(draft);
100     }
101     Document(ESTDOC *_doc) {
102     /**
103     * constructor
104     */
105     doc = _doc;
106     }
107     ~Document() {
108     /**
109     * destructor
110     */
111     est_doc_delete(doc);
112     }
113     void add_attr(const char * name, const char*value) {
114     /**
115     * add an attribute to a document object
116     */
117     est_doc_add_attr(doc, name, value);
118     }
119     void add_text(const char *text) {
120     /**
121     * add a sentence of text to a document object
122     */
123     est_doc_add_text(doc, text);
124     }
125     void add_hidden_text(const char * text) {
126     /**
127     * add a hidden sentence to a document object
128     */
129     est_doc_add_hidden_text(doc, text);
130     }
131     int id() {
132     /**
133     * get the ID number of a document object
134     */
135     return est_doc_id(doc);
136     }
137     std::vector<std::string> * attr_names() {
138     /**
139     * get a list of attribute names of a document object
140     */
141     std::vector<std::string> * vs = new std::vector<std::string>;
142     CBLIST * attr_names = est_doc_attr_names(doc);
143     for (int i=0; i < cblistnum(attr_names); i++) {
144     vs->push_back(cblistval(attr_names, i, NULL));
145     }
146     cblistclose(attr_names);
147     return vs;
148     }
149     const char * attr(const char *name) {
150     /**
151     * get the value of an attribute of a document object
152     */
153     return est_doc_attr(doc, name);
154     }
155     const char * cat_texts() {
156     /**
157     * get a list of sentences of the text of a document object
158     */
159 dpavlin 6 return est_doc_cat_texts(doc);
160 dpavlin 1 }
161     std::vector<std::string>* texts() {
162     /**
163     * get a list of sentences of the text of a document object
164     */
165     std::vector<std::string> * vs = new std::vector<std::string>;
166     const CBLIST *texts;
167     texts = est_doc_texts(doc);
168     for(int i = 0; i < cblistnum(texts); i++) {
169     vs->push_back(cblistval(texts, i, NULL));
170     }
171     return vs;
172     }
173     const char * dump_draft() {
174     /**
175     * dump draft data of a document object
176     */
177     return est_doc_dump_draft(doc);
178     }
179     const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
180     /**
181     * make a snippet of the body text of a document object
182     */
183     CBLIST * words;
184     std::vector<std::string>::iterator iter;
185     words = cblistopen();
186     for (iter = _words.begin(); _words.end() != iter; iter++) {
187     cblistpush(words, iter->c_str(), -1);
188     }
189     const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
190     cblistclose(words);
191     return result;
192     }
193 dpavlin 6 const char * hidden_texts() {
194     /**
195     * get the hidden texts of a document object.
196     */
197     return est_doc_hidden_texts(doc);
198     }
199 dpavlin 1 };
200    
201     class Database {
202     private:
203     ESTMTDB *db;
204 dpavlin 6 int ecode;
205 dpavlin 1 public:
206     enum { // enumeration for error codes
207     ERRNOERR = ESTENOERR, // no error
208     ERRINVAL = ESTEINVAL, // invalid argument
209     ERRACCES = ESTEACCES, // access forbidden
210     ERRLOCK = ESTELOCK, // lock failure
211     ERRDB = ESTEDB, // database problem
212     ERRIO = ESTEIO, // I/O problem
213     ERRNOITEM = ESTENOITEM, // no item
214     ERRMISC = ESTEMISC // miscellaneous
215     };
216     enum { // enumeration for open modes
217     DBREADER = ESTDBREADER, // open as a reader
218     DBWRITER = ESTDBWRITER, // open as a writer
219     DBCREAT = ESTDBCREAT, // a writer creating
220     DBTRUNC = ESTDBTRUNC, // a writer truncating
221     DBNOLCK = ESTDBNOLCK, // open without locking
222     DBLCKNB = ESTDBLCKNB, // lock without blocking
223     DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer
224     };
225     enum { // enumeration for options of document registration
226     PDCLEAN = ESTPDCLEAN // clean up dispensable regions
227     };
228     enum { // enumeration for options of document deletion
229     ODCLEAN = ESTODCLEAN // clean up dispensable regions
230     };
231     enum { // enumeration for options of optimization
232     OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted
233     OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files
234     };
235     enum { // enumeration for options of document retrieval
236     GDNOATTR = ESTGDNOATTR, // no attributes
237     GDNOTEXT = ESTGDNOTEXT // no text
238     };
239     Database() {
240     /**
241     * constructor(dummy)
242     */
243 dpavlin 6 db = NULL;
244     ecode = ERRNOERR;
245 dpavlin 1 }
246     ~Database() {
247 dpavlin 6 if (db) close();
248 dpavlin 1 }
249     bool open(const char * dbname, int mode) {
250     /**
251     * open the database
252     */
253 dpavlin 6 if (db) close();
254     int ec;
255     db = est_mtdb_open(dbname, mode, &ec);
256     if (!db) ecode = ec;
257 dpavlin 1 return db;
258     }
259     bool close() {
260     /**
261     * close the database
262     */
263 dpavlin 6 if (!db) throw IOError("closed database");
264     int ec;
265     bool result = est_mtdb_close(db, &ec);
266     if (!result) ecode = ec;
267     db = NULL;
268     return result;
269 dpavlin 1 }
270     bool put_doc(Document *doc, int options) {
271     /**
272     * add a document to a database
273     */
274 dpavlin 6 if (!db) throw IOError("closed database");
275     bool result = est_mtdb_put_doc(db, doc->doc, options);
276     if (!result) ecode = est_mtdb_error(db);
277     return result;
278 dpavlin 1 }
279     std::vector<int> * search(Condition * cond, int options) {
280     /**
281     * search documents corresponding a condition for a database
282     */
283 dpavlin 6 if (!db) throw IOError("closed database");
284 dpavlin 1 int resnum;
285     int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
286     std::vector<int> *numbers = new std::vector<int>;
287     for (int i=0; i<resnum; i++) {
288     numbers->push_back(result[i]);
289     }
290     return numbers;
291     }
292     static const char * err_msg(int ecode) {
293     /**
294     * get the string of an error
295     */
296     return est_err_msg(ecode);
297     }
298     int error() {
299     /**
300     * get the last happended error code of a database
301     */
302 dpavlin 6 return ecode;
303 dpavlin 1 }
304     bool fatal() {
305     /**
306     * check whether a database has a fatal error
307     */
308 dpavlin 6 if (!db) throw IOError("closed database");
309 dpavlin 1 return est_mtdb_fatal(db);
310     }
311     bool flush(int _max) {
312     /**
313     * flush index words in the cache of a database
314     */
315 dpavlin 6 if (!db) throw IOError("closed database");
316     bool result = est_mtdb_flush(db, _max);
317     if (!result) ecode = est_mtdb_error(db);
318     return result;
319 dpavlin 1 }
320     bool sync() {
321     /**
322     * synchronize updating contents of a database
323     */
324 dpavlin 6 if (!db) throw IOError("closed database");
325     bool result = est_mtdb_sync(db);
326     if (!result) ecode = est_mtdb_error(db);
327     return result;
328 dpavlin 1 }
329     bool optimize(int options) {
330     /**
331     * optimize a database
332     */
333 dpavlin 6 if (!db) throw IOError("closed database");
334     bool result = est_mtdb_optimize(db, options);
335     if (!result) ecode = est_mtdb_error(db);
336     return result;
337 dpavlin 1 }
338     bool out_doc(int id, int options) {
339     /**
340     * remove a document from a database
341     */
342 dpavlin 6 if (!db) throw IOError("closed database");
343     bool result = est_mtdb_out_doc(db, id, options);
344     if (!result) ecode = est_mtdb_error(db);
345     return result;
346 dpavlin 1 }
347 dpavlin 6 bool edit_doc(Document *doc) {
348     /**
349     * edit an attribute of a document in a database
350     */
351     if (!db) throw IOError("closed database");
352     bool result = est_mtdb_edit_doc(db, doc->doc);
353     if (!result) ecode = est_mtdb_error(db);
354     return result;
355     }
356 dpavlin 1 Document * get_doc(int id, int options) {
357     /**
358     * retrieve a document in a database
359     */
360 dpavlin 6 if (!db) throw IOError("closed database");
361 dpavlin 1 ESTDOC *doc = est_mtdb_get_doc(db, id, options);
362     if (!doc) {
363 dpavlin 6 ecode = est_mtdb_error(db);
364 dpavlin 1 throw est_err_msg(est_mtdb_error(db));
365     } else {
366     return new Document(doc);
367     }
368     }
369     int uri_to_id(const char *uri) {
370     /**
371     * get the ID of a document spacified by URI
372     */
373 dpavlin 6 if (!db) throw IOError("closed database");
374     int result = est_mtdb_uri_to_id(db, uri);
375     if(result == -1) ecode = est_mtdb_error(db);
376     return result;
377 dpavlin 1 }
378     std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
379     /**
380     * extract keywords of a document object
381     */
382 dpavlin 6 if (!db) throw IOError("closed database");
383 dpavlin 1 std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
384     CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
385     cbmapiterinit(keys);
386     int ksiz;
387     while (const char *key = cbmapiternext(keys, &ksiz)) {
388     mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
389     }
390     return mss;
391     }
392     const char * name() {
393     /**
394     * get the name of a database
395     */
396 dpavlin 6 if (!db) throw IOError("closed database");
397 dpavlin 1 return est_mtdb_name(db);
398     }
399     int doc_num() {
400     /**
401     * get the number of documents in a database
402     */
403 dpavlin 6 if (!db) throw IOError("closed database");
404 dpavlin 1 return est_mtdb_doc_num(db);
405     }
406     int word_num() {
407     /**
408     * get the number of unique words in a database
409     */
410 dpavlin 6 if (!db) throw IOError("closed database");
411 dpavlin 1 return est_mtdb_word_num(db);
412     }
413     double size() {
414     /**
415     * get the size of a database
416     */
417 dpavlin 6 if (!db) throw IOError("closed database");
418 dpavlin 1 return est_mtdb_size(db);
419     }
420 dpavlin 32 void set_cache_size(size_t size, int anum, int tnum, int rnum) {
421 dpavlin 1 /**
422     * set the maximum size of the cache memory of a database
423     */
424 dpavlin 6 if (!db) throw IOError("closed database");
425 dpavlin 32 est_mtdb_set_cache_size(db, size, anum, tnum, rnum);
426 dpavlin 1 }
427     void set_special_cache(const char *name, int num) {
428     /**
429     * Set the special cache for narrowing and sorting
430     * with document attributes
431     */
432     est_mtdb_set_special_cache(db, name, num);
433     }
434     };
435 dpavlin 6
436     static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) {
437     std::vector<std::string> * vs = new std::vector<std::string>;
438     CBLIST *list;
439     list = cblistopen();
440     est_break_text(text, list, norm, tail);
441     for (int i=0; i < cblistnum(list); i++) {
442     vs->push_back(cblistval(list, i, NULL));
443     }
444     cblistclose(list);
445     return vs;
446     }
447    
448     static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) {
449     std::vector<std::string> * vs = new std::vector<std::string>;
450     CBLIST *list;
451     list = cblistopen();
452     est_break_text_perfng(text, list, norm, tail);
453     for (int i=0; i < cblistnum(list); i++) {
454     vs->push_back(cblistval(list, i, NULL));
455     }
456     cblistclose(list);
457     return vs;
458     }
459    
460 dpavlin 12 class ResultDocument {
461     public:
462 dpavlin 9 ESTRESDOC *rdoc;
463 dpavlin 12 ResultDocument(ESTRESDOC *_rdoc) {
464     rdoc = _rdoc;
465 dpavlin 9 }
466     const char *uri(void) {
467     return est_resdoc_uri(rdoc);
468     }
469     std::vector<std::string> * attr_names() {
470     std::vector<std::string> * vs = new std::vector<std::string>;
471     CBLIST * attr_names = est_resdoc_attr_names(rdoc);
472     for (int i=0; i < cblistnum(attr_names); i++) {
473     vs->push_back(cblistval(attr_names, i, NULL));
474     }
475     cblistclose(attr_names);
476     return vs;
477     }
478     const char *attr(const char *name) {
479     return est_resdoc_attr(rdoc, name);
480     }
481     const char *snippet(void) {
482     return est_resdoc_snippet(rdoc);
483     }
484     };
485    
486     class NodeRes {
487     private:
488     ESTNODERES *nres;
489     public:
490 dpavlin 12 NodeRes(ESTNODE *node, Condition *cond, int depth) {
491     nres = est_node_search(node, cond->cond, depth);
492 dpavlin 9 }
493     ~NodeRes() {
494     est_noderes_delete(nres);
495     }
496     std::map<std::string, std::string> * hints(void) {
497     std::map<std::string, std::string> * hints = new std::map<std::string, std::string>;
498     CBMAP * keys = est_noderes_hints(nres);
499     cbmapiterinit(keys);
500     int ksiz;
501     while (const char *key = cbmapiternext(keys, &ksiz)) {
502     hints->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
503     }
504     return hints;
505     }
506     int doc_num(void) {
507     return est_noderes_doc_num(nres);
508     }
509 dpavlin 12 ResultDocument * get_doc(int index) {
510     ESTRESDOC *rdoc = est_noderes_get_doc(nres, index);
511     if (rdoc) {
512     return new ResultDocument(rdoc);
513     } else {
514     return NULL;
515     }
516 dpavlin 9 }
517     };
518    
519     class Node {
520     private:
521     ESTNODE *node;
522 dpavlin 11 int netenv_ok;
523 dpavlin 9 public:
524     Node(const char *url) {
525 dpavlin 11 netenv_ok = est_init_net_env();
526     if (! netenv_ok) throw IOError("can't init net env");
527 dpavlin 9 node = est_node_new(url);
528     if (! node) throw IOError("can't create node");
529     }
530     ~Node() {
531     est_node_delete(node);
532 dpavlin 11 est_free_net_env();
533 dpavlin 9 }
534     void set_proxy(const char *host, int port) {
535     est_node_set_proxy(node, host, port);
536     }
537     void set_timeout(int sec) {
538     est_node_set_timeout(node, sec);
539     }
540     void set_auth(const char *name, const char *passwd) {
541     est_node_set_auth(node, name, passwd);
542     }
543     int status(void) {
544     return est_node_status(node);
545     }
546     bool put_doc(Document *doc) {
547     return est_node_put_doc(node, doc->doc);
548     }
549     bool out_doc(int id) {
550     return est_node_out_doc(node, id);
551     }
552     bool out_doc_by_uri(const char *uri) {
553     return est_node_out_doc_by_uri(node, uri);
554     }
555     #ifdef est_node_edit_doc
556     bool edit_doc(Document *doc) {
557     return est_node_edit_doc(node, doc->doc);
558     }
559     #endif
560     Document * get_doc(int id) {
561     ESTDOC *doc = est_node_get_doc(node, id);
562     if (!doc) {
563     return NULL;
564     } else {
565     return new Document(doc);
566     }
567     }
568     Document * get_doc_by_uri(const char *uri) {
569     ESTDOC *doc = est_node_get_doc_by_uri(node, uri);
570     if (!doc) {
571     return NULL;
572     } else {
573     return new Document(doc);
574     }
575     }
576     char * get_doc_attr(int id, const char *name) {
577     /* is this leeking memory? shouldn't I create
578     * object and free memory region returned?
579     */
580     return est_node_get_doc_attr(node, id, name);
581     }
582     char * get_doc_attr_by_uri(const char *uri, const char *name) {
583     return est_node_get_doc_attr_by_uri(node, uri, name);
584     }
585     int uri_to_id(const char *uri) {
586     return est_node_uri_to_id(node, uri);
587     }
588     const char * name(void) {
589     return est_node_name(node);
590     }
591     const char * label(void) {
592     return est_node_label(node);
593     }
594     int doc_num(void) {
595     return est_node_doc_num(node);
596     }
597     int word_num(void) {
598     return est_node_word_num(node);
599     }
600     double size(void) {
601     return est_node_size(node);
602     }
603 dpavlin 12 NodeRes * search(Condition *cond, int depth) {
604 dpavlin 9 return new NodeRes(node, cond, depth);
605     }
606     int set_user(const char *name, int mode) {
607     return est_node_set_user(node, name, mode);
608     }
609     int set_link(const char *url, const char *label, int credit) {
610     return est_node_set_link(node, url, label, credit);
611     }
612     };
613    
614 dpavlin 1 };

  ViewVC Help
Powered by ViewVC 1.1.26