/[hyperestraier_wrappers]/trunk/perl/HyperEstraierWrapper.cpp
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/perl/HyperEstraierWrapper.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 12 - (hide annotations)
Thu Sep 8 22:51:10 2005 UTC (18 years, 6 months ago) by dpavlin
File size: 15693 byte(s)
rename NodeDocument to ResultDocument (it was really mis-named),
fixed condition in NodeRes, moved est_noderes_get_doc to the NodeRes
class (so that NodeDocument more closely resembles the Document class),
added a (working :-) example of node search

1 dpavlin 1 /**
2     * HyperEstraierWrapper.cpp - C++ wrapper for Hyper Estraier
3     */
4     #include <estraier.h>
5     #include <estmtdb.h>
6     #include <cabin.h>
7     #include <cstdlib>
8     #include <string>
9     #include <vector>
10     #include <map>
11     #include <cassert>
12 dpavlin 6 #include <stdexcept>
13 dpavlin 9 #include <estnode.h>
14 dpavlin 1
15 dpavlin 4 /* backward compatibility for 0.5.4 */
16     #ifndef ESTCONDAGITO
17     #define ESTCONDAGITO ESTCONDAGIT
18     #endif
19    
20 dpavlin 1 namespace estraier {
21 dpavlin 6
/**
 * Exception type raised by the wrapper classes in this file, for example
 * when a method is invoked on a database that is not open.
 * The text handed to the constructor is what what() reports.
 */
class IOError : public std::runtime_error {
public:
    explicit IOError (const std::string& message)
        : std::runtime_error(message)
    {
    }
};
26    
27 dpavlin 1 class Condition {
28     public:
29     enum { // enumeration for options
30     SURE = ESTCONDSURE, // check every N-gram key
31     USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one
32     FAST = ESTCONDFAST, // check N-gram keys skipping by two
33     AGITO = ESTCONDAGITO, // check N-gram keys skipping by three
34     NOIDF = ESTCONDNOIDF, // without TF-IDF tuning
35     SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase
36     };
37     ESTCOND * cond;
38     Condition() {
39     /**
40     * constructor
41     */
42     cond = est_cond_new();
43     }
44     ~Condition() {
45     /**
46     * destructor
47     */
48     est_cond_delete(cond);
49     }
50     void set_phrase(const char *phrase) {
51     /**
52     * set the search phrase
53     */
54     est_cond_set_phrase(cond, phrase);
55     }
56     void add_attr(const char *expr) {
57     /**
58     * set the attribute expression
59     */
60     est_cond_add_attr(cond, expr);
61     }
62     void set_order(const char *expr) {
63     /**
64     * set the order of a condition object
65     */
66     est_cond_set_order(cond, expr);
67     }
68     void set_max(int _max) {
69     /**
70     * set the maximum number of retrieval of a condition object
71     */
72     est_cond_set_max(cond, _max);
73     }
74     void set_options(int options) {
75     /**
76     * set options of retrieval of a condition object
77     */
78     est_cond_set_options(cond, options);
79     }
80     };
81    
82     class Document {
83     private:
84     std::string text_buf;
85     public:
86     ESTDOC *doc;
87     Document() {
88     /**
89     * constructor
90     */
91     doc = est_doc_new();
92     }
93     Document(const char* draft) {
94     /**
95     * constructor
96     */
97     doc = est_doc_new_from_draft(draft);
98     }
99     Document(ESTDOC *_doc) {
100     /**
101     * constructor
102     */
103     doc = _doc;
104     }
105     ~Document() {
106     /**
107     * destructor
108     */
109     est_doc_delete(doc);
110     }
111     void add_attr(const char * name, const char*value) {
112     /**
113     * add an attribute to a document object
114     */
115     est_doc_add_attr(doc, name, value);
116     }
117     void add_text(const char *text) {
118     /**
119     * add a sentence of text to a document object
120     */
121     est_doc_add_text(doc, text);
122     }
123     void add_hidden_text(const char * text) {
124     /**
125     * add a hidden sentence to a document object
126     */
127     est_doc_add_hidden_text(doc, text);
128     }
129     int id() {
130     /**
131     * get the ID number of a document object
132     */
133     return est_doc_id(doc);
134     }
135     std::vector<std::string> * attr_names() {
136     /**
137     * get a list of attribute names of a document object
138     */
139     std::vector<std::string> * vs = new std::vector<std::string>;
140     CBLIST * attr_names = est_doc_attr_names(doc);
141     for (int i=0; i < cblistnum(attr_names); i++) {
142     vs->push_back(cblistval(attr_names, i, NULL));
143     }
144     cblistclose(attr_names);
145     return vs;
146     }
147     const char * attr(const char *name) {
148     /**
149     * get the value of an attribute of a document object
150     */
151     return est_doc_attr(doc, name);
152     }
153     const char * cat_texts() {
154     /**
155     * get a list of sentences of the text of a document object
156     */
157 dpavlin 6 return est_doc_cat_texts(doc);
158 dpavlin 1 }
159     std::vector<std::string>* texts() {
160     /**
161     * get a list of sentences of the text of a document object
162     */
163     std::vector<std::string> * vs = new std::vector<std::string>;
164     const CBLIST *texts;
165     texts = est_doc_texts(doc);
166     for(int i = 0; i < cblistnum(texts); i++) {
167     vs->push_back(cblistval(texts, i, NULL));
168     }
169     return vs;
170     }
171     const char * dump_draft() {
172     /**
173     * dump draft data of a document object
174     */
175     return est_doc_dump_draft(doc);
176     }
177     const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
178     /**
179     * make a snippet of the body text of a document object
180     */
181     CBLIST * words;
182     std::vector<std::string>::iterator iter;
183     words = cblistopen();
184     for (iter = _words.begin(); _words.end() != iter; iter++) {
185     cblistpush(words, iter->c_str(), -1);
186     }
187     const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
188     cblistclose(words);
189     return result;
190     }
191 dpavlin 6 const char * hidden_texts() {
192     /**
193     * get the hidden texts of a document object.
194     */
195     return est_doc_hidden_texts(doc);
196     }
197 dpavlin 1 };
198    
199     class Database {
200     private:
201     ESTMTDB *db;
202 dpavlin 6 int ecode;
203 dpavlin 1 public:
204     enum { // enumeration for error codes
205     ERRNOERR = ESTENOERR, // no error
206     ERRINVAL = ESTEINVAL, // invalid argument
207     ERRACCES = ESTEACCES, // access forbidden
208     ERRLOCK = ESTELOCK, // lock failure
209     ERRDB = ESTEDB, // database problem
210     ERRIO = ESTEIO, // I/O problem
211     ERRNOITEM = ESTENOITEM, // no item
212     ERRMISC = ESTEMISC // miscellaneous
213     };
214     enum { // enumeration for open modes
215     DBREADER = ESTDBREADER, // open as a reader
216     DBWRITER = ESTDBWRITER, // open as a writer
217     DBCREAT = ESTDBCREAT, // a writer creating
218     DBTRUNC = ESTDBTRUNC, // a writer truncating
219     DBNOLCK = ESTDBNOLCK, // open without locking
220     DBLCKNB = ESTDBLCKNB, // lock without blocking
221     DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer
222     };
223     enum { // enumeration for options of document registration
224     PDCLEAN = ESTPDCLEAN // clean up dispensable regions
225     };
226     enum { // enumeration for options of document deletion
227     ODCLEAN = ESTODCLEAN // clean up dispensable regions
228     };
229     enum { // enumeration for options of optimization
230     OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted
231     OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files
232     };
233     enum { // enumeration for options of document retrieval
234     GDNOATTR = ESTGDNOATTR, // no attributes
235     GDNOTEXT = ESTGDNOTEXT // no text
236     };
237     Database() {
238     /**
239     * constructor(dummy)
240     */
241 dpavlin 6 db = NULL;
242     ecode = ERRNOERR;
243 dpavlin 1 }
244     ~Database() {
245 dpavlin 6 if (db) close();
246 dpavlin 1 }
247     bool open(const char * dbname, int mode) {
248     /**
249     * open the database
250     */
251 dpavlin 6 if (db) close();
252     int ec;
253     db = est_mtdb_open(dbname, mode, &ec);
254     if (!db) ecode = ec;
255 dpavlin 1 return db;
256     }
257     bool close() {
258     /**
259     * close the database
260     */
261 dpavlin 6 if (!db) throw IOError("closed database");
262     int ec;
263     bool result = est_mtdb_close(db, &ec);
264     if (!result) ecode = ec;
265     db = NULL;
266     return result;
267 dpavlin 1 }
268     bool put_doc(Document *doc, int options) {
269     /**
270     * add a document to a database
271     */
272 dpavlin 6 if (!db) throw IOError("closed database");
273     bool result = est_mtdb_put_doc(db, doc->doc, options);
274     if (!result) ecode = est_mtdb_error(db);
275     return result;
276 dpavlin 1 }
277     std::vector<int> * search(Condition * cond, int options) {
278     /**
279     * search documents corresponding a condition for a database
280     */
281 dpavlin 6 if (!db) throw IOError("closed database");
282 dpavlin 1 int resnum;
283     int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
284     std::vector<int> *numbers = new std::vector<int>;
285     for (int i=0; i<resnum; i++) {
286     numbers->push_back(result[i]);
287     }
288     return numbers;
289     }
290     static const char * err_msg(int ecode) {
291     /**
292     * get the string of an error
293     */
294     return est_err_msg(ecode);
295     }
296     int error() {
297     /**
298     * get the last happended error code of a database
299     */
300 dpavlin 6 return ecode;
301 dpavlin 1 }
302     bool fatal() {
303     /**
304     * check whether a database has a fatal error
305     */
306 dpavlin 6 if (!db) throw IOError("closed database");
307 dpavlin 1 return est_mtdb_fatal(db);
308     }
309     bool flush(int _max) {
310     /**
311     * flush index words in the cache of a database
312     */
313 dpavlin 6 if (!db) throw IOError("closed database");
314     bool result = est_mtdb_flush(db, _max);
315     if (!result) ecode = est_mtdb_error(db);
316     return result;
317 dpavlin 1 }
318     bool sync() {
319     /**
320     * synchronize updating contents of a database
321     */
322 dpavlin 6 if (!db) throw IOError("closed database");
323     bool result = est_mtdb_sync(db);
324     if (!result) ecode = est_mtdb_error(db);
325     return result;
326 dpavlin 1 }
327     bool optimize(int options) {
328     /**
329     * optimize a database
330     */
331 dpavlin 6 if (!db) throw IOError("closed database");
332     bool result = est_mtdb_optimize(db, options);
333     if (!result) ecode = est_mtdb_error(db);
334     return result;
335 dpavlin 1 }
336     bool out_doc(int id, int options) {
337     /**
338     * remove a document from a database
339     */
340 dpavlin 6 if (!db) throw IOError("closed database");
341     bool result = est_mtdb_out_doc(db, id, options);
342     if (!result) ecode = est_mtdb_error(db);
343     return result;
344 dpavlin 1 }
345 dpavlin 6 bool edit_doc(Document *doc) {
346     /**
347     * edit an attribute of a document in a database
348     */
349     if (!db) throw IOError("closed database");
350     bool result = est_mtdb_edit_doc(db, doc->doc);
351     if (!result) ecode = est_mtdb_error(db);
352     return result;
353     }
354 dpavlin 1 Document * get_doc(int id, int options) {
355     /**
356     * retrieve a document in a database
357     */
358 dpavlin 6 if (!db) throw IOError("closed database");
359 dpavlin 1 ESTDOC *doc = est_mtdb_get_doc(db, id, options);
360     if (!doc) {
361 dpavlin 6 ecode = est_mtdb_error(db);
362 dpavlin 1 throw est_err_msg(est_mtdb_error(db));
363     } else {
364     return new Document(doc);
365     }
366     }
367     int uri_to_id(const char *uri) {
368     /**
369     * get the ID of a document spacified by URI
370     */
371 dpavlin 6 if (!db) throw IOError("closed database");
372     int result = est_mtdb_uri_to_id(db, uri);
373     if(result == -1) ecode = est_mtdb_error(db);
374     return result;
375 dpavlin 1 }
376     std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
377     /**
378     * extract keywords of a document object
379     */
380 dpavlin 6 if (!db) throw IOError("closed database");
381 dpavlin 1 std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
382     CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
383     cbmapiterinit(keys);
384     int ksiz;
385     while (const char *key = cbmapiternext(keys, &ksiz)) {
386     mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
387     }
388     return mss;
389     }
390     const char * name() {
391     /**
392     * get the name of a database
393     */
394 dpavlin 6 if (!db) throw IOError("closed database");
395 dpavlin 1 return est_mtdb_name(db);
396     }
397     int doc_num() {
398     /**
399     * get the number of documents in a database
400     */
401 dpavlin 6 if (!db) throw IOError("closed database");
402 dpavlin 1 return est_mtdb_doc_num(db);
403     }
404     int word_num() {
405     /**
406     * get the number of unique words in a database
407     */
408 dpavlin 6 if (!db) throw IOError("closed database");
409 dpavlin 1 return est_mtdb_word_num(db);
410     }
411     double size() {
412     /**
413     * get the size of a database
414     */
415 dpavlin 6 if (!db) throw IOError("closed database");
416 dpavlin 1 return est_mtdb_size(db);
417     }
418     void set_cache_size(size_t size, int anum, int tnum) {
419     /**
420     * set the maximum size of the cache memory of a database
421     */
422 dpavlin 6 if (!db) throw IOError("closed database");
423 dpavlin 1 est_mtdb_set_cache_size(db, size, anum, tnum);
424     }
425     void set_special_cache(const char *name, int num) {
426     /**
427     * Set the special cache for narrowing and sorting
428     * with document attributes
429     */
430     est_mtdb_set_special_cache(db, name, num);
431     }
432     };
433 dpavlin 6
434     static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) {
435     std::vector<std::string> * vs = new std::vector<std::string>;
436     CBLIST *list;
437     list = cblistopen();
438     est_break_text(text, list, norm, tail);
439     for (int i=0; i < cblistnum(list); i++) {
440     vs->push_back(cblistval(list, i, NULL));
441     }
442     cblistclose(list);
443     return vs;
444     }
445    
446     static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) {
447     std::vector<std::string> * vs = new std::vector<std::string>;
448     CBLIST *list;
449     list = cblistopen();
450     est_break_text_perfng(text, list, norm, tail);
451     for (int i=0; i < cblistnum(list); i++) {
452     vs->push_back(cblistval(list, i, NULL));
453     }
454     cblistclose(list);
455     return vs;
456     }
457    
458 dpavlin 12 class ResultDocument {
459     public:
460 dpavlin 9 ESTRESDOC *rdoc;
461 dpavlin 12 ResultDocument(ESTRESDOC *_rdoc) {
462     rdoc = _rdoc;
463 dpavlin 9 }
464     const char *uri(void) {
465     return est_resdoc_uri(rdoc);
466     }
467     std::vector<std::string> * attr_names() {
468     std::vector<std::string> * vs = new std::vector<std::string>;
469     CBLIST * attr_names = est_resdoc_attr_names(rdoc);
470     for (int i=0; i < cblistnum(attr_names); i++) {
471     vs->push_back(cblistval(attr_names, i, NULL));
472     }
473     cblistclose(attr_names);
474     return vs;
475     }
476     const char *attr(const char *name) {
477     return est_resdoc_attr(rdoc, name);
478     }
479     const char *snippet(void) {
480     return est_resdoc_snippet(rdoc);
481     }
482     };
483    
484     class NodeRes {
485     private:
486     ESTNODERES *nres;
487     public:
488 dpavlin 12 NodeRes(ESTNODE *node, Condition *cond, int depth) {
489     nres = est_node_search(node, cond->cond, depth);
490 dpavlin 9 }
491     ~NodeRes() {
492     est_noderes_delete(nres);
493     }
494     std::map<std::string, std::string> * hints(void) {
495     std::map<std::string, std::string> * hints = new std::map<std::string, std::string>;
496     CBMAP * keys = est_noderes_hints(nres);
497     cbmapiterinit(keys);
498     int ksiz;
499     while (const char *key = cbmapiternext(keys, &ksiz)) {
500     hints->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
501     }
502     return hints;
503     }
504     int doc_num(void) {
505     return est_noderes_doc_num(nres);
506     }
507 dpavlin 12 ResultDocument * get_doc(int index) {
508     ESTRESDOC *rdoc = est_noderes_get_doc(nres, index);
509     if (rdoc) {
510     return new ResultDocument(rdoc);
511     } else {
512     throw IOError("wtf? no document?");
513     return NULL;
514     }
515 dpavlin 9 }
516     };
517    
518     class Node {
519     private:
520     ESTNODE *node;
521 dpavlin 11 int netenv_ok;
522 dpavlin 9 public:
523     Node(const char *url) {
524 dpavlin 11 netenv_ok = est_init_net_env();
525     if (! netenv_ok) throw IOError("can't init net env");
526 dpavlin 9 node = est_node_new(url);
527     if (! node) throw IOError("can't create node");
528     }
529     ~Node() {
530     est_node_delete(node);
531 dpavlin 11 est_free_net_env();
532 dpavlin 9 }
533     void set_proxy(const char *host, int port) {
534     est_node_set_proxy(node, host, port);
535     }
536     void set_timeout(int sec) {
537     est_node_set_timeout(node, sec);
538     }
539     void set_auth(const char *name, const char *passwd) {
540     est_node_set_auth(node, name, passwd);
541     }
542     int status(void) {
543     return est_node_status(node);
544     }
545     bool put_doc(Document *doc) {
546     return est_node_put_doc(node, doc->doc);
547     }
548     bool out_doc(int id) {
549     return est_node_out_doc(node, id);
550     }
551     bool out_doc_by_uri(const char *uri) {
552     return est_node_out_doc_by_uri(node, uri);
553     }
554     #ifdef est_node_edit_doc
555     bool edit_doc(Document *doc) {
556     return est_node_edit_doc(node, doc->doc);
557     }
558     #endif
559     Document * get_doc(int id) {
560     ESTDOC *doc = est_node_get_doc(node, id);
561     if (!doc) {
562     return NULL;
563     } else {
564     return new Document(doc);
565     }
566     }
567     Document * get_doc_by_uri(const char *uri) {
568     ESTDOC *doc = est_node_get_doc_by_uri(node, uri);
569     if (!doc) {
570     return NULL;
571     } else {
572     return new Document(doc);
573     }
574     }
575     char * get_doc_attr(int id, const char *name) {
576     /* is this leeking memory? shouldn't I create
577     * object and free memory region returned?
578     */
579     return est_node_get_doc_attr(node, id, name);
580     }
581     char * get_doc_attr_by_uri(const char *uri, const char *name) {
582     return est_node_get_doc_attr_by_uri(node, uri, name);
583     }
584     int uri_to_id(const char *uri) {
585     return est_node_uri_to_id(node, uri);
586     }
587     const char * name(void) {
588     return est_node_name(node);
589     }
590     const char * label(void) {
591     return est_node_label(node);
592     }
593     int doc_num(void) {
594     return est_node_doc_num(node);
595     }
596     int word_num(void) {
597     return est_node_word_num(node);
598     }
599     double size(void) {
600     return est_node_size(node);
601     }
602 dpavlin 12 NodeRes * search(Condition *cond, int depth) {
603 dpavlin 9 return new NodeRes(node, cond, depth);
604     }
605     int set_user(const char *name, int mode) {
606     return est_node_set_user(node, name, mode);
607     }
608     int set_link(const char *url, const char *label, int credit) {
609     return est_node_set_link(node, url, label, credit);
610     }
611     };
612    
613 dpavlin 1 };

  ViewVC Help
Powered by ViewVC 1.1.26