/[hyperestraier_wrappers]/trunk/perl/HyperEstraierWrapper.cpp
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/perl/HyperEstraierWrapper.cpp

Parent Directory | Revision Log


Revision 11 - (show annotations)
Thu Sep 8 21:22:10 2005 UTC (18 years, 6 months ago) by dpavlin
File size: 15585 byte(s)
Remove NetEnv class, and do network environment initialization
in constructor and destructor of node. Added example for gatherer
using node API.

1 /**
2 * HyperEstraierWrapper.cpp - C++ wrapper for Hyper Estraier
3 */
4 #include <estraier.h>
5 #include <estmtdb.h>
6 #include <cabin.h>
7 #include <cstdlib>
8 #include <string>
9 #include <vector>
10 #include <map>
11 #include <cassert>
12 #include <stdexcept>
13 #include <estnode.h>
14
/*
 * Backward compatibility for Hyper Estraier 0.5.4: when no macro named
 * ESTCONDAGITO is visible, make the name expand to ESTCONDAGIT.
 * NOTE(review): this test only detects preprocessor macros. If estraier.h
 * declares ESTCONDAGITO as an enumerator the define below still takes
 * effect -- confirm against the headers of every supported release.
 */
#if !defined(ESTCONDAGITO)
#define ESTCONDAGITO ESTCONDAGIT
#endif
19
20 namespace estraier {
21
/**
 * Exception type used by the wrapper classes in this file, e.g. when a
 * method is called on a closed database ("closed database") or when the
 * network environment cannot be initialised.
 */
class IOError : public std::runtime_error {
public:
    explicit IOError (const std::string& message)
        : std::runtime_error(message)
    {
    }
};
26
27 class Condition {
28 public:
29 enum { // enumeration for options
30 SURE = ESTCONDSURE, // check every N-gram key
31 USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one
32 FAST = ESTCONDFAST, // check N-gram keys skipping by two
33 AGITO = ESTCONDAGITO, // check N-gram keys skipping by three
34 NOIDF = ESTCONDNOIDF, // without TF-IDF tuning
35 SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase
36 };
37 ESTCOND * cond;
38 Condition() {
39 /**
40 * constructor
41 */
42 cond = est_cond_new();
43 }
44 ~Condition() {
45 /**
46 * destructor
47 */
48 est_cond_delete(cond);
49 }
50 void set_phrase(const char *phrase) {
51 /**
52 * set the search phrase
53 */
54 est_cond_set_phrase(cond, phrase);
55 }
56 void add_attr(const char *expr) {
57 /**
58 * set the attribute expression
59 */
60 est_cond_add_attr(cond, expr);
61 }
62 void set_order(const char *expr) {
63 /**
64 * set the order of a condition object
65 */
66 est_cond_set_order(cond, expr);
67 }
68 void set_max(int _max) {
69 /**
70 * set the maximum number of retrieval of a condition object
71 */
72 est_cond_set_max(cond, _max);
73 }
74 void set_options(int options) {
75 /**
76 * set options of retrieval of a condition object
77 */
78 est_cond_set_options(cond, options);
79 }
80 };
81
82 class Document {
83 private:
84 std::string text_buf;
85 public:
86 ESTDOC *doc;
87 Document() {
88 /**
89 * constructor
90 */
91 doc = est_doc_new();
92 }
93 Document(const char* draft) {
94 /**
95 * constructor
96 */
97 doc = est_doc_new_from_draft(draft);
98 }
99 Document(ESTDOC *_doc) {
100 /**
101 * constructor
102 */
103 doc = _doc;
104 }
105 ~Document() {
106 /**
107 * destructor
108 */
109 est_doc_delete(doc);
110 }
111 void add_attr(const char * name, const char*value) {
112 /**
113 * add an attribute to a document object
114 */
115 est_doc_add_attr(doc, name, value);
116 }
117 void add_text(const char *text) {
118 /**
119 * add a sentence of text to a document object
120 */
121 est_doc_add_text(doc, text);
122 }
123 void add_hidden_text(const char * text) {
124 /**
125 * add a hidden sentence to a document object
126 */
127 est_doc_add_hidden_text(doc, text);
128 }
129 int id() {
130 /**
131 * get the ID number of a document object
132 */
133 return est_doc_id(doc);
134 }
135 std::vector<std::string> * attr_names() {
136 /**
137 * get a list of attribute names of a document object
138 */
139 std::vector<std::string> * vs = new std::vector<std::string>;
140 CBLIST * attr_names = est_doc_attr_names(doc);
141 for (int i=0; i < cblistnum(attr_names); i++) {
142 vs->push_back(cblistval(attr_names, i, NULL));
143 }
144 cblistclose(attr_names);
145 return vs;
146 }
147 const char * attr(const char *name) {
148 /**
149 * get the value of an attribute of a document object
150 */
151 return est_doc_attr(doc, name);
152 }
153 const char * cat_texts() {
154 /**
155 * get a list of sentences of the text of a document object
156 */
157 return est_doc_cat_texts(doc);
158 }
159 std::vector<std::string>* texts() {
160 /**
161 * get a list of sentences of the text of a document object
162 */
163 std::vector<std::string> * vs = new std::vector<std::string>;
164 const CBLIST *texts;
165 texts = est_doc_texts(doc);
166 for(int i = 0; i < cblistnum(texts); i++) {
167 vs->push_back(cblistval(texts, i, NULL));
168 }
169 return vs;
170 }
171 const char * dump_draft() {
172 /**
173 * dump draft data of a document object
174 */
175 return est_doc_dump_draft(doc);
176 }
177 const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
178 /**
179 * make a snippet of the body text of a document object
180 */
181 CBLIST * words;
182 std::vector<std::string>::iterator iter;
183 words = cblistopen();
184 for (iter = _words.begin(); _words.end() != iter; iter++) {
185 cblistpush(words, iter->c_str(), -1);
186 }
187 const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
188 cblistclose(words);
189 return result;
190 }
191 const char * hidden_texts() {
192 /**
193 * get the hidden texts of a document object.
194 */
195 return est_doc_hidden_texts(doc);
196 }
197 };
198
199 class Database {
200 private:
201 ESTMTDB *db;
202 int ecode;
203 public:
204 enum { // enumeration for error codes
205 ERRNOERR = ESTENOERR, // no error
206 ERRINVAL = ESTEINVAL, // invalid argument
207 ERRACCES = ESTEACCES, // access forbidden
208 ERRLOCK = ESTELOCK, // lock failure
209 ERRDB = ESTEDB, // database problem
210 ERRIO = ESTEIO, // I/O problem
211 ERRNOITEM = ESTENOITEM, // no item
212 ERRMISC = ESTEMISC // miscellaneous
213 };
214 enum { // enumeration for open modes
215 DBREADER = ESTDBREADER, // open as a reader
216 DBWRITER = ESTDBWRITER, // open as a writer
217 DBCREAT = ESTDBCREAT, // a writer creating
218 DBTRUNC = ESTDBTRUNC, // a writer truncating
219 DBNOLCK = ESTDBNOLCK, // open without locking
220 DBLCKNB = ESTDBLCKNB, // lock without blocking
221 DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer
222 };
223 enum { // enumeration for options of document registration
224 PDCLEAN = ESTPDCLEAN // clean up dispensable regions
225 };
226 enum { // enumeration for options of document deletion
227 ODCLEAN = ESTODCLEAN // clean up dispensable regions
228 };
229 enum { // enumeration for options of optimization
230 OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted
231 OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files
232 };
233 enum { // enumeration for options of document retrieval
234 GDNOATTR = ESTGDNOATTR, // no attributes
235 GDNOTEXT = ESTGDNOTEXT // no text
236 };
237 Database() {
238 /**
239 * constructor(dummy)
240 */
241 db = NULL;
242 ecode = ERRNOERR;
243 }
244 ~Database() {
245 if (db) close();
246 }
247 bool open(const char * dbname, int mode) {
248 /**
249 * open the database
250 */
251 if (db) close();
252 int ec;
253 db = est_mtdb_open(dbname, mode, &ec);
254 if (!db) ecode = ec;
255 return db;
256 }
257 bool close() {
258 /**
259 * close the database
260 */
261 if (!db) throw IOError("closed database");
262 int ec;
263 bool result = est_mtdb_close(db, &ec);
264 if (!result) ecode = ec;
265 db = NULL;
266 return result;
267 }
268 bool put_doc(Document *doc, int options) {
269 /**
270 * add a document to a database
271 */
272 if (!db) throw IOError("closed database");
273 bool result = est_mtdb_put_doc(db, doc->doc, options);
274 if (!result) ecode = est_mtdb_error(db);
275 return result;
276 }
277 std::vector<int> * search(Condition * cond, int options) {
278 /**
279 * search documents corresponding a condition for a database
280 */
281 if (!db) throw IOError("closed database");
282 int resnum;
283 int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
284 std::vector<int> *numbers = new std::vector<int>;
285 for (int i=0; i<resnum; i++) {
286 numbers->push_back(result[i]);
287 }
288 return numbers;
289 }
290 static const char * err_msg(int ecode) {
291 /**
292 * get the string of an error
293 */
294 return est_err_msg(ecode);
295 }
296 int error() {
297 /**
298 * get the last happended error code of a database
299 */
300 return ecode;
301 }
302 bool fatal() {
303 /**
304 * check whether a database has a fatal error
305 */
306 if (!db) throw IOError("closed database");
307 return est_mtdb_fatal(db);
308 }
309 bool flush(int _max) {
310 /**
311 * flush index words in the cache of a database
312 */
313 if (!db) throw IOError("closed database");
314 bool result = est_mtdb_flush(db, _max);
315 if (!result) ecode = est_mtdb_error(db);
316 return result;
317 }
318 bool sync() {
319 /**
320 * synchronize updating contents of a database
321 */
322 if (!db) throw IOError("closed database");
323 bool result = est_mtdb_sync(db);
324 if (!result) ecode = est_mtdb_error(db);
325 return result;
326 }
327 bool optimize(int options) {
328 /**
329 * optimize a database
330 */
331 if (!db) throw IOError("closed database");
332 bool result = est_mtdb_optimize(db, options);
333 if (!result) ecode = est_mtdb_error(db);
334 return result;
335 }
336 bool out_doc(int id, int options) {
337 /**
338 * remove a document from a database
339 */
340 if (!db) throw IOError("closed database");
341 bool result = est_mtdb_out_doc(db, id, options);
342 if (!result) ecode = est_mtdb_error(db);
343 return result;
344 }
345 bool edit_doc(Document *doc) {
346 /**
347 * edit an attribute of a document in a database
348 */
349 if (!db) throw IOError("closed database");
350 bool result = est_mtdb_edit_doc(db, doc->doc);
351 if (!result) ecode = est_mtdb_error(db);
352 return result;
353 }
354 Document * get_doc(int id, int options) {
355 /**
356 * retrieve a document in a database
357 */
358 if (!db) throw IOError("closed database");
359 ESTDOC *doc = est_mtdb_get_doc(db, id, options);
360 if (!doc) {
361 ecode = est_mtdb_error(db);
362 throw est_err_msg(est_mtdb_error(db));
363 } else {
364 return new Document(doc);
365 }
366 }
367 int uri_to_id(const char *uri) {
368 /**
369 * get the ID of a document spacified by URI
370 */
371 if (!db) throw IOError("closed database");
372 int result = est_mtdb_uri_to_id(db, uri);
373 if(result == -1) ecode = est_mtdb_error(db);
374 return result;
375 }
376 std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
377 /**
378 * extract keywords of a document object
379 */
380 if (!db) throw IOError("closed database");
381 std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
382 CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
383 cbmapiterinit(keys);
384 int ksiz;
385 while (const char *key = cbmapiternext(keys, &ksiz)) {
386 mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
387 }
388 return mss;
389 }
390 const char * name() {
391 /**
392 * get the name of a database
393 */
394 if (!db) throw IOError("closed database");
395 return est_mtdb_name(db);
396 }
397 int doc_num() {
398 /**
399 * get the number of documents in a database
400 */
401 if (!db) throw IOError("closed database");
402 return est_mtdb_doc_num(db);
403 }
404 int word_num() {
405 /**
406 * get the number of unique words in a database
407 */
408 if (!db) throw IOError("closed database");
409 return est_mtdb_word_num(db);
410 }
411 double size() {
412 /**
413 * get the size of a database
414 */
415 if (!db) throw IOError("closed database");
416 return est_mtdb_size(db);
417 }
418 void set_cache_size(size_t size, int anum, int tnum) {
419 /**
420 * set the maximum size of the cache memory of a database
421 */
422 if (!db) throw IOError("closed database");
423 est_mtdb_set_cache_size(db, size, anum, tnum);
424 }
425 void set_special_cache(const char *name, int num) {
426 /**
427 * Set the special cache for narrowing and sorting
428 * with document attributes
429 */
430 est_mtdb_set_special_cache(db, name, num);
431 }
432 };
433
434 static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) {
435 std::vector<std::string> * vs = new std::vector<std::string>;
436 CBLIST *list;
437 list = cblistopen();
438 est_break_text(text, list, norm, tail);
439 for (int i=0; i < cblistnum(list); i++) {
440 vs->push_back(cblistval(list, i, NULL));
441 }
442 cblistclose(list);
443 return vs;
444 }
445
446 static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) {
447 std::vector<std::string> * vs = new std::vector<std::string>;
448 CBLIST *list;
449 list = cblistopen();
450 est_break_text_perfng(text, list, norm, tail);
451 for (int i=0; i < cblistnum(list); i++) {
452 vs->push_back(cblistval(list, i, NULL));
453 }
454 cblistclose(list);
455 return vs;
456 }
457
458 class NodeDocument {
459 private:
460 ESTRESDOC *rdoc;
461 public:
462 NodeDocument(ESTNODERES *nres, int index) {
463 rdoc = est_noderes_get_doc(nres, index);
464 }
465 const char *uri(void) {
466 return est_resdoc_uri(rdoc);
467 }
468 std::vector<std::string> * attr_names() {
469 std::vector<std::string> * vs = new std::vector<std::string>;
470 CBLIST * attr_names = est_resdoc_attr_names(rdoc);
471 for (int i=0; i < cblistnum(attr_names); i++) {
472 vs->push_back(cblistval(attr_names, i, NULL));
473 }
474 cblistclose(attr_names);
475 return vs;
476 }
477 const char *attr(const char *name) {
478 return est_resdoc_attr(rdoc, name);
479 }
480 const char *snippet(void) {
481 return est_resdoc_snippet(rdoc);
482 }
483 };
484
485 class NodeRes {
486 private:
487 ESTNODERES *nres;
488 public:
489 NodeRes(ESTNODE *node, ESTCOND *cond, int depth) {
490 nres = est_node_search(node, cond, depth);
491 }
492 ~NodeRes() {
493 est_noderes_delete(nres);
494 }
495 std::map<std::string, std::string> * hints(void) {
496 std::map<std::string, std::string> * hints = new std::map<std::string, std::string>;
497 CBMAP * keys = est_noderes_hints(nres);
498 cbmapiterinit(keys);
499 int ksiz;
500 while (const char *key = cbmapiternext(keys, &ksiz)) {
501 hints->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
502 }
503 return hints;
504 }
505 int doc_num(void) {
506 return est_noderes_doc_num(nres);
507 }
508 NodeDocument * get_doc(int index) {
509 return new NodeDocument(nres, index);
510 }
511 };
512
513 class Node {
514 private:
515 ESTNODE *node;
516 int netenv_ok;
517 public:
518 Node(const char *url) {
519 netenv_ok = est_init_net_env();
520 if (! netenv_ok) throw IOError("can't init net env");
521 node = est_node_new(url);
522 if (! node) throw IOError("can't create node");
523 }
524 ~Node() {
525 est_node_delete(node);
526 est_free_net_env();
527 }
528 void set_proxy(const char *host, int port) {
529 est_node_set_proxy(node, host, port);
530 }
531 void set_timeout(int sec) {
532 est_node_set_timeout(node, sec);
533 }
534 void set_auth(const char *name, const char *passwd) {
535 est_node_set_auth(node, name, passwd);
536 }
537 int status(void) {
538 return est_node_status(node);
539 }
540 bool put_doc(Document *doc) {
541 return est_node_put_doc(node, doc->doc);
542 }
543 bool out_doc(int id) {
544 return est_node_out_doc(node, id);
545 }
546 bool out_doc_by_uri(const char *uri) {
547 return est_node_out_doc_by_uri(node, uri);
548 }
549 #ifdef est_node_edit_doc
550 bool edit_doc(Document *doc) {
551 return est_node_edit_doc(node, doc->doc);
552 }
553 #endif
554 Document * get_doc(int id) {
555 ESTDOC *doc = est_node_get_doc(node, id);
556 if (!doc) {
557 return NULL;
558 } else {
559 return new Document(doc);
560 }
561 }
562 Document * get_doc_by_uri(const char *uri) {
563 ESTDOC *doc = est_node_get_doc_by_uri(node, uri);
564 if (!doc) {
565 return NULL;
566 } else {
567 return new Document(doc);
568 }
569 }
570 char * get_doc_attr(int id, const char *name) {
571 /* is this leeking memory? shouldn't I create
572 * object and free memory region returned?
573 */
574 return est_node_get_doc_attr(node, id, name);
575 }
576 char * get_doc_attr_by_uri(const char *uri, const char *name) {
577 return est_node_get_doc_attr_by_uri(node, uri, name);
578 }
579 int uri_to_id(const char *uri) {
580 return est_node_uri_to_id(node, uri);
581 }
582 const char * name(void) {
583 return est_node_name(node);
584 }
585 const char * label(void) {
586 return est_node_label(node);
587 }
588 int doc_num(void) {
589 return est_node_doc_num(node);
590 }
591 int word_num(void) {
592 return est_node_word_num(node);
593 }
594 double size(void) {
595 return est_node_size(node);
596 }
597 NodeRes * search(ESTCOND *cond, int depth) {
598 return new NodeRes(node, cond, depth);
599 }
600 int set_user(const char *name, int mode) {
601 return est_node_set_user(node, name, mode);
602 }
603 int set_link(const char *url, const char *label, int credit) {
604 return est_node_set_link(node, url, label, credit);
605 }
606 };
607
608 };

  ViewVC Help
Powered by ViewVC 1.1.26